summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-01-18 17:14:14 -0300
committerGravatar ReinUsesLisp2021-02-13 02:17:24 -0300
commit2b95c137ff1796daf2b262f90c25104db1161b5f (patch)
tree4f5c57a9a476ba92488713e2ca99d1aa212c99ad /src/video_core/buffer_cache
parentbuffer_cache: Split CreateBuffer in separate functions (diff)
downloadyuzu-2b95c137ff1796daf2b262f90c25104db1161b5f.tar.gz
yuzu-2b95c137ff1796daf2b262f90c25104db1161b5f.tar.xz
yuzu-2b95c137ff1796daf2b262f90c25104db1161b5f.zip
buffer_cache: Heuristically detect stream buffers
Detect when a memory region has been joined several times and increase the size of the created buffer in those cases. The buffer is assumed to be a "stream buffer"; increasing its size should stop us from constantly recreating it and fragmenting memory.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--src/video_core/buffer_cache/buffer_base.h11
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h28
2 files changed, 33 insertions, 6 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 8a5e6a3e7..0c00ae280 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -251,6 +251,16 @@ public:
251 flags &= ~BufferFlagBits::Picked; 251 flags &= ~BufferFlagBits::Picked;
252 } 252 }
253 253
254 /// Increases the likeliness of this being a stream buffer
255 void IncreaseStreamScore(int score) noexcept {
256 stream_score += score;
257 }
258
259 /// Returns the likeliness of this being a stream buffer
260 [[nodiscard]] int StreamScore() const noexcept {
261 return stream_score;
262 }
263
254 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer 264 /// Returns true when vaddr -> vaddr+size is fully contained in the buffer
255 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { 265 [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept {
256 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); 266 return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes();
@@ -574,6 +584,7 @@ private:
574 VAddr cpu_addr = 0; 584 VAddr cpu_addr = 0;
575 Words words; 585 Words words;
576 BufferFlagBits flags{}; 586 BufferFlagBits flags{};
587 int stream_score = 0;
577}; 588};
578 589
579} // namespace VideoCommon 590} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 3c44c3b39..0fff42826 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -75,6 +75,7 @@ class BufferCache {
75 std::vector<BufferId> ids; 75 std::vector<BufferId> ids;
76 VAddr begin; 76 VAddr begin;
77 VAddr end; 77 VAddr end;
78 bool has_stream_leap = false;
78 }; 79 };
79 80
80 struct Binding { 81 struct Binding {
@@ -228,7 +229,7 @@ private:
228 229
229 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); 230 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
230 231
231 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id); 232 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
232 233
233 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); 234 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
234 235
@@ -670,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
670 const VAddr cpu_addr = binding.cpu_addr; 671 const VAddr cpu_addr = binding.cpu_addr;
671 const u32 size = binding.size; 672 const u32 size = binding.size;
672 Buffer& buffer = slot_buffers[binding.buffer_id]; 673 Buffer& buffer = slot_buffers[binding.buffer_id];
673 if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { 674 if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) {
674 if constexpr (IS_OPENGL) { 675 if constexpr (IS_OPENGL) {
675 if (runtime.HasFastBufferSubData()) { 676 if (runtime.HasFastBufferSubData()) {
676 // Fast path for Nvidia 677 // Fast path for Nvidia
@@ -1000,9 +1001,12 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
1000template <class P> 1001template <class P>
1001typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1002typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
1002 u32 wanted_size) { 1003 u32 wanted_size) {
1004 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1003 std::vector<BufferId> overlap_ids; 1005 std::vector<BufferId> overlap_ids;
1004 VAddr begin = cpu_addr; 1006 VAddr begin = cpu_addr;
1005 VAddr end = cpu_addr + wanted_size; 1007 VAddr end = cpu_addr + wanted_size;
1008 int stream_score = 0;
1009 bool has_stream_leap = false;
1006 for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) { 1010 for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) {
1007 const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS]; 1011 const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS];
1008 if (!overlap_id) { 1012 if (!overlap_id) {
@@ -1012,26 +1016,38 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1012 if (overlap.IsPicked()) { 1016 if (overlap.IsPicked()) {
1013 continue; 1017 continue;
1014 } 1018 }
1015 overlap.Pick();
1016 overlap_ids.push_back(overlap_id); 1019 overlap_ids.push_back(overlap_id);
1020 overlap.Pick();
1017 const VAddr overlap_cpu_addr = overlap.CpuAddr(); 1021 const VAddr overlap_cpu_addr = overlap.CpuAddr();
1018 if (overlap_cpu_addr < begin) { 1022 if (overlap_cpu_addr < begin) {
1019 cpu_addr = begin = overlap_cpu_addr; 1023 cpu_addr = begin = overlap_cpu_addr;
1020 } 1024 }
1021 end = std::max(end, overlap_cpu_addr + overlap.SizeBytes()); 1025 end = std::max(end, overlap_cpu_addr + overlap.SizeBytes());
1026
1027 stream_score += overlap.StreamScore();
1028 if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) {
1029 // When this memory region has been joined a bunch of times, we assume it's being used
1030 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1031 has_stream_leap = true;
1032 end += PAGE_SIZE * 256;
1033 }
1022 } 1034 }
1023 return OverlapResult{ 1035 return OverlapResult{
1024 .ids = std::move(overlap_ids), 1036 .ids = std::move(overlap_ids),
1025 .begin = begin, 1037 .begin = begin,
1026 .end = end, 1038 .end = end,
1039 .has_stream_leap = has_stream_leap,
1027 }; 1040 };
1028} 1041}
1029 1042
1030template <class P> 1043template <class P>
1031void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id) { 1044void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1045 bool accumulate_stream_score) {
1032 Buffer& new_buffer = slot_buffers[new_buffer_id]; 1046 Buffer& new_buffer = slot_buffers[new_buffer_id];
1033 Buffer& overlap = slot_buffers[overlap_id]; 1047 Buffer& overlap = slot_buffers[overlap_id];
1034 1048 if (accumulate_stream_score) {
1049 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
1050 }
1035 std::vector<BufferCopy> copies; 1051 std::vector<BufferCopy> copies;
1036 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); 1052 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
1037 overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { 1053 overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) {
@@ -1056,7 +1072,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1056 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1072 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1057 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1073 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1058 for (const BufferId overlap_id : overlap.ids) { 1074 for (const BufferId overlap_id : overlap.ids) {
1059 JoinOverlap(new_buffer_id, overlap_id); 1075 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1060 } 1076 }
1061 Register(new_buffer_id); 1077 Register(new_buffer_id);
1062 return new_buffer_id; 1078 return new_buffer_id;