diff options
Diffstat (limited to 'src/video_core/buffer_cache')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 11 | ||||
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 28 |
2 files changed, 33 insertions, 6 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 8a5e6a3e7..0c00ae280 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -251,6 +251,16 @@ public: | |||
| 251 | flags &= ~BufferFlagBits::Picked; | 251 | flags &= ~BufferFlagBits::Picked; |
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | /// Adds score to the accumulated stream score, raising the likelihood of this being a stream buffer | ||
| 255 | void IncreaseStreamScore(int score) noexcept { | ||
| 256 | stream_score += score; | ||
| 257 | } | ||
| 258 | |||
| 259 | /// Returns the accumulated stream score, i.e. the likelihood of this being a stream buffer | ||
| 260 | [[nodiscard]] int StreamScore() const noexcept { | ||
| 261 | return stream_score; | ||
| 262 | } | ||
| 263 | |||
| 254 | /// Returns true when vaddr -> vaddr+size is fully contained in the buffer | 264 | /// Returns true when vaddr -> vaddr+size is fully contained in the buffer |
| 255 | [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { | 265 | [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { |
| 256 | return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); | 266 | return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); |
| @@ -574,6 +584,7 @@ private: | |||
| 574 | VAddr cpu_addr = 0; | 584 | VAddr cpu_addr = 0; |
| 575 | Words words; | 585 | Words words; |
| 576 | BufferFlagBits flags{}; | 586 | BufferFlagBits flags{}; |
| 587 | int stream_score = 0; | ||
| 577 | }; | 588 | }; |
| 578 | 589 | ||
| 579 | } // namespace VideoCommon | 590 | } // namespace VideoCommon |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3c44c3b39..0fff42826 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -75,6 +75,7 @@ class BufferCache { | |||
| 75 | std::vector<BufferId> ids; | 75 | std::vector<BufferId> ids; |
| 76 | VAddr begin; | 76 | VAddr begin; |
| 77 | VAddr end; | 77 | VAddr end; |
| 78 | bool has_stream_leap = false; | ||
| 78 | }; | 79 | }; |
| 79 | 80 | ||
| 80 | struct Binding { | 81 | struct Binding { |
| @@ -228,7 +229,7 @@ private: | |||
| 228 | 229 | ||
| 229 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | 230 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); |
| 230 | 231 | ||
| 231 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id); | 232 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); |
| 232 | 233 | ||
| 233 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | 234 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); |
| 234 | 235 | ||
| @@ -670,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 670 | const VAddr cpu_addr = binding.cpu_addr; | 671 | const VAddr cpu_addr = binding.cpu_addr; |
| 671 | const u32 size = binding.size; | 672 | const u32 size = binding.size; |
| 672 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 673 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 673 | if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { | 674 | if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) { |
| 674 | if constexpr (IS_OPENGL) { | 675 | if constexpr (IS_OPENGL) { |
| 675 | if (runtime.HasFastBufferSubData()) { | 676 | if (runtime.HasFastBufferSubData()) { |
| 676 | // Fast path for Nvidia | 677 | // Fast path for Nvidia |
| @@ -1000,9 +1001,12 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | |||
| 1000 | template <class P> | 1001 | template <class P> |
| 1001 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1002 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, |
| 1002 | u32 wanted_size) { | 1003 | u32 wanted_size) { |
| 1004 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | ||
| 1003 | std::vector<BufferId> overlap_ids; | 1005 | std::vector<BufferId> overlap_ids; |
| 1004 | VAddr begin = cpu_addr; | 1006 | VAddr begin = cpu_addr; |
| 1005 | VAddr end = cpu_addr + wanted_size; | 1007 | VAddr end = cpu_addr + wanted_size; |
| 1008 | int stream_score = 0; | ||
| 1009 | bool has_stream_leap = false; | ||
| 1006 | for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) { | 1010 | for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) { |
| 1007 | const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS]; | 1011 | const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS]; |
| 1008 | if (!overlap_id) { | 1012 | if (!overlap_id) { |
| @@ -1012,26 +1016,38 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1012 | if (overlap.IsPicked()) { | 1016 | if (overlap.IsPicked()) { |
| 1013 | continue; | 1017 | continue; |
| 1014 | } | 1018 | } |
| 1015 | overlap.Pick(); | ||
| 1016 | overlap_ids.push_back(overlap_id); | 1019 | overlap_ids.push_back(overlap_id); |
| 1020 | overlap.Pick(); | ||
| 1017 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | 1021 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); |
| 1018 | if (overlap_cpu_addr < begin) { | 1022 | if (overlap_cpu_addr < begin) { |
| 1019 | cpu_addr = begin = overlap_cpu_addr; | 1023 | cpu_addr = begin = overlap_cpu_addr; |
| 1020 | } | 1024 | } |
| 1021 | end = std::max(end, overlap_cpu_addr + overlap.SizeBytes()); | 1025 | end = std::max(end, overlap_cpu_addr + overlap.SizeBytes()); |
| 1026 | |||
| 1027 | stream_score += overlap.StreamScore(); | ||
| 1028 | if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { | ||
| 1029 | // When this memory region has been joined a bunch of times, we assume it's being used | ||
| 1030 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | ||
| 1031 | has_stream_leap = true; | ||
| 1032 | end += PAGE_SIZE * 256; | ||
| 1033 | } | ||
| 1022 | } | 1034 | } |
| 1023 | return OverlapResult{ | 1035 | return OverlapResult{ |
| 1024 | .ids = std::move(overlap_ids), | 1036 | .ids = std::move(overlap_ids), |
| 1025 | .begin = begin, | 1037 | .begin = begin, |
| 1026 | .end = end, | 1038 | .end = end, |
| 1039 | .has_stream_leap = has_stream_leap, | ||
| 1027 | }; | 1040 | }; |
| 1028 | } | 1041 | } |
| 1029 | 1042 | ||
| 1030 | template <class P> | 1043 | template <class P> |
| 1031 | void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id) { | 1044 | void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, |
| 1045 | bool accumulate_stream_score) { | ||
| 1032 | Buffer& new_buffer = slot_buffers[new_buffer_id]; | 1046 | Buffer& new_buffer = slot_buffers[new_buffer_id]; |
| 1033 | Buffer& overlap = slot_buffers[overlap_id]; | 1047 | Buffer& overlap = slot_buffers[overlap_id]; |
| 1034 | 1048 | if (accumulate_stream_score) { | |
| 1049 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); | ||
| 1050 | } | ||
| 1035 | std::vector<BufferCopy> copies; | 1051 | std::vector<BufferCopy> copies; |
| 1036 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); | 1052 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); |
| 1037 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { | 1053 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { |
| @@ -1056,7 +1072,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1056 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1072 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1057 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1073 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| 1058 | for (const BufferId overlap_id : overlap.ids) { | 1074 | for (const BufferId overlap_id : overlap.ids) { |
| 1059 | JoinOverlap(new_buffer_id, overlap_id); | 1075 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1060 | } | 1076 | } |
| 1061 | Register(new_buffer_id); | 1077 | Register(new_buffer_id); |
| 1062 | return new_buffer_id; | 1078 | return new_buffer_id; |