diff options
| author | 2021-03-08 17:48:55 -0800 | |
|---|---|---|
| committer | 2021-03-08 17:48:55 -0800 | |
| commit | d1a7b2eca74ec3895df8cd046a36837ec2d70c64 (patch) | |
| tree | b563d301bfcfe822454bd3ea6a05381c7d41b47d /src | |
| parent | Merge pull request #5990 from german77/mousePanningV2 (diff) | |
| parent | buffer_cache: Heuristically decide to skip cache on uniform buffers (diff) | |
| download | yuzu-d1a7b2eca74ec3895df8cd046a36837ec2d70c64.tar.gz yuzu-d1a7b2eca74ec3895df8cd046a36837ec2d70c64.tar.xz yuzu-d1a7b2eca74ec3895df8cd046a36837ec2d70c64.zip | |
Merge pull request #6021 from ReinUsesLisp/skip-cache-heuristic
buffer_cache: Heuristically decide to skip cache on uniform buffers
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 45 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 |
2 files changed, 37 insertions, 11 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2a6844ab1..4de1e37e5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <deque> | 9 | #include <deque> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <numeric> | ||
| 12 | #include <span> | 13 | #include <span> |
| 13 | #include <unordered_map> | 14 | #include <unordered_map> |
| 14 | #include <vector> | 15 | #include <vector> |
| @@ -91,7 +92,7 @@ class BufferCache { | |||
| 91 | }; | 92 | }; |
| 92 | 93 | ||
| 93 | public: | 94 | public: |
| 94 | static constexpr u32 SKIP_CACHE_SIZE = 4096; | 95 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096; |
| 95 | 96 | ||
| 96 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 97 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 97 | Tegra::Engines::Maxwell3D& maxwell3d_, | 98 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| @@ -240,9 +241,9 @@ private: | |||
| 240 | template <bool insert> | 241 | template <bool insert> |
| 241 | void ChangeRegister(BufferId buffer_id); | 242 | void ChangeRegister(BufferId buffer_id); |
| 242 | 243 | ||
| 243 | void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | 244 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 244 | 245 | ||
| 245 | void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | 246 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); |
| 246 | 247 | ||
| 247 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 248 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 248 | std::span<BufferCopy> copies); | 249 | std::span<BufferCopy> copies); |
| @@ -297,6 +298,11 @@ private: | |||
| 297 | 298 | ||
| 298 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | 299 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; |
| 299 | 300 | ||
| 301 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 302 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 303 | |||
| 304 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 305 | |||
| 300 | bool has_deleted_buffers = false; | 306 | bool has_deleted_buffers = false; |
| 301 | 307 | ||
| 302 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | 308 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> |
| @@ -328,6 +334,19 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 328 | 334 | ||
| 329 | template <class P> | 335 | template <class P> |
| 330 | void BufferCache<P>::TickFrame() { | 336 | void BufferCache<P>::TickFrame() { |
| 337 | // Calculate hits and shots and move hit bits to the right | ||
| 338 | const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); | ||
| 339 | const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); | ||
| 340 | std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, | ||
| 341 | uniform_cache_hits.begin() + 1); | ||
| 342 | std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, | ||
| 343 | uniform_cache_shots.begin() + 1); | ||
| 344 | uniform_cache_hits[0] = 0; | ||
| 345 | uniform_cache_shots[0] = 0; | ||
| 346 | |||
| 347 | const bool skip_preferred = hits * 256 < shots * 251; | ||
| 348 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | ||
| 349 | |||
| 331 | delayed_destruction_ring.Tick(); | 350 | delayed_destruction_ring.Tick(); |
| 332 | } | 351 | } |
| 333 | 352 | ||
| @@ -671,7 +690,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 671 | const VAddr cpu_addr = binding.cpu_addr; | 690 | const VAddr cpu_addr = binding.cpu_addr; |
| 672 | const u32 size = binding.size; | 691 | const u32 size = binding.size; |
| 673 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 692 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 674 | if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { | 693 | if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) { |
| 675 | if constexpr (IS_OPENGL) { | 694 | if constexpr (IS_OPENGL) { |
| 676 | if (runtime.HasFastBufferSubData()) { | 695 | if (runtime.HasFastBufferSubData()) { |
| 677 | // Fast path for Nvidia | 696 | // Fast path for Nvidia |
| @@ -692,7 +711,12 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 692 | return; | 711 | return; |
| 693 | } | 712 | } |
| 694 | // Classic cached path | 713 | // Classic cached path |
| 695 | SynchronizeBuffer(buffer, cpu_addr, size); | 714 | const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); |
| 715 | if (sync_cached) { | ||
| 716 | ++uniform_cache_hits[0]; | ||
| 717 | } | ||
| 718 | ++uniform_cache_shots[0]; | ||
| 719 | |||
| 696 | if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { | 720 | if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { |
| 697 | // Skip binding if it's not needed and if the bound buffer is not the fast version | 721 | // Skip binding if it's not needed and if the bound buffer is not the fast version |
| 698 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens | 722 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens |
| @@ -1106,15 +1130,15 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1106 | } | 1130 | } |
| 1107 | 1131 | ||
| 1108 | template <class P> | 1132 | template <class P> |
| 1109 | void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1133 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1110 | if (buffer.CpuAddr() == 0) { | 1134 | if (buffer.CpuAddr() == 0) { |
| 1111 | return; | 1135 | return true; |
| 1112 | } | 1136 | } |
| 1113 | SynchronizeBufferImpl(buffer, cpu_addr, size); | 1137 | return SynchronizeBufferImpl(buffer, cpu_addr, size); |
| 1114 | } | 1138 | } |
| 1115 | 1139 | ||
| 1116 | template <class P> | 1140 | template <class P> |
| 1117 | void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1141 | bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1118 | boost::container::small_vector<BufferCopy, 4> copies; | 1142 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1119 | u64 total_size_bytes = 0; | 1143 | u64 total_size_bytes = 0; |
| 1120 | u64 largest_copy = 0; | 1144 | u64 largest_copy = 0; |
| @@ -1128,10 +1152,11 @@ void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1128 | largest_copy = std::max(largest_copy, range_size); | 1152 | largest_copy = std::max(largest_copy, range_size); |
| 1129 | }); | 1153 | }); |
| 1130 | if (total_size_bytes == 0) { | 1154 | if (total_size_bytes == 0) { |
| 1131 | return; | 1155 | return true; |
| 1132 | } | 1156 | } |
| 1133 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | 1157 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); |
| 1134 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | 1158 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); |
| 1159 | return false; | ||
| 1135 | } | 1160 | } |
| 1136 | 1161 | ||
| 1137 | template <class P> | 1162 | template <class P> |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 6da3906a4..c225d1fc9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | |||
| 73 | for (auto& stage_uniforms : fast_uniforms) { | 73 | for (auto& stage_uniforms : fast_uniforms) { |
| 74 | for (OGLBuffer& buffer : stage_uniforms) { | 74 | for (OGLBuffer& buffer : stage_uniforms) { |
| 75 | buffer.Create(); | 75 | buffer.Create(); |
| 76 | glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | 76 | glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr, |
| 77 | GL_STREAM_DRAW); | ||
| 77 | } | 78 | } |
| 78 | } | 79 | } |
| 79 | for (auto& stage_uniforms : copy_uniforms) { | 80 | for (auto& stage_uniforms : copy_uniforms) { |