summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 45
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 3
2 files changed, 37 insertions, 11 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2a6844ab1..4de1e37e5 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -9,6 +9,7 @@
9#include <deque> 9#include <deque>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <numeric>
12#include <span> 13#include <span>
13#include <unordered_map> 14#include <unordered_map>
14#include <vector> 15#include <vector>
@@ -91,7 +92,7 @@ class BufferCache {
91 }; 92 };
92 93
93public: 94public:
94 static constexpr u32 SKIP_CACHE_SIZE = 4096; 95 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096;
95 96
96 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 97 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
97 Tegra::Engines::Maxwell3D& maxwell3d_, 98 Tegra::Engines::Maxwell3D& maxwell3d_,
@@ -240,9 +241,9 @@ private:
240 template <bool insert> 241 template <bool insert>
241 void ChangeRegister(BufferId buffer_id); 242 void ChangeRegister(BufferId buffer_id);
242 243
243 void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 244 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
244 245
245 void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); 246 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
246 247
247 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 248 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
248 std::span<BufferCopy> copies); 249 std::span<BufferCopy> copies);
@@ -297,6 +298,11 @@ private:
297 298
298 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; 299 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
299 300
301 std::array<u32, 16> uniform_cache_hits{};
302 std::array<u32, 16> uniform_cache_shots{};
303
304 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
305
300 bool has_deleted_buffers = false; 306 bool has_deleted_buffers = false;
301 307
302 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 308 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
@@ -328,6 +334,19 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
328 334
// BufferCache<P>::TickFrame (side-by-side diff: left number/text is the old file, right is the new).
// New behaviour added by this change, in order:
//   1. Sum every slot of uniform_cache_hits / uniform_cache_shots (16 u32 slots each).
//   2. Move slots [0, size-1) one position towards the end and zero slot 0, so slot 0
//      starts counting afresh; slot 0 is what BindHostGraphicsUniformBuffer increments.
//   3. Set uniform_buffer_skip_cache_size to DEFAULT_SKIP_CACHE_SIZE when
//      hits * 256 < shots * 251 (hit ratio below 251/256), otherwise to 0. That member is
//      the size threshold checked before the classic cached path in
//      BindHostGraphicsUniformBuffer, so 0 disables the skip-cache path for sizes > 0.
//   4. Tick delayed_destruction_ring, as the function already did before this change.
329template <class P> 335template <class P>
330void BufferCache<P>::TickFrame() { 336void BufferCache<P>::TickFrame() {
337 // Calculate hits and shots and move hit bits to the right
338 const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
339 const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
// NOTE(review): in both std::copy_n calls below the destination (begin() + 1) lies inside
// the source range [begin(), begin() + size - 1). A strictly front-to-back element copy
// would propagate slot 0 into every slot instead of shifting. std::copy_backward(begin(),
// end() - 1, end()) (or C++20 std::shift_right) expresses the shift without depending on
// how the library implements the copy - confirm and consider switching.
340 std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
341 uniform_cache_hits.begin() + 1);
342 std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
343 uniform_cache_shots.begin() + 1);
344 uniform_cache_hits[0] = 0;
345 uniform_cache_shots[0] = 0;
346
// The products are computed in u32; presumably the per-window shot count stays far below
// the ~17 million needed to overflow shots * 251 - TODO confirm.
347 const bool skip_preferred = hits * 256 < shots * 251;
348 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
349
331 delayed_destruction_ring.Tick(); 350 delayed_destruction_ring.Tick();
332} 351}
333 352
@@ -671,7 +690,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
671 const VAddr cpu_addr = binding.cpu_addr; 690 const VAddr cpu_addr = binding.cpu_addr;
672 const u32 size = binding.size; 691 const u32 size = binding.size;
673 Buffer& buffer = slot_buffers[binding.buffer_id]; 692 Buffer& buffer = slot_buffers[binding.buffer_id];
674 if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { 693 if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) {
675 if constexpr (IS_OPENGL) { 694 if constexpr (IS_OPENGL) {
676 if (runtime.HasFastBufferSubData()) { 695 if (runtime.HasFastBufferSubData()) {
677 // Fast path for Nvidia 696 // Fast path for Nvidia
@@ -692,7 +711,12 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
692 return; 711 return;
693 } 712 }
694 // Classic cached path 713 // Classic cached path
695 SynchronizeBuffer(buffer, cpu_addr, size); 714 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
715 if (sync_cached) {
716 ++uniform_cache_hits[0];
717 }
718 ++uniform_cache_shots[0];
719
696 if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { 720 if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
697 // Skip binding if it's not needed and if the bound buffer is not the fast version 721 // Skip binding if it's not needed and if the bound buffer is not the fast version
698 // This exists to avoid instances where the fast buffer is bound and a GPU write happens 722 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
@@ -1106,15 +1130,15 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1106} 1130}
1107 1131
// BufferCache<P>::SynchronizeBuffer (side-by-side diff: left is the old void version,
// right is the new bool version).
// Returns true without doing any work when buffer.CpuAddr() == 0; otherwise forwards the
// result of SynchronizeBufferImpl(buffer, cpu_addr, size). In the visible part of
// SynchronizeBufferImpl, true is returned when total_size_bytes == 0 and false after
// UploadMemory has been called. The caller in BindHostGraphicsUniformBuffer counts a true
// result as a uniform cache hit.
1108template <class P> 1132template <class P>
1109void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1133bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
// NOTE(review): a CpuAddr() of 0 presumably marks a null/unbacked buffer - confirm against
// the Buffer type; it is reported to the caller as "cached" (true).
1110 if (buffer.CpuAddr() == 0) { 1134 if (buffer.CpuAddr() == 0) {
1111 return; 1135 return true;
1112 } 1136 }
1113 SynchronizeBufferImpl(buffer, cpu_addr, size); 1137 return SynchronizeBufferImpl(buffer, cpu_addr, size);
1114} 1138}
1115 1139
1116template <class P> 1140template <class P>
1117void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { 1141bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
1118 boost::container::small_vector<BufferCopy, 4> copies; 1142 boost::container::small_vector<BufferCopy, 4> copies;
1119 u64 total_size_bytes = 0; 1143 u64 total_size_bytes = 0;
1120 u64 largest_copy = 0; 1144 u64 largest_copy = 0;
@@ -1128,10 +1152,11 @@ void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1128 largest_copy = std::max(largest_copy, range_size); 1152 largest_copy = std::max(largest_copy, range_size);
1129 }); 1153 });
1130 if (total_size_bytes == 0) { 1154 if (total_size_bytes == 0) {
1131 return; 1155 return true;
1132 } 1156 }
1133 const std::span<BufferCopy> copies_span(copies.data(), copies.size()); 1157 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1134 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); 1158 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1159 return false;
1135} 1160}
1136 1161
1137template <class P> 1162template <class P>
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 6da3906a4..c225d1fc9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
73 for (auto& stage_uniforms : fast_uniforms) { 73 for (auto& stage_uniforms : fast_uniforms) {
74 for (OGLBuffer& buffer : stage_uniforms) { 74 for (OGLBuffer& buffer : stage_uniforms) {
75 buffer.Create(); 75 buffer.Create();
76 glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); 76 glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
77 GL_STREAM_DRAW);
77 } 78 }
78 } 79 }
79 for (auto& stage_uniforms : copy_uniforms) { 80 for (auto& stage_uniforms : copy_uniforms) {