author     ReinUsesLisp  2021-06-02 02:15:07 -0300
committer  ameerj        2021-07-22 21:51:34 -0400
commit     4a2361a1e2271727f3259e8e4a60869165537253 (patch)
tree       d741b0e808aa6a622c01dd047d66211c201e0f85 /src/video_core/buffer_cache
parent     transform_feedback: Read buffer stride from index instead of layout (diff)
buffer_cache: Reduce uniform buffer size from shader usage
Increases performance significantly on certain titles.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 42
1 file changed, 26 insertions(+), 16 deletions(-)
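
For context, the change clamps each bound uniform buffer to the size the shader actually reads instead of the full size declared by the guest bind call. A minimal sketch follows (not yuzu code; the ShaderInfo struct, its field name, and the NUM_GRAPHICS_UNIFORM_BUFFERS value are assumptions made only for illustration) of how per-stage used sizes could be gathered and how a binding is clamped, mirroring the std::min calls in the diff below.

// Illustrative sketch only -- not yuzu code.
#include <algorithm>
#include <array>
#include <cstdint>

using u32 = std::uint32_t;

constexpr u32 NUM_STAGES = 5;
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; // assumed value for this sketch

using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;

// Hypothetical per-shader metadata; the field name is made up for illustration.
struct ShaderInfo {
    std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> constant_buffer_used_sizes{};
};

// Collect the sizes each stage's shader actually reads.
UniformBufferSizes GatherUniformBufferSizes(const std::array<ShaderInfo, NUM_STAGES>& shaders) {
    UniformBufferSizes sizes{};
    for (u32 stage = 0; stage < NUM_STAGES; ++stage) {
        sizes[stage] = shaders[stage].constant_buffer_used_sizes;
    }
    return sizes;
}

// The cache binds min(guest-declared size, shader-used size) bytes.
u32 ClampedBindingSize(u32 binding_size, const UniformBufferSizes& sizes, u32 stage, u32 index) {
    return std::min(binding_size, sizes[stage][index]);
}
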
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d6b9eb99f..ec64f2293 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
 using BufferId = SlotId;
 
 using VideoCore::Surface::PixelFormat;
+using namespace Common::Literals;
 
 constexpr u32 NUM_VERTEX_BUFFERS = 32;
 constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
@@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16;
 constexpr u32 NUM_TEXTURE_BUFFERS = 16;
 constexpr u32 NUM_STAGES = 5;
 
-using namespace Common::Literals;
+using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
+using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
 class BufferCache {
@@ -142,9 +144,10 @@ public:
 
     void BindHostComputeBuffers();
 
-    void SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask);
+    void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                const UniformBufferSizes* sizes);
 
-    void SetEnabledComputeUniformBuffers(u32 enabled);
+    void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
 
     void UnbindGraphicsStorageBuffers(size_t stage);
 
@@ -384,8 +387,11 @@ private:
     std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
     std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
 
-    std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
-    u32 enabled_compute_uniform_buffers = 0;
+    std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
+    u32 enabled_compute_uniform_buffer_mask = 0;
+
+    const UniformBufferSizes* uniform_buffer_sizes{};
+    const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
 
     std::array<u32, NUM_STAGES> enabled_storage_buffers{};
     std::array<u32, NUM_STAGES> written_storage_buffers{};
@@ -670,18 +676,22 @@ void BufferCache<P>::BindHostComputeBuffers() {
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask) {
+void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                            const UniformBufferSizes* sizes) {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        if (enabled_uniform_buffers != mask) {
+        if (enabled_uniform_buffer_masks != mask) {
             dirty_uniform_buffers.fill(~u32{0});
         }
     }
-    enabled_uniform_buffers = mask;
+    enabled_uniform_buffer_masks = mask;
+    uniform_buffer_sizes = sizes;
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
-    enabled_compute_uniform_buffers = enabled;
+void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
+                                                  const ComputeUniformBufferSizes* sizes) {
+    enabled_compute_uniform_buffer_mask = mask;
+    compute_uniform_buffer_sizes = sizes;
 }
 
 template <class P>
@@ -984,7 +994,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
         dirty = std::exchange(dirty_uniform_buffers[stage], 0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
        const bool needs_bind = ((dirty >> index) & 1) != 0;
        BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
        if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -998,7 +1008,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                                                    bool needs_bind) {
     const Binding& binding = uniform_buffers[stage][index];
     const VAddr cpu_addr = binding.cpu_addr;
-    const u32 size = binding.size;
+    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer);
     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -1113,11 +1123,11 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         dirty_uniform_buffers.fill(~u32{0});
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         const Binding& binding = compute_uniform_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer);
-        const u32 size = binding.size;
+        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1261,7 +1271,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
         Binding& binding = uniform_buffers[stage][index];
         if (binding.buffer_id) {
             // Already updated
@@ -1334,7 +1344,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateComputeUniformBuffers() {
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
         const auto& launch_desc = kepler_compute.launch_description;
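
ForEachEnabledBit is not defined in this diff; from the call sites above it takes a u32 bit mask and invokes the callback once with the index of each set bit. A minimal stand-in consistent with that usage (an assumption, not yuzu's implementation):

// Stand-in only -- not yuzu's implementation. Matches the call shape used in
// the diff: invoke func(index) once for every set bit in mask.
#include <bit>
#include <cstdint>

using u32 = std::uint32_t;

template <typename Func>
void ForEachEnabledBit(u32 mask, Func&& func) {
    while (mask != 0) {
        const u32 index = static_cast<u32>(std::countr_zero(mask)); // lowest set bit
        func(index);
        mask &= mask - 1; // clear the bit just visited
    }
}

It would be called as in the diff, e.g. ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { /* bind buffer at index */ });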