| author | 2021-06-02 02:15:07 -0300 |
|---|---|
| committer | 2021-07-22 21:51:34 -0400 |
| commit | 4a2361a1e2271727f3259e8e4a60869165537253 |
| tree | d741b0e808aa6a622c01dd047d66211c201e0f85 /src/video_core/buffer_cache |
| parent | transform_feedback: Read buffer stride from index instead of layout |
buffer_cache: Reduce uniform buffer size from shader usage
Increases performance significantly on certain titles.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h | 42
1 file changed, 26 insertions(+), 16 deletions(-)
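The performance comes from binding fewer bytes per draw: a guest commonly binds a full constant-buffer window (often 64 KiB) even when the shader reads only a small slice of it, and the hunks below clamp every bind to the size the shader is known to use. A minimal sketch of that clamp, with hypothetical names for everything not shown in this patch:

```cpp
#include <algorithm>
#include <cstdint>

using u32 = std::uint32_t;

// `bound_size` is the size of the guest binding; `shader_used_size` is a
// hypothetical per-buffer byte count reported by the shader translator.
u32 EffectiveUniformSize(u32 bound_size, u32 shader_used_size) {
    // Never read past the guest binding, and never upload more than the
    // shader can consume: the same std::min the hunks below apply.
    return std::min(bound_size, shader_used_size);
}
```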
```diff
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d6b9eb99f..ec64f2293 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -44,6 +44,7 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
 using BufferId = SlotId;
 
 using VideoCore::Surface::PixelFormat;
+using namespace Common::Literals;
 
 constexpr u32 NUM_VERTEX_BUFFERS = 32;
 constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
@@ -53,7 +54,8 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16;
 constexpr u32 NUM_TEXTURE_BUFFERS = 16;
 constexpr u32 NUM_STAGES = 5;
 
-using namespace Common::Literals;
+using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
+using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
 
 template <typename P>
 class BufferCache {
@@ -142,9 +144,10 @@ public:
 
     void BindHostComputeBuffers();
 
-    void SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask);
+    void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                const UniformBufferSizes* sizes);
 
-    void SetEnabledComputeUniformBuffers(u32 enabled);
+    void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
 
     void UnbindGraphicsStorageBuffers(size_t stage);
 
@@ -384,8 +387,11 @@ private:
     std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
     std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
 
-    std::array<u32, NUM_STAGES> enabled_uniform_buffers{};
-    u32 enabled_compute_uniform_buffers = 0;
+    std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
+    u32 enabled_compute_uniform_buffer_mask = 0;
+
+    const UniformBufferSizes* uniform_buffer_sizes{};
+    const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
 
     std::array<u32, NUM_STAGES> enabled_storage_buffers{};
     std::array<u32, NUM_STAGES> written_storage_buffers{};
@@ -670,18 +676,22 @@ void BufferCache<P>::BindHostComputeBuffers() {
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledUniformBuffers(const std::array<u32, NUM_STAGES>& mask) {
+void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
+                                            const UniformBufferSizes* sizes) {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        if (enabled_uniform_buffers != mask) {
+        if (enabled_uniform_buffer_masks != mask) {
             dirty_uniform_buffers.fill(~u32{0});
         }
     }
-    enabled_uniform_buffers = mask;
+    enabled_uniform_buffer_masks = mask;
+    uniform_buffer_sizes = sizes;
 }
 
 template <class P>
-void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) {
-    enabled_compute_uniform_buffers = enabled;
+void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
+                                                  const ComputeUniformBufferSizes* sizes) {
+    enabled_compute_uniform_buffer_mask = mask;
+    compute_uniform_buffer_sizes = sizes;
 }
 
 template <class P>
@@ -984,7 +994,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
         dirty = std::exchange(dirty_uniform_buffers[stage], 0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
         const bool needs_bind = ((dirty >> index) & 1) != 0;
         BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -998,7 +1008,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                                                    bool needs_bind) {
     const Binding& binding = uniform_buffers[stage][index];
     const VAddr cpu_addr = binding.cpu_addr;
-    const u32 size = binding.size;
+    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer);
     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -1113,11 +1123,11 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
         dirty_uniform_buffers.fill(~u32{0});
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         const Binding& binding = compute_uniform_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer);
-        const u32 size = binding.size;
+        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1261,7 +1271,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) {
+    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
         Binding& binding = uniform_buffers[stage][index];
         if (binding.buffer_id) {
             // Already updated
@@ -1334,7 +1344,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateComputeUniformBuffers() {
-    ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
+    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
         Binding& binding = compute_uniform_buffers[index];
         binding = NULL_BINDING;
         const auto& launch_desc = kepler_compute.launch_description;
```
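Note that the cache stores only pointers to the size tables, so the arrays a caller passes in have to outlive the binds that use them. A hedged caller-side sketch of feeding the new API, where `ShaderInfo`, `constant_buffer_mask`, and `used_sizes` are hypothetical stand-ins for whatever the shader decoder reports, and the buffer counts are assumptions (the real constants are declared elsewhere in buffer_cache.h):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

using u32 = std::uint32_t;

// Assumed counts; the real constants live elsewhere in buffer_cache.h.
constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
constexpr u32 NUM_STAGES = 5;

using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;

// Hypothetical per-shader usage data, as a shader decoder might report it.
struct ShaderInfo {
    u32 constant_buffer_mask{};                                  // bit i set => buffer i is read
    std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> used_sizes{};  // bytes the shader actually reads
};

// Gather per-stage enabled masks and used sizes before handing both to
// SetUniformBuffersState(masks, &sizes). A null stage (no shader bound)
// contributes an empty mask and zero sizes.
void GatherUniformBufferState(const std::array<const ShaderInfo*, NUM_STAGES>& infos,
                              std::array<u32, NUM_STAGES>& masks, UniformBufferSizes& sizes) {
    for (std::size_t stage = 0; stage < NUM_STAGES; ++stage) {
        if (infos[stage] == nullptr) {
            masks[stage] = 0;
            sizes[stage].fill(0);
            continue;
        }
        masks[stage] = infos[stage]->constant_buffer_mask;
        sizes[stage] = infos[stage]->used_sizes;
    }
}
```

One natural way to satisfy the lifetime requirement is to point `sizes` at storage owned by a long-lived pipeline object rather than at a stack array.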