summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h  269
1 file changed, 237 insertions(+), 32 deletions(-)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5a0b6f0c0..24c858104 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -31,6 +31,7 @@
31#include "video_core/engines/maxwell_3d.h" 31#include "video_core/engines/maxwell_3d.h"
32#include "video_core/memory_manager.h" 32#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 33#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h"
34#include "video_core/texture_cache/slot_vector.h" 35#include "video_core/texture_cache/slot_vector.h"
35#include "video_core/texture_cache/types.h" 36#include "video_core/texture_cache/types.h"
36 37
@@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory);
42 43
43using BufferId = SlotId; 44using BufferId = SlotId;
44 45
46using VideoCore::Surface::PixelFormat;
47using namespace Common::Literals;
48
45constexpr u32 NUM_VERTEX_BUFFERS = 32; 49constexpr u32 NUM_VERTEX_BUFFERS = 32;
46constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; 50constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
47constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; 51constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
48constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; 52constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
49constexpr u32 NUM_STORAGE_BUFFERS = 16; 53constexpr u32 NUM_STORAGE_BUFFERS = 16;
54constexpr u32 NUM_TEXTURE_BUFFERS = 16;
50constexpr u32 NUM_STAGES = 5; 55constexpr u32 NUM_STAGES = 5;
51 56
52using namespace Common::Literals; 57using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
58using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
53 59
54template <typename P> 60template <typename P>
55class BufferCache { 61class BufferCache {
@@ -67,6 +73,7 @@ class BufferCache {
67 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; 73 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
68 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; 74 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
69 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; 75 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
76 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
70 77
71 static constexpr BufferId NULL_BUFFER_ID{0}; 78 static constexpr BufferId NULL_BUFFER_ID{0};
72 79
@@ -96,6 +103,10 @@ class BufferCache {
96 BufferId buffer_id; 103 BufferId buffer_id;
97 }; 104 };
98 105
106 struct TextureBufferBinding : Binding {
107 PixelFormat format;
108 };
109
99 static constexpr Binding NULL_BINDING{ 110 static constexpr Binding NULL_BINDING{
100 .cpu_addr = 0, 111 .cpu_addr = 0,
101 .size = 0, 112 .size = 0,
@@ -133,20 +144,31 @@ public:
133 144
134 void BindHostComputeBuffers(); 145 void BindHostComputeBuffers();
135 146
136 void SetEnabledUniformBuffers(size_t stage, u32 enabled); 147 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
148 const UniformBufferSizes* sizes);
137 149
138 void SetEnabledComputeUniformBuffers(u32 enabled); 150 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
139 151
140 void UnbindGraphicsStorageBuffers(size_t stage); 152 void UnbindGraphicsStorageBuffers(size_t stage);
141 153
142 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, 154 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
143 bool is_written); 155 bool is_written);
144 156
157 void UnbindGraphicsTextureBuffers(size_t stage);
158
159 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
160 PixelFormat format, bool is_written, bool is_image);
161
145 void UnbindComputeStorageBuffers(); 162 void UnbindComputeStorageBuffers();
146 163
147 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, 164 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
148 bool is_written); 165 bool is_written);
149 166
167 void UnbindComputeTextureBuffers();
168
169 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
170 bool is_written, bool is_image);
171
150 void FlushCachedWrites(); 172 void FlushCachedWrites();
151 173
152 /// Return true when there are uncommitted buffers to be downloaded 174 /// Return true when there are uncommitted buffers to be downloaded
@@ -178,6 +200,7 @@ public:
178 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 200 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
179 201
180 std::mutex mutex; 202 std::mutex mutex;
203 Runtime& runtime;
181 204
182private: 205private:
183 template <typename Func> 206 template <typename Func>
@@ -254,12 +277,16 @@ private:
254 277
255 void BindHostGraphicsStorageBuffers(size_t stage); 278 void BindHostGraphicsStorageBuffers(size_t stage);
256 279
280 void BindHostGraphicsTextureBuffers(size_t stage);
281
257 void BindHostTransformFeedbackBuffers(); 282 void BindHostTransformFeedbackBuffers();
258 283
259 void BindHostComputeUniformBuffers(); 284 void BindHostComputeUniformBuffers();
260 285
261 void BindHostComputeStorageBuffers(); 286 void BindHostComputeStorageBuffers();
262 287
288 void BindHostComputeTextureBuffers();
289
263 void DoUpdateGraphicsBuffers(bool is_indexed); 290 void DoUpdateGraphicsBuffers(bool is_indexed);
264 291
265 void DoUpdateComputeBuffers(); 292 void DoUpdateComputeBuffers();
@@ -274,6 +301,8 @@ private:
274 301
275 void UpdateStorageBuffers(size_t stage); 302 void UpdateStorageBuffers(size_t stage);
276 303
304 void UpdateTextureBuffers(size_t stage);
305
277 void UpdateTransformFeedbackBuffers(); 306 void UpdateTransformFeedbackBuffers();
278 307
279 void UpdateTransformFeedbackBuffer(u32 index); 308 void UpdateTransformFeedbackBuffer(u32 index);
@@ -282,6 +311,8 @@ private:
282 311
283 void UpdateComputeStorageBuffers(); 312 void UpdateComputeStorageBuffers();
284 313
314 void UpdateComputeTextureBuffers();
315
285 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); 316 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
286 317
287 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); 318 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
@@ -323,6 +354,9 @@ private:
323 354
324 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; 355 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
325 356
357 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
358 PixelFormat format);
359
326 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); 360 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
327 361
328 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); 362 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
@@ -336,7 +370,6 @@ private:
336 Tegra::Engines::KeplerCompute& kepler_compute; 370 Tegra::Engines::KeplerCompute& kepler_compute;
337 Tegra::MemoryManager& gpu_memory; 371 Tegra::MemoryManager& gpu_memory;
338 Core::Memory::Memory& cpu_memory; 372 Core::Memory::Memory& cpu_memory;
339 Runtime& runtime;
340 373
341 SlotVector<Buffer> slot_buffers; 374 SlotVector<Buffer> slot_buffers;
342 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 375 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -347,20 +380,30 @@ private:
347 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; 380 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
348 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; 381 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
349 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; 382 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
383 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
350 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; 384 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
351 385
352 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; 386 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
353 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; 387 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
388 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
389
390 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
391 u32 enabled_compute_uniform_buffer_mask = 0;
354 392
355 std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; 393 const UniformBufferSizes* uniform_buffer_sizes{};
356 u32 enabled_compute_uniform_buffers = 0; 394 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
357 395
358 std::array<u32, NUM_STAGES> enabled_storage_buffers{}; 396 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
359 std::array<u32, NUM_STAGES> written_storage_buffers{}; 397 std::array<u32, NUM_STAGES> written_storage_buffers{};
360 u32 enabled_compute_storage_buffers = 0; 398 u32 enabled_compute_storage_buffers = 0;
361 u32 written_compute_storage_buffers = 0; 399 u32 written_compute_storage_buffers = 0;
362 400
363 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; 401 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
402 std::array<u32, NUM_STAGES> written_texture_buffers{};
403 std::array<u32, NUM_STAGES> image_texture_buffers{};
404 u32 enabled_compute_texture_buffers = 0;
405 u32 written_compute_texture_buffers = 0;
406 u32 image_compute_texture_buffers = 0;
364 407
365 std::array<u32, 16> uniform_cache_hits{}; 408 std::array<u32, 16> uniform_cache_hits{};
366 std::array<u32, 16> uniform_cache_shots{}; 409 std::array<u32, 16> uniform_cache_shots{};
@@ -371,6 +414,10 @@ private:
371 414
372 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 415 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
373 dirty_uniform_buffers{}; 416 dirty_uniform_buffers{};
417 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
418 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
419 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
420 uniform_buffer_binding_sizes{};
374 421
375 std::vector<BufferId> cached_write_buffer_ids; 422 std::vector<BufferId> cached_write_buffer_ids;
376 423
@@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
394 Tegra::Engines::KeplerCompute& kepler_compute_, 441 Tegra::Engines::KeplerCompute& kepler_compute_,
395 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 442 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
396 Runtime& runtime_) 443 Runtime& runtime_)
397 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, 444 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
398 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { 445 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
399 // Ensure the first slot is used for the null buffer 446 // Ensure the first slot is used for the null buffer
400 void(slot_buffers.insert(runtime, NullBufferParams{})); 447 void(slot_buffers.insert(runtime, NullBufferParams{}));
401 deletion_iterator = slot_buffers.end(); 448 deletion_iterator = slot_buffers.end();
@@ -615,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) {
615 MICROPROFILE_SCOPE(GPU_BindUploadBuffers); 662 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
616 BindHostGraphicsUniformBuffers(stage); 663 BindHostGraphicsUniformBuffers(stage);
617 BindHostGraphicsStorageBuffers(stage); 664 BindHostGraphicsStorageBuffers(stage);
665 BindHostGraphicsTextureBuffers(stage);
618} 666}
619 667
620template <class P> 668template <class P>
@@ -622,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() {
622 MICROPROFILE_SCOPE(GPU_BindUploadBuffers); 670 MICROPROFILE_SCOPE(GPU_BindUploadBuffers);
623 BindHostComputeUniformBuffers(); 671 BindHostComputeUniformBuffers();
624 BindHostComputeStorageBuffers(); 672 BindHostComputeStorageBuffers();
673 BindHostComputeTextureBuffers();
625} 674}
626 675
627template <class P> 676template <class P>
628void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { 677void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
678 const UniformBufferSizes* sizes) {
629 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 679 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
630 if (enabled_uniform_buffers[stage] != enabled) { 680 if (enabled_uniform_buffer_masks != mask) {
631 dirty_uniform_buffers[stage] = ~u32{0}; 681 if constexpr (IS_OPENGL) {
682 fast_bound_uniform_buffers.fill(0);
683 }
684 dirty_uniform_buffers.fill(~u32{0});
685 uniform_buffer_binding_sizes.fill({});
632 } 686 }
633 } 687 }
634 enabled_uniform_buffers[stage] = enabled; 688 enabled_uniform_buffer_masks = mask;
689 uniform_buffer_sizes = sizes;
635} 690}
636 691
637template <class P> 692template <class P>
638void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { 693void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
639 enabled_compute_uniform_buffers = enabled; 694 const ComputeUniformBufferSizes* sizes) {
695 enabled_compute_uniform_buffer_mask = mask;
696 compute_uniform_buffer_sizes = sizes;
640} 697}
641 698
642template <class P> 699template <class P>
@@ -657,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
657} 714}
658 715
659template <class P> 716template <class P>
717void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
718 enabled_texture_buffers[stage] = 0;
719 written_texture_buffers[stage] = 0;
720 image_texture_buffers[stage] = 0;
721}
722
723template <class P>
724void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
725 u32 size, PixelFormat format, bool is_written,
726 bool is_image) {
727 enabled_texture_buffers[stage] |= 1U << tbo_index;
728 written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
729 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
730 image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
731 }
732 texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
733}
734
735template <class P>
660void BufferCache<P>::UnbindComputeStorageBuffers() { 736void BufferCache<P>::UnbindComputeStorageBuffers() {
661 enabled_compute_storage_buffers = 0; 737 enabled_compute_storage_buffers = 0;
662 written_compute_storage_buffers = 0; 738 written_compute_storage_buffers = 0;
739 image_compute_texture_buffers = 0;
663} 740}
664 741
665template <class P> 742template <class P>
@@ -677,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
677} 754}
678 755
679template <class P> 756template <class P>
757void BufferCache<P>::UnbindComputeTextureBuffers() {
758 enabled_compute_texture_buffers = 0;
759 written_compute_texture_buffers = 0;
760 image_compute_texture_buffers = 0;
761}
762
763template <class P>
764void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
765 PixelFormat format, bool is_written, bool is_image) {
766 enabled_compute_texture_buffers |= 1U << tbo_index;
767 written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
768 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
769 image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
770 }
771 compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
772}
773
774template <class P>
680void BufferCache<P>::FlushCachedWrites() { 775void BufferCache<P>::FlushCachedWrites() {
681 for (const BufferId buffer_id : cached_write_buffer_ids) { 776 for (const BufferId buffer_id : cached_write_buffer_ids) {
682 slot_buffers[buffer_id].FlushCachedWrites(); 777 slot_buffers[buffer_id].FlushCachedWrites();
@@ -901,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
901 dirty = std::exchange(dirty_uniform_buffers[stage], 0); 996 dirty = std::exchange(dirty_uniform_buffers[stage], 0);
902 } 997 }
903 u32 binding_index = 0; 998 u32 binding_index = 0;
904 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { 999 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
905 const bool needs_bind = ((dirty >> index) & 1) != 0; 1000 const bool needs_bind = ((dirty >> index) & 1) != 0;
906 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); 1001 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
907 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1002 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -915,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
915 bool needs_bind) { 1010 bool needs_bind) {
916 const Binding& binding = uniform_buffers[stage][index]; 1011 const Binding& binding = uniform_buffers[stage][index];
917 const VAddr cpu_addr = binding.cpu_addr; 1012 const VAddr cpu_addr = binding.cpu_addr;
918 const u32 size = binding.size; 1013 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
919 Buffer& buffer = slot_buffers[binding.buffer_id]; 1014 Buffer& buffer = slot_buffers[binding.buffer_id];
920 TouchBuffer(buffer); 1015 TouchBuffer(buffer);
921 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 1016 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
@@ -925,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
925 if constexpr (IS_OPENGL) { 1020 if constexpr (IS_OPENGL) {
926 if (runtime.HasFastBufferSubData()) { 1021 if (runtime.HasFastBufferSubData()) {
927 // Fast path for Nvidia 1022 // Fast path for Nvidia
928 if (!HasFastUniformBufferBound(stage, binding_index)) { 1023 const bool should_fast_bind =
1024 !HasFastUniformBufferBound(stage, binding_index) ||
1025 uniform_buffer_binding_sizes[stage][binding_index] != size;
1026 if (should_fast_bind) {
929 // We only have to bind when the currently bound buffer is not the fast version 1027 // We only have to bind when the currently bound buffer is not the fast version
1028 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
1029 uniform_buffer_binding_sizes[stage][binding_index] = size;
930 runtime.BindFastUniformBuffer(stage, binding_index, size); 1030 runtime.BindFastUniformBuffer(stage, binding_index, size);
931 } 1031 }
932 const auto span = ImmediateBufferWithData(cpu_addr, size); 1032 const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -934,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
934 return; 1034 return;
935 } 1035 }
936 } 1036 }
937 fast_bound_uniform_buffers[stage] |= 1U << binding_index; 1037 if constexpr (IS_OPENGL) {
938 1038 fast_bound_uniform_buffers[stage] |= 1U << binding_index;
1039 uniform_buffer_binding_sizes[stage][binding_index] = size;
1040 }
939 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 1041 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
940 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 1042 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
941 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); 1043 cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size);
@@ -948,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
948 } 1050 }
949 ++uniform_cache_shots[0]; 1051 ++uniform_cache_shots[0];
950 1052
951 if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { 1053 // Skip binding if it's not needed and if the bound buffer is not the fast version
952 // Skip binding if it's not needed and if the bound buffer is not the fast version 1054 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
953 // This exists to avoid instances where the fast buffer is bound and a GPU write happens 1055 needs_bind |= HasFastUniformBufferBound(stage, binding_index);
1056 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1057 needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
1058 }
1059 if (!needs_bind) {
954 return; 1060 return;
955 } 1061 }
956 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
957
958 const u32 offset = buffer.Offset(cpu_addr); 1062 const u32 offset = buffer.Offset(cpu_addr);
1063 if constexpr (IS_OPENGL) {
1064 // Fast buffer will be unbound
1065 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
1066
1067 // Mark the index as dirty if offset doesn't match
1068 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
1069 dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
1070 }
1071 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1072 uniform_buffer_binding_sizes[stage][binding_index] = size;
1073 }
959 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 1074 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
960 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); 1075 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
961 } else { 1076 } else {
@@ -985,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
985} 1100}
986 1101
987template <class P> 1102template <class P>
1103void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
1104 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
1105 const TextureBufferBinding& binding = texture_buffers[stage][index];
1106 Buffer& buffer = slot_buffers[binding.buffer_id];
1107 const u32 size = binding.size;
1108 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1109
1110 const u32 offset = buffer.Offset(binding.cpu_addr);
1111 const PixelFormat format = binding.format;
1112 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1113 if (((image_texture_buffers[stage] >> index) & 1) != 0) {
1114 runtime.BindImageBuffer(buffer, offset, size, format);
1115 } else {
1116 runtime.BindTextureBuffer(buffer, offset, size, format);
1117 }
1118 } else {
1119 runtime.BindTextureBuffer(buffer, offset, size, format);
1120 }
1121 });
1122}
1123
1124template <class P>
988void BufferCache<P>::BindHostTransformFeedbackBuffers() { 1125void BufferCache<P>::BindHostTransformFeedbackBuffers() {
989 if (maxwell3d.regs.tfb_enabled == 0) { 1126 if (maxwell3d.regs.tfb_enabled == 0) {
990 return; 1127 return;
@@ -1006,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1006 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1143 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1007 // Mark all uniform buffers as dirty 1144 // Mark all uniform buffers as dirty
1008 dirty_uniform_buffers.fill(~u32{0}); 1145 dirty_uniform_buffers.fill(~u32{0});
1146 fast_bound_uniform_buffers.fill(0);
1009 } 1147 }
1010 u32 binding_index = 0; 1148 u32 binding_index = 0;
1011 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { 1149 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1012 const Binding& binding = compute_uniform_buffers[index]; 1150 const Binding& binding = compute_uniform_buffers[index];
1013 Buffer& buffer = slot_buffers[binding.buffer_id]; 1151 Buffer& buffer = slot_buffers[binding.buffer_id];
1014 TouchBuffer(buffer); 1152 TouchBuffer(buffer);
1015 const u32 size = binding.size; 1153 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
1016 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1154 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1017 1155
1018 const u32 offset = buffer.Offset(binding.cpu_addr); 1156 const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -1047,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1047} 1185}
1048 1186
1049template <class P> 1187template <class P>
1188void BufferCache<P>::BindHostComputeTextureBuffers() {
1189 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
1190 const TextureBufferBinding& binding = compute_texture_buffers[index];
1191 Buffer& buffer = slot_buffers[binding.buffer_id];
1192 const u32 size = binding.size;
1193 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1194
1195 const u32 offset = buffer.Offset(binding.cpu_addr);
1196 const PixelFormat format = binding.format;
1197 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1198 if (((image_compute_texture_buffers >> index) & 1) != 0) {
1199 runtime.BindImageBuffer(buffer, offset, size, format);
1200 } else {
1201 runtime.BindTextureBuffer(buffer, offset, size, format);
1202 }
1203 } else {
1204 runtime.BindTextureBuffer(buffer, offset, size, format);
1205 }
1206 });
1207}
1208
1209template <class P>
1050void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { 1210void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
1051 if (is_indexed) { 1211 if (is_indexed) {
1052 UpdateIndexBuffer(); 1212 UpdateIndexBuffer();
@@ -1056,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
1056 for (size_t stage = 0; stage < NUM_STAGES; ++stage) { 1216 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
1057 UpdateUniformBuffers(stage); 1217 UpdateUniformBuffers(stage);
1058 UpdateStorageBuffers(stage); 1218 UpdateStorageBuffers(stage);
1219 UpdateTextureBuffers(stage);
1059 } 1220 }
1060} 1221}
1061 1222
@@ -1063,6 +1224,7 @@ template <class P>
1063void BufferCache<P>::DoUpdateComputeBuffers() { 1224void BufferCache<P>::DoUpdateComputeBuffers() {
1064 UpdateComputeUniformBuffers(); 1225 UpdateComputeUniformBuffers();
1065 UpdateComputeStorageBuffers(); 1226 UpdateComputeStorageBuffers();
1227 UpdateComputeTextureBuffers();
1066} 1228}
1067 1229
1068template <class P> 1230template <class P>
@@ -1132,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1132 1294
1133template <class P> 1295template <class P>
1134void BufferCache<P>::UpdateUniformBuffers(size_t stage) { 1296void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1135 ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { 1297 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
1136 Binding& binding = uniform_buffers[stage][index]; 1298 Binding& binding = uniform_buffers[stage][index];
1137 if (binding.buffer_id) { 1299 if (binding.buffer_id) {
1138 // Already updated 1300 // Already updated
@@ -1163,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1163} 1325}
1164 1326
1165template <class P> 1327template <class P>
1328void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1329 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
1330 Binding& binding = texture_buffers[stage][index];
1331 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1332 // Mark buffer as written if needed
1333 if (((written_texture_buffers[stage] >> index) & 1) != 0) {
1334 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1335 }
1336 });
1337}
1338
1339template <class P>
1166void BufferCache<P>::UpdateTransformFeedbackBuffers() { 1340void BufferCache<P>::UpdateTransformFeedbackBuffers() {
1167 if (maxwell3d.regs.tfb_enabled == 0) { 1341 if (maxwell3d.regs.tfb_enabled == 0) {
1168 return; 1342 return;
@@ -1193,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1193 1367
1194template <class P> 1368template <class P>
1195void BufferCache<P>::UpdateComputeUniformBuffers() { 1369void BufferCache<P>::UpdateComputeUniformBuffers() {
1196 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { 1370 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1197 Binding& binding = compute_uniform_buffers[index]; 1371 Binding& binding = compute_uniform_buffers[index];
1198 binding = NULL_BINDING; 1372 binding = NULL_BINDING;
1199 const auto& launch_desc = kepler_compute.launch_description; 1373 const auto& launch_desc = kepler_compute.launch_description;
@@ -1214,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1214 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1388 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
1215 // Resolve buffer 1389 // Resolve buffer
1216 Binding& binding = compute_storage_buffers[index]; 1390 Binding& binding = compute_storage_buffers[index];
1217 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1391 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1218 binding.buffer_id = buffer_id;
1219 // Mark as written if needed 1392 // Mark as written if needed
1220 if (((written_compute_storage_buffers >> index) & 1) != 0) { 1393 if (((written_compute_storage_buffers >> index) & 1) != 0) {
1221 MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); 1394 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1395 }
1396 });
1397}
1398
1399template <class P>
1400void BufferCache<P>::UpdateComputeTextureBuffers() {
1401 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
1402 Binding& binding = compute_texture_buffers[index];
1403 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1404 // Mark as written if needed
1405 if (((written_compute_texture_buffers >> index) & 1) != 0) {
1406 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1222 } 1407 }
1223 }); 1408 });
1224} 1409}
@@ -1551,6 +1736,7 @@ template <class P>
1551void BufferCache<P>::NotifyBufferDeletion() { 1736void BufferCache<P>::NotifyBufferDeletion() {
1552 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1737 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1553 dirty_uniform_buffers.fill(~u32{0}); 1738 dirty_uniform_buffers.fill(~u32{0});
1739 uniform_buffer_binding_sizes.fill({});
1554 } 1740 }
1555 auto& flags = maxwell3d.dirty.flags; 1741 auto& flags = maxwell3d.dirty.flags;
1556 flags[Dirty::IndexBuffer] = true; 1742 flags[Dirty::IndexBuffer] = true;
@@ -1578,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1578} 1764}
1579 1765
1580template <class P> 1766template <class P>
1767typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
1768 GPUVAddr gpu_addr, u32 size, PixelFormat format) {
1769 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1770 TextureBufferBinding binding;
1771 if (!cpu_addr || size == 0) {
1772 binding.cpu_addr = 0;
1773 binding.size = 0;
1774 binding.buffer_id = NULL_BUFFER_ID;
1775 binding.format = PixelFormat::Invalid;
1776 } else {
1777 binding.cpu_addr = *cpu_addr;
1778 binding.size = size;
1779 binding.buffer_id = BufferId{};
1780 binding.format = format;
1781 }
1782 return binding;
1783}
1784
1785template <class P>
1581std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { 1786std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) {
1582 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); 1787 u8* const base_pointer = cpu_memory.GetPointer(cpu_addr);
1583 if (IsRangeGranular(cpu_addr, size) || 1788 if (IsRangeGranular(cpu_addr, size) ||