summaryrefslogtreecommitdiff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
author: Kelebek1 — 2023-05-27 15:45:36 +0100
committer: Kelebek1 — 2023-05-27 17:04:18 +0100
commitb0bea13ed8422119cc4f09763be095cbc3762795 (patch)
treec800abba1887e405774011fffcbe7fde60fa5e46 /src/video_core/buffer_cache
parentMerge pull request #10414 from liamwhite/anv-push-descriptor (diff)
downloadyuzu-b0bea13ed8422119cc4f09763be095cbc3762795.tar.gz
yuzu-b0bea13ed8422119cc4f09763be095cbc3762795.tar.xz
yuzu-b0bea13ed8422119cc4f09763be095cbc3762795.zip
Move buffer bindings to per-channel state
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.cpp      |   4
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h        | 291
-rw-r--r-- src/video_core/buffer_cache/buffer_cache_base.h   | 141
3 files changed, 227 insertions, 209 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 40db243d2..4b4f7061b 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -2,6 +2,8 @@
2// SPDX-License-Identifier: GPL-3.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include "common/microprofile.h" 4#include "common/microprofile.h"
5#include "video_core/buffer_cache/buffer_cache_base.h"
6#include "video_core/control/channel_state_cache.inc"
5 7
6namespace VideoCommon { 8namespace VideoCommon {
7 9
@@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12
9MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); 11MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
10MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); 12MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
11 13
14template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>;
15
12} // namespace VideoCommon 16} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 65494097b..c336be707 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -64,17 +64,22 @@ void BufferCache<P>::RunGarbageCollector() {
64template <class P> 64template <class P>
65void BufferCache<P>::TickFrame() { 65void BufferCache<P>::TickFrame() {
66 // Calculate hits and shots and move hit bits to the right 66 // Calculate hits and shots and move hit bits to the right
67 const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); 67
68 const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); 68 const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
69 std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, 69 channel_state->uniform_cache_hits.end());
70 uniform_cache_hits.begin() + 1); 70 const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(),
71 std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, 71 channel_state->uniform_cache_shots.end());
72 uniform_cache_shots.begin() + 1); 72 std::copy_n(channel_state->uniform_cache_hits.begin(),
73 uniform_cache_hits[0] = 0; 73 channel_state->uniform_cache_hits.size() - 1,
74 uniform_cache_shots[0] = 0; 74 channel_state->uniform_cache_hits.begin() + 1);
75 std::copy_n(channel_state->uniform_cache_shots.begin(),
76 channel_state->uniform_cache_shots.size() - 1,
77 channel_state->uniform_cache_shots.begin() + 1);
78 channel_state->uniform_cache_hits[0] = 0;
79 channel_state->uniform_cache_shots[0] = 0;
75 80
76 const bool skip_preferred = hits * 256 < shots * 251; 81 const bool skip_preferred = hits * 256 < shots * 251;
77 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 82 channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
78 83
79 // If we can obtain the memory info, use it instead of the estimate. 84 // If we can obtain the memory info, use it instead of the estimate.
80 if (runtime.CanReportMemoryUsage()) { 85 if (runtime.CanReportMemoryUsage()) {
@@ -164,10 +169,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
164 BufferId buffer_a; 169 BufferId buffer_a;
165 BufferId buffer_b; 170 BufferId buffer_b;
166 do { 171 do {
167 has_deleted_buffers = false; 172 channel_state->has_deleted_buffers = false;
168 buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); 173 buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
169 buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); 174 buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
170 } while (has_deleted_buffers); 175 } while (channel_state->has_deleted_buffers);
171 auto& src_buffer = slot_buffers[buffer_a]; 176 auto& src_buffer = slot_buffers[buffer_a];
172 auto& dest_buffer = slot_buffers[buffer_b]; 177 auto& dest_buffer = slot_buffers[buffer_b];
173 SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount)); 178 SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
@@ -272,30 +277,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
272 .size = size, 277 .size = size,
273 .buffer_id = BufferId{}, 278 .buffer_id = BufferId{},
274 }; 279 };
275 uniform_buffers[stage][index] = binding; 280 channel_state->uniform_buffers[stage][index] = binding;
276} 281}
277 282
278template <class P> 283template <class P>
279void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) { 284void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
280 uniform_buffers[stage][index] = NULL_BINDING; 285 channel_state->uniform_buffers[stage][index] = NULL_BINDING;
281} 286}
282 287
283template <class P> 288template <class P>
284void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { 289void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
285 MICROPROFILE_SCOPE(GPU_PrepareBuffers); 290 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
286 do { 291 do {
287 has_deleted_buffers = false; 292 channel_state->has_deleted_buffers = false;
288 DoUpdateGraphicsBuffers(is_indexed); 293 DoUpdateGraphicsBuffers(is_indexed);
289 } while (has_deleted_buffers); 294 } while (channel_state->has_deleted_buffers);
290} 295}
291 296
292template <class P> 297template <class P>
293void BufferCache<P>::UpdateComputeBuffers() { 298void BufferCache<P>::UpdateComputeBuffers() {
294 MICROPROFILE_SCOPE(GPU_PrepareBuffers); 299 MICROPROFILE_SCOPE(GPU_PrepareBuffers);
295 do { 300 do {
296 has_deleted_buffers = false; 301 channel_state->has_deleted_buffers = false;
297 DoUpdateComputeBuffers(); 302 DoUpdateComputeBuffers();
298 } while (has_deleted_buffers); 303 } while (channel_state->has_deleted_buffers);
299} 304}
300 305
301template <class P> 306template <class P>
@@ -338,98 +343,102 @@ template <class P>
338void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, 343void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
339 const UniformBufferSizes* sizes) { 344 const UniformBufferSizes* sizes) {
340 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 345 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
341 if (enabled_uniform_buffer_masks != mask) { 346 if (channel_state->enabled_uniform_buffer_masks != mask) {
342 if constexpr (IS_OPENGL) { 347 if constexpr (IS_OPENGL) {
343 fast_bound_uniform_buffers.fill(0); 348 channel_state->fast_bound_uniform_buffers.fill(0);
344 } 349 }
345 dirty_uniform_buffers.fill(~u32{0}); 350 channel_state->dirty_uniform_buffers.fill(~u32{0});
346 uniform_buffer_binding_sizes.fill({}); 351 channel_state->uniform_buffer_binding_sizes.fill({});
347 } 352 }
348 } 353 }
349 enabled_uniform_buffer_masks = mask; 354 channel_state->enabled_uniform_buffer_masks = mask;
350 uniform_buffer_sizes = sizes; 355 channel_state->uniform_buffer_sizes = sizes;
351} 356}
352 357
353template <class P> 358template <class P>
354void BufferCache<P>::SetComputeUniformBufferState(u32 mask, 359void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
355 const ComputeUniformBufferSizes* sizes) { 360 const ComputeUniformBufferSizes* sizes) {
356 enabled_compute_uniform_buffer_mask = mask; 361 channel_state->enabled_compute_uniform_buffer_mask = mask;
357 compute_uniform_buffer_sizes = sizes; 362 channel_state->compute_uniform_buffer_sizes = sizes;
358} 363}
359 364
360template <class P> 365template <class P>
361void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { 366void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
362 enabled_storage_buffers[stage] = 0; 367 channel_state->enabled_storage_buffers[stage] = 0;
363 written_storage_buffers[stage] = 0; 368 channel_state->written_storage_buffers[stage] = 0;
364} 369}
365 370
366template <class P> 371template <class P>
367void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, 372void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
368 u32 cbuf_offset, bool is_written) { 373 u32 cbuf_offset, bool is_written) {
369 enabled_storage_buffers[stage] |= 1U << ssbo_index; 374 channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
370 written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; 375 channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
371 376
372 const auto& cbufs = maxwell3d->state.shader_stages[stage]; 377 const auto& cbufs = maxwell3d->state.shader_stages[stage];
373 const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; 378 const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
374 storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); 379 channel_state->storage_buffers[stage][ssbo_index] =
380 StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
375} 381}
376 382
377template <class P> 383template <class P>
378void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { 384void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
379 enabled_texture_buffers[stage] = 0; 385 channel_state->enabled_texture_buffers[stage] = 0;
380 written_texture_buffers[stage] = 0; 386 channel_state->written_texture_buffers[stage] = 0;
381 image_texture_buffers[stage] = 0; 387 channel_state->image_texture_buffers[stage] = 0;
382} 388}
383 389
384template <class P> 390template <class P>
385void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, 391void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
386 u32 size, PixelFormat format, bool is_written, 392 u32 size, PixelFormat format, bool is_written,
387 bool is_image) { 393 bool is_image) {
388 enabled_texture_buffers[stage] |= 1U << tbo_index; 394 channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index;
389 written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; 395 channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
390 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 396 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
391 image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; 397 channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
392 } 398 }
393 texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); 399 channel_state->texture_buffers[stage][tbo_index] =
400 GetTextureBufferBinding(gpu_addr, size, format);
394} 401}
395 402
396template <class P> 403template <class P>
397void BufferCache<P>::UnbindComputeStorageBuffers() { 404void BufferCache<P>::UnbindComputeStorageBuffers() {
398 enabled_compute_storage_buffers = 0; 405 channel_state->enabled_compute_storage_buffers = 0;
399 written_compute_storage_buffers = 0; 406 channel_state->written_compute_storage_buffers = 0;
400 image_compute_texture_buffers = 0; 407 channel_state->image_compute_texture_buffers = 0;
401} 408}
402 409
403template <class P> 410template <class P>
404void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, 411void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
405 bool is_written) { 412 bool is_written) {
406 enabled_compute_storage_buffers |= 1U << ssbo_index; 413 channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
407 written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; 414 channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
408 415
409 const auto& launch_desc = kepler_compute->launch_description; 416 const auto& launch_desc = kepler_compute->launch_description;
410 ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); 417 ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
411 418
412 const auto& cbufs = launch_desc.const_buffer_config; 419 const auto& cbufs = launch_desc.const_buffer_config;
413 const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; 420 const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
414 compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written); 421 channel_state->compute_storage_buffers[ssbo_index] =
422 StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
415} 423}
416 424
417template <class P> 425template <class P>
418void BufferCache<P>::UnbindComputeTextureBuffers() { 426void BufferCache<P>::UnbindComputeTextureBuffers() {
419 enabled_compute_texture_buffers = 0; 427 channel_state->enabled_compute_texture_buffers = 0;
420 written_compute_texture_buffers = 0; 428 channel_state->written_compute_texture_buffers = 0;
421 image_compute_texture_buffers = 0; 429 channel_state->image_compute_texture_buffers = 0;
422} 430}
423 431
424template <class P> 432template <class P>
425void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, 433void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
426 PixelFormat format, bool is_written, bool is_image) { 434 PixelFormat format, bool is_written, bool is_image) {
427 enabled_compute_texture_buffers |= 1U << tbo_index; 435 channel_state->enabled_compute_texture_buffers |= 1U << tbo_index;
428 written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; 436 channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
429 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 437 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
430 image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; 438 channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
431 } 439 }
432 compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); 440 channel_state->compute_texture_buffers[tbo_index] =
441 GetTextureBufferBinding(gpu_addr, size, format);
433} 442}
434 443
435template <class P> 444template <class P>
@@ -672,10 +681,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
672 681
673template <class P> 682template <class P>
674void BufferCache<P>::BindHostIndexBuffer() { 683void BufferCache<P>::BindHostIndexBuffer() {
675 Buffer& buffer = slot_buffers[index_buffer.buffer_id]; 684 Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
676 TouchBuffer(buffer, index_buffer.buffer_id); 685 TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
677 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 686 const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
678 const u32 size = index_buffer.size; 687 const u32 size = channel_state->index_buffer.size;
679 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 688 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
680 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { 689 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
681 if constexpr (USE_MEMORY_MAPS) { 690 if constexpr (USE_MEMORY_MAPS) {
@@ -689,7 +698,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
689 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); 698 buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
690 } 699 }
691 } else { 700 } else {
692 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); 701 SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
693 } 702 }
694 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { 703 if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
695 const u32 new_offset = 704 const u32 new_offset =
@@ -706,7 +715,7 @@ template <class P>
706void BufferCache<P>::BindHostVertexBuffers() { 715void BufferCache<P>::BindHostVertexBuffers() {
707 auto& flags = maxwell3d->dirty.flags; 716 auto& flags = maxwell3d->dirty.flags;
708 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 717 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
709 const Binding& binding = vertex_buffers[index]; 718 const Binding& binding = channel_state->vertex_buffers[index];
710 Buffer& buffer = slot_buffers[binding.buffer_id]; 719 Buffer& buffer = slot_buffers[binding.buffer_id];
711 TouchBuffer(buffer, binding.buffer_id); 720 TouchBuffer(buffer, binding.buffer_id);
712 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 721 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
@@ -729,19 +738,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
729 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 738 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
730 }; 739 };
731 if (current_draw_indirect->include_count) { 740 if (current_draw_indirect->include_count) {
732 bind_buffer(count_buffer_binding); 741 bind_buffer(channel_state->count_buffer_binding);
733 } 742 }
734 bind_buffer(indirect_buffer_binding); 743 bind_buffer(channel_state->indirect_buffer_binding);
735} 744}
736 745
737template <class P> 746template <class P>
738void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { 747void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
739 u32 dirty = ~0U; 748 u32 dirty = ~0U;
740 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 749 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
741 dirty = std::exchange(dirty_uniform_buffers[stage], 0); 750 dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0);
742 } 751 }
743 u32 binding_index = 0; 752 u32 binding_index = 0;
744 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { 753 ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
745 const bool needs_bind = ((dirty >> index) & 1) != 0; 754 const bool needs_bind = ((dirty >> index) & 1) != 0;
746 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); 755 BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
747 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 756 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -753,13 +762,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
753template <class P> 762template <class P>
754void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, 763void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
755 bool needs_bind) { 764 bool needs_bind) {
756 const Binding& binding = uniform_buffers[stage][index]; 765 const Binding& binding = channel_state->uniform_buffers[stage][index];
757 const VAddr cpu_addr = binding.cpu_addr; 766 const VAddr cpu_addr = binding.cpu_addr;
758 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); 767 const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
759 Buffer& buffer = slot_buffers[binding.buffer_id]; 768 Buffer& buffer = slot_buffers[binding.buffer_id];
760 TouchBuffer(buffer, binding.buffer_id); 769 TouchBuffer(buffer, binding.buffer_id);
761 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 770 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
762 size <= uniform_buffer_skip_cache_size && 771 size <= channel_state->uniform_buffer_skip_cache_size &&
763 !memory_tracker.IsRegionGpuModified(cpu_addr, size); 772 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
764 if (use_fast_buffer) { 773 if (use_fast_buffer) {
765 if constexpr (IS_OPENGL) { 774 if constexpr (IS_OPENGL) {
@@ -767,11 +776,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
767 // Fast path for Nvidia 776 // Fast path for Nvidia
768 const bool should_fast_bind = 777 const bool should_fast_bind =
769 !HasFastUniformBufferBound(stage, binding_index) || 778 !HasFastUniformBufferBound(stage, binding_index) ||
770 uniform_buffer_binding_sizes[stage][binding_index] != size; 779 channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
771 if (should_fast_bind) { 780 if (should_fast_bind) {
772 // We only have to bind when the currently bound buffer is not the fast version 781 // We only have to bind when the currently bound buffer is not the fast version
773 fast_bound_uniform_buffers[stage] |= 1U << binding_index; 782 channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
774 uniform_buffer_binding_sizes[stage][binding_index] = size; 783 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
775 runtime.BindFastUniformBuffer(stage, binding_index, size); 784 runtime.BindFastUniformBuffer(stage, binding_index, size);
776 } 785 }
777 const auto span = ImmediateBufferWithData(cpu_addr, size); 786 const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -780,8 +789,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
780 } 789 }
781 } 790 }
782 if constexpr (IS_OPENGL) { 791 if constexpr (IS_OPENGL) {
783 fast_bound_uniform_buffers[stage] |= 1U << binding_index; 792 channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
784 uniform_buffer_binding_sizes[stage][binding_index] = size; 793 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
785 } 794 }
786 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan 795 // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
787 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); 796 const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
@@ -791,15 +800,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
791 // Classic cached path 800 // Classic cached path
792 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size); 801 const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
793 if (sync_cached) { 802 if (sync_cached) {
794 ++uniform_cache_hits[0]; 803 ++channel_state->uniform_cache_hits[0];
795 } 804 }
796 ++uniform_cache_shots[0]; 805 ++channel_state->uniform_cache_shots[0];
797 806
798 // Skip binding if it's not needed and if the bound buffer is not the fast version 807 // Skip binding if it's not needed and if the bound buffer is not the fast version
799 // This exists to avoid instances where the fast buffer is bound and a GPU write happens 808 // This exists to avoid instances where the fast buffer is bound and a GPU write happens
800 needs_bind |= HasFastUniformBufferBound(stage, binding_index); 809 needs_bind |= HasFastUniformBufferBound(stage, binding_index);
801 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 810 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
802 needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; 811 needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
803 } 812 }
804 if (!needs_bind) { 813 if (!needs_bind) {
805 return; 814 return;
@@ -807,14 +816,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
807 const u32 offset = buffer.Offset(cpu_addr); 816 const u32 offset = buffer.Offset(cpu_addr);
808 if constexpr (IS_OPENGL) { 817 if constexpr (IS_OPENGL) {
809 // Fast buffer will be unbound 818 // Fast buffer will be unbound
810 fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); 819 channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
811 820
812 // Mark the index as dirty if offset doesn't match 821 // Mark the index as dirty if offset doesn't match
813 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); 822 const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
814 dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; 823 channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
815 } 824 }
816 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 825 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
817 uniform_buffer_binding_sizes[stage][binding_index] = size; 826 channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
818 } 827 }
819 if constexpr (NEEDS_BIND_UNIFORM_INDEX) { 828 if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
820 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); 829 runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
@@ -826,15 +835,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
826template <class P> 835template <class P>
827void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { 836void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
828 u32 binding_index = 0; 837 u32 binding_index = 0;
829 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { 838 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
830 const Binding& binding = storage_buffers[stage][index]; 839 const Binding& binding = channel_state->storage_buffers[stage][index];
831 Buffer& buffer = slot_buffers[binding.buffer_id]; 840 Buffer& buffer = slot_buffers[binding.buffer_id];
832 TouchBuffer(buffer, binding.buffer_id); 841 TouchBuffer(buffer, binding.buffer_id);
833 const u32 size = binding.size; 842 const u32 size = binding.size;
834 SynchronizeBuffer(buffer, binding.cpu_addr, size); 843 SynchronizeBuffer(buffer, binding.cpu_addr, size);
835 844
836 const u32 offset = buffer.Offset(binding.cpu_addr); 845 const u32 offset = buffer.Offset(binding.cpu_addr);
837 const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; 846 const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
838 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 847 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
839 runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); 848 runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
840 ++binding_index; 849 ++binding_index;
@@ -846,8 +855,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
846 855
847template <class P> 856template <class P>
848void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { 857void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
849 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { 858 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
850 const TextureBufferBinding& binding = texture_buffers[stage][index]; 859 const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
851 Buffer& buffer = slot_buffers[binding.buffer_id]; 860 Buffer& buffer = slot_buffers[binding.buffer_id];
852 const u32 size = binding.size; 861 const u32 size = binding.size;
853 SynchronizeBuffer(buffer, binding.cpu_addr, size); 862 SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -855,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
855 const u32 offset = buffer.Offset(binding.cpu_addr); 864 const u32 offset = buffer.Offset(binding.cpu_addr);
856 const PixelFormat format = binding.format; 865 const PixelFormat format = binding.format;
857 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 866 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
858 if (((image_texture_buffers[stage] >> index) & 1) != 0) { 867 if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
859 runtime.BindImageBuffer(buffer, offset, size, format); 868 runtime.BindImageBuffer(buffer, offset, size, format);
860 } else { 869 } else {
861 runtime.BindTextureBuffer(buffer, offset, size, format); 870 runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -872,7 +881,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
872 return; 881 return;
873 } 882 }
874 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { 883 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
875 const Binding& binding = transform_feedback_buffers[index]; 884 const Binding& binding = channel_state->transform_feedback_buffers[index];
876 Buffer& buffer = slot_buffers[binding.buffer_id]; 885 Buffer& buffer = slot_buffers[binding.buffer_id];
877 TouchBuffer(buffer, binding.buffer_id); 886 TouchBuffer(buffer, binding.buffer_id);
878 const u32 size = binding.size; 887 const u32 size = binding.size;
@@ -887,15 +896,16 @@ template <class P>
887void BufferCache<P>::BindHostComputeUniformBuffers() { 896void BufferCache<P>::BindHostComputeUniformBuffers() {
888 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 897 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
889 // Mark all uniform buffers as dirty 898 // Mark all uniform buffers as dirty
890 dirty_uniform_buffers.fill(~u32{0}); 899 channel_state->dirty_uniform_buffers.fill(~u32{0});
891 fast_bound_uniform_buffers.fill(0); 900 channel_state->fast_bound_uniform_buffers.fill(0);
892 } 901 }
893 u32 binding_index = 0; 902 u32 binding_index = 0;
894 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { 903 ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
895 const Binding& binding = compute_uniform_buffers[index]; 904 const Binding& binding = channel_state->compute_uniform_buffers[index];
896 Buffer& buffer = slot_buffers[binding.buffer_id]; 905 Buffer& buffer = slot_buffers[binding.buffer_id];
897 TouchBuffer(buffer, binding.buffer_id); 906 TouchBuffer(buffer, binding.buffer_id);
898 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); 907 const u32 size =
908 std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
899 SynchronizeBuffer(buffer, binding.cpu_addr, size); 909 SynchronizeBuffer(buffer, binding.cpu_addr, size);
900 910
901 const u32 offset = buffer.Offset(binding.cpu_addr); 911 const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -911,15 +921,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
911template <class P> 921template <class P>
912void BufferCache<P>::BindHostComputeStorageBuffers() { 922void BufferCache<P>::BindHostComputeStorageBuffers() {
913 u32 binding_index = 0; 923 u32 binding_index = 0;
914 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 924 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
915 const Binding& binding = compute_storage_buffers[index]; 925 const Binding& binding = channel_state->compute_storage_buffers[index];
916 Buffer& buffer = slot_buffers[binding.buffer_id]; 926 Buffer& buffer = slot_buffers[binding.buffer_id];
917 TouchBuffer(buffer, binding.buffer_id); 927 TouchBuffer(buffer, binding.buffer_id);
918 const u32 size = binding.size; 928 const u32 size = binding.size;
919 SynchronizeBuffer(buffer, binding.cpu_addr, size); 929 SynchronizeBuffer(buffer, binding.cpu_addr, size);
920 930
921 const u32 offset = buffer.Offset(binding.cpu_addr); 931 const u32 offset = buffer.Offset(binding.cpu_addr);
922 const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; 932 const bool is_written =
933 ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
923 if constexpr (NEEDS_BIND_STORAGE_INDEX) { 934 if constexpr (NEEDS_BIND_STORAGE_INDEX) {
924 runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); 935 runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
925 ++binding_index; 936 ++binding_index;
@@ -931,8 +942,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
931 942
932template <class P> 943template <class P>
933void BufferCache<P>::BindHostComputeTextureBuffers() { 944void BufferCache<P>::BindHostComputeTextureBuffers() {
934 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { 945 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
935 const TextureBufferBinding& binding = compute_texture_buffers[index]; 946 const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
936 Buffer& buffer = slot_buffers[binding.buffer_id]; 947 Buffer& buffer = slot_buffers[binding.buffer_id];
937 const u32 size = binding.size; 948 const u32 size = binding.size;
938 SynchronizeBuffer(buffer, binding.cpu_addr, size); 949 SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -940,7 +951,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
940 const u32 offset = buffer.Offset(binding.cpu_addr); 951 const u32 offset = buffer.Offset(binding.cpu_addr);
941 const PixelFormat format = binding.format; 952 const PixelFormat format = binding.format;
942 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { 953 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
943 if (((image_compute_texture_buffers >> index) & 1) != 0) { 954 if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
944 runtime.BindImageBuffer(buffer, offset, size, format); 955 runtime.BindImageBuffer(buffer, offset, size, format);
945 } else { 956 } else {
946 runtime.BindTextureBuffer(buffer, offset, size, format); 957 runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -954,7 +965,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
954template <class P> 965template <class P>
955void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { 966void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
956 do { 967 do {
957 has_deleted_buffers = false; 968 channel_state->has_deleted_buffers = false;
958 if (is_indexed) { 969 if (is_indexed) {
959 UpdateIndexBuffer(); 970 UpdateIndexBuffer();
960 } 971 }
@@ -968,7 +979,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
968 if (current_draw_indirect) { 979 if (current_draw_indirect) {
969 UpdateDrawIndirect(); 980 UpdateDrawIndirect();
970 } 981 }
971 } while (has_deleted_buffers); 982 } while (channel_state->has_deleted_buffers);
972} 983}
973 984
974template <class P> 985template <class P>
@@ -999,7 +1010,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
999 slot_buffers.erase(inline_buffer_id); 1010 slot_buffers.erase(inline_buffer_id);
1000 inline_buffer_id = CreateBuffer(0, buffer_size); 1011 inline_buffer_id = CreateBuffer(0, buffer_size);
1001 } 1012 }
1002 index_buffer = Binding{ 1013 channel_state->index_buffer = Binding{
1003 .cpu_addr = 0, 1014 .cpu_addr = 0,
1004 .size = inline_index_size, 1015 .size = inline_index_size,
1005 .buffer_id = inline_buffer_id, 1016 .buffer_id = inline_buffer_id,
@@ -1015,10 +1026,10 @@ void BufferCache<P>::UpdateIndexBuffer() {
1015 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); 1026 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1016 const u32 size = std::min(address_size, draw_size); 1027 const u32 size = std::min(address_size, draw_size);
1017 if (size == 0 || !cpu_addr) { 1028 if (size == 0 || !cpu_addr) {
1018 index_buffer = NULL_BINDING; 1029 channel_state->index_buffer = NULL_BINDING;
1019 return; 1030 return;
1020 } 1031 }
1021 index_buffer = Binding{ 1032 channel_state->index_buffer = Binding{
1022 .cpu_addr = *cpu_addr, 1033 .cpu_addr = *cpu_addr,
1023 .size = size, 1034 .size = size,
1024 .buffer_id = FindBuffer(*cpu_addr, size), 1035 .buffer_id = FindBuffer(*cpu_addr, size),
@@ -1051,13 +1062,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1051 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1062 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1052 u32 size = address_size; // TODO: Analyze stride and number of vertices 1063 u32 size = address_size; // TODO: Analyze stride and number of vertices
1053 if (array.enable == 0 || size == 0 || !cpu_addr) { 1064 if (array.enable == 0 || size == 0 || !cpu_addr) {
1054 vertex_buffers[index] = NULL_BINDING; 1065 channel_state->vertex_buffers[index] = NULL_BINDING;
1055 return; 1066 return;
1056 } 1067 }
1057 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1068 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1058 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); 1069 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1059 } 1070 }
1060 vertex_buffers[index] = Binding{ 1071 channel_state->vertex_buffers[index] = Binding{
1061 .cpu_addr = *cpu_addr, 1072 .cpu_addr = *cpu_addr,
1062 .size = size, 1073 .size = size,
1063 .buffer_id = FindBuffer(*cpu_addr, size), 1074 .buffer_id = FindBuffer(*cpu_addr, size),
@@ -1079,23 +1090,24 @@ void BufferCache<P>::UpdateDrawIndirect() {
1079 }; 1090 };
1080 }; 1091 };
1081 if (current_draw_indirect->include_count) { 1092 if (current_draw_indirect->include_count) {
1082 update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding); 1093 update(current_draw_indirect->count_start_address, sizeof(u32),
1094 channel_state->count_buffer_binding);
1083 } 1095 }
1084 update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size, 1096 update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
1085 indirect_buffer_binding); 1097 channel_state->indirect_buffer_binding);
1086} 1098}
1087 1099
1088template <class P> 1100template <class P>
1089void BufferCache<P>::UpdateUniformBuffers(size_t stage) { 1101void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1090 ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { 1102 ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
1091 Binding& binding = uniform_buffers[stage][index]; 1103 Binding& binding = channel_state->uniform_buffers[stage][index];
1092 if (binding.buffer_id) { 1104 if (binding.buffer_id) {
1093 // Already updated 1105 // Already updated
1094 return; 1106 return;
1095 } 1107 }
1096 // Mark as dirty 1108 // Mark as dirty
1097 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1109 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1098 dirty_uniform_buffers[stage] |= 1U << index; 1110 channel_state->dirty_uniform_buffers[stage] |= 1U << index;
1099 } 1111 }
1100 // Resolve buffer 1112 // Resolve buffer
1101 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1113 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
@@ -1104,10 +1116,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
1104 1116
1105template <class P> 1117template <class P>
1106void BufferCache<P>::UpdateStorageBuffers(size_t stage) { 1118void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1107 const u32 written_mask = written_storage_buffers[stage]; 1119 const u32 written_mask = channel_state->written_storage_buffers[stage];
1108 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { 1120 ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
1109 // Resolve buffer 1121 // Resolve buffer
1110 Binding& binding = storage_buffers[stage][index]; 1122 Binding& binding = channel_state->storage_buffers[stage][index];
1111 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1123 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1112 binding.buffer_id = buffer_id; 1124 binding.buffer_id = buffer_id;
1113 // Mark buffer as written if needed 1125 // Mark buffer as written if needed
@@ -1119,11 +1131,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
1119 1131
1120template <class P> 1132template <class P>
1121void BufferCache<P>::UpdateTextureBuffers(size_t stage) { 1133void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
1122 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { 1134 ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
1123 Binding& binding = texture_buffers[stage][index]; 1135 Binding& binding = channel_state->texture_buffers[stage][index];
1124 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1136 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1125 // Mark buffer as written if needed 1137 // Mark buffer as written if needed
1126 if (((written_texture_buffers[stage] >> index) & 1) != 0) { 1138 if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
1127 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); 1139 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1128 } 1140 }
1129 }); 1141 });
@@ -1146,11 +1158,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1146 const u32 size = binding.size; 1158 const u32 size = binding.size;
1147 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1159 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1148 if (binding.enable == 0 || size == 0 || !cpu_addr) { 1160 if (binding.enable == 0 || size == 0 || !cpu_addr) {
1149 transform_feedback_buffers[index] = NULL_BINDING; 1161 channel_state->transform_feedback_buffers[index] = NULL_BINDING;
1150 return; 1162 return;
1151 } 1163 }
1152 const BufferId buffer_id = FindBuffer(*cpu_addr, size); 1164 const BufferId buffer_id = FindBuffer(*cpu_addr, size);
1153 transform_feedback_buffers[index] = Binding{ 1165 channel_state->transform_feedback_buffers[index] = Binding{
1154 .cpu_addr = *cpu_addr, 1166 .cpu_addr = *cpu_addr,
1155 .size = size, 1167 .size = size,
1156 .buffer_id = buffer_id, 1168 .buffer_id = buffer_id,
@@ -1160,8 +1172,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
1160 1172
1161template <class P> 1173template <class P>
1162void BufferCache<P>::UpdateComputeUniformBuffers() { 1174void BufferCache<P>::UpdateComputeUniformBuffers() {
1163 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { 1175 ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
1164 Binding& binding = compute_uniform_buffers[index]; 1176 Binding& binding = channel_state->compute_uniform_buffers[index];
1165 binding = NULL_BINDING; 1177 binding = NULL_BINDING;
1166 const auto& launch_desc = kepler_compute->launch_description; 1178 const auto& launch_desc = kepler_compute->launch_description;
1167 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { 1179 if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
@@ -1178,12 +1190,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
1178 1190
1179template <class P> 1191template <class P>
1180void BufferCache<P>::UpdateComputeStorageBuffers() { 1192void BufferCache<P>::UpdateComputeStorageBuffers() {
1181 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1193 ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
1182 // Resolve buffer 1194 // Resolve buffer
1183 Binding& binding = compute_storage_buffers[index]; 1195 Binding& binding = channel_state->compute_storage_buffers[index];
1184 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1196 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1185 // Mark as written if needed 1197 // Mark as written if needed
1186 if (((written_compute_storage_buffers >> index) & 1) != 0) { 1198 if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
1187 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); 1199 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1188 } 1200 }
1189 }); 1201 });
@@ -1191,11 +1203,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1191 1203
1192template <class P> 1204template <class P>
1193void BufferCache<P>::UpdateComputeTextureBuffers() { 1205void BufferCache<P>::UpdateComputeTextureBuffers() {
1194 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { 1206 ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
1195 Binding& binding = compute_texture_buffers[index]; 1207 Binding& binding = channel_state->compute_texture_buffers[index];
1196 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1208 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1197 // Mark as written if needed 1209 // Mark as written if needed
1198 if (((written_compute_texture_buffers >> index) & 1) != 0) { 1210 if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
1199 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); 1211 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1200 } 1212 }
1201 }); 1213 });
@@ -1610,13 +1622,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
1610 const auto replace = [scalar_replace](std::span<Binding> bindings) { 1622 const auto replace = [scalar_replace](std::span<Binding> bindings) {
1611 std::ranges::for_each(bindings, scalar_replace); 1623 std::ranges::for_each(bindings, scalar_replace);
1612 }; 1624 };
1613 scalar_replace(index_buffer); 1625 scalar_replace(channel_state->index_buffer);
1614 replace(vertex_buffers); 1626 replace(channel_state->vertex_buffers);
1615 std::ranges::for_each(uniform_buffers, replace); 1627 std::ranges::for_each(channel_state->uniform_buffers, replace);
1616 std::ranges::for_each(storage_buffers, replace); 1628 std::ranges::for_each(channel_state->storage_buffers, replace);
1617 replace(transform_feedback_buffers); 1629 replace(channel_state->transform_feedback_buffers);
1618 replace(compute_uniform_buffers); 1630 replace(channel_state->compute_uniform_buffers);
1619 replace(compute_storage_buffers); 1631 replace(channel_state->compute_storage_buffers);
1620 1632
1621 // Mark the whole buffer as CPU written to stop tracking CPU writes 1633 // Mark the whole buffer as CPU written to stop tracking CPU writes
1622 if (!do_not_mark) { 1634 if (!do_not_mark) {
@@ -1634,8 +1646,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
1634template <class P> 1646template <class P>
1635void BufferCache<P>::NotifyBufferDeletion() { 1647void BufferCache<P>::NotifyBufferDeletion() {
1636 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { 1648 if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
1637 dirty_uniform_buffers.fill(~u32{0}); 1649 channel_state->dirty_uniform_buffers.fill(~u32{0});
1638 uniform_buffer_binding_sizes.fill({}); 1650 channel_state->uniform_buffer_binding_sizes.fill({});
1639 } 1651 }
1640 auto& flags = maxwell3d->dirty.flags; 1652 auto& flags = maxwell3d->dirty.flags;
1641 flags[Dirty::IndexBuffer] = true; 1653 flags[Dirty::IndexBuffer] = true;
@@ -1643,13 +1655,12 @@ void BufferCache<P>::NotifyBufferDeletion() {
1643 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 1655 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
1644 flags[Dirty::VertexBuffer0 + index] = true; 1656 flags[Dirty::VertexBuffer0 + index] = true;
1645 } 1657 }
1646 has_deleted_buffers = true; 1658 channel_state->has_deleted_buffers = true;
1647} 1659}
1648 1660
1649template <class P> 1661template <class P>
1650typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, 1662Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
1651 u32 cbuf_index, 1663 bool is_written) const {
1652 bool is_written) const {
1653 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); 1664 const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
1654 const auto size = [&]() { 1665 const auto size = [&]() {
1655 const bool is_nvn_cbuf = cbuf_index == 0; 1666 const bool is_nvn_cbuf = cbuf_index == 0;
@@ -1681,8 +1692,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1681} 1692}
1682 1693
1683template <class P> 1694template <class P>
1684typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( 1695TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
1685 GPUVAddr gpu_addr, u32 size, PixelFormat format) { 1696 PixelFormat format) {
1686 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1697 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1687 TextureBufferBinding binding; 1698 TextureBufferBinding binding;
1688 if (!cpu_addr || size == 0) { 1699 if (!cpu_addr || size == 0) {
@@ -1721,7 +1732,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
1721template <class P> 1732template <class P>
1722bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { 1733bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
1723 if constexpr (IS_OPENGL) { 1734 if constexpr (IS_OPENGL) {
1724 return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; 1735 return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
1725 } else { 1736 } else {
1726 // Only OpenGL has fast uniform buffers 1737 // Only OpenGL has fast uniform buffers
1727 return false; 1738 return false;
@@ -1730,14 +1741,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index)
1730 1741
1731template <class P> 1742template <class P>
1732std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { 1743std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() {
1733 auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; 1744 auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id];
1734 return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); 1745 return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr));
1735} 1746}
1736 1747
1737template <class P> 1748template <class P>
1738std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { 1749std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() {
1739 auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; 1750 auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id];
1740 return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); 1751 return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr));
1741} 1752}
1742 1753
1743} // namespace VideoCommon 1754} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index ac00d4d9d..c689fe06b 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 {
86 MarkQuery = 3, 86 MarkQuery = 3,
87}; 87};
88 88
89template <typename P> 89static constexpr BufferId NULL_BUFFER_ID{0};
90class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { 90static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
91
92struct Binding {
93 VAddr cpu_addr{};
94 u32 size{};
95 BufferId buffer_id;
96};
97
98struct TextureBufferBinding : Binding {
99 PixelFormat format;
100};
101
102static constexpr Binding NULL_BINDING{
103 .cpu_addr = 0,
104 .size = 0,
105 .buffer_id = NULL_BUFFER_ID,
106};
107
108class BufferCacheChannelInfo : public ChannelInfo {
109public:
110 BufferCacheChannelInfo() = delete;
111 BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {}
112 BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete;
113 BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete;
114
115 Binding index_buffer;
116 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
117 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
118 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
119 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
120 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
121 Binding count_buffer_binding;
122 Binding indirect_buffer_binding;
123
124 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
125 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
126 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
127
128 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
129 u32 enabled_compute_uniform_buffer_mask = 0;
130
131 const UniformBufferSizes* uniform_buffer_sizes{};
132 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
133
134 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
135 std::array<u32, NUM_STAGES> written_storage_buffers{};
136 u32 enabled_compute_storage_buffers = 0;
137 u32 written_compute_storage_buffers = 0;
138
139 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
140 std::array<u32, NUM_STAGES> written_texture_buffers{};
141 std::array<u32, NUM_STAGES> image_texture_buffers{};
142 u32 enabled_compute_texture_buffers = 0;
143 u32 written_compute_texture_buffers = 0;
144 u32 image_compute_texture_buffers = 0;
145
146 std::array<u32, 16> uniform_cache_hits{};
147 std::array<u32, 16> uniform_cache_shots{};
148
149 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
150
151 bool has_deleted_buffers = false;
152
153 std::array<u32, NUM_STAGES> dirty_uniform_buffers{};
154 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
155 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>
156 uniform_buffer_binding_sizes{};
157};
158
159template <class P>
160class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
91 // Page size for caching purposes. 161 // Page size for caching purposes.
92 // This is unrelated to the CPU page size and it can be changed as it seems optimal. 162 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
93 static constexpr u32 CACHING_PAGEBITS = 16; 163 static constexpr u32 CACHING_PAGEBITS = 16;
@@ -104,8 +174,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
104 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; 174 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
105 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; 175 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
106 176
107 static constexpr BufferId NULL_BUFFER_ID{0};
108
109 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; 177 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
110 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; 178 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
111 static constexpr s64 TARGET_THRESHOLD = 4_GiB; 179 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
@@ -149,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
149 using OverlapSection = boost::icl::inter_section<int>; 217 using OverlapSection = boost::icl::inter_section<int>;
150 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 218 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
151 219
152 struct Empty {};
153
154 struct OverlapResult { 220 struct OverlapResult {
155 std::vector<BufferId> ids; 221 std::vector<BufferId> ids;
156 VAddr begin; 222 VAddr begin;
@@ -158,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
158 bool has_stream_leap = false; 224 bool has_stream_leap = false;
159 }; 225 };
160 226
161 struct Binding {
162 VAddr cpu_addr{};
163 u32 size{};
164 BufferId buffer_id;
165 };
166
167 struct TextureBufferBinding : Binding {
168 PixelFormat format;
169 };
170
171 static constexpr Binding NULL_BINDING{
172 .cpu_addr = 0,
173 .size = 0,
174 .buffer_id = NULL_BUFFER_ID,
175 };
176
177public: 227public:
178 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
179
180 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, 228 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
181 Core::Memory::Memory& cpu_memory_, Runtime& runtime_); 229 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
182 230
@@ -496,51 +544,6 @@ private:
496 544
497 u32 last_index_count = 0; 545 u32 last_index_count = 0;
498 546
499 Binding index_buffer;
500 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
501 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
502 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
503 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
504 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
505 Binding count_buffer_binding;
506 Binding indirect_buffer_binding;
507
508 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
509 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
510 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
511
512 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
513 u32 enabled_compute_uniform_buffer_mask = 0;
514
515 const UniformBufferSizes* uniform_buffer_sizes{};
516 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
517
518 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
519 std::array<u32, NUM_STAGES> written_storage_buffers{};
520 u32 enabled_compute_storage_buffers = 0;
521 u32 written_compute_storage_buffers = 0;
522
523 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
524 std::array<u32, NUM_STAGES> written_texture_buffers{};
525 std::array<u32, NUM_STAGES> image_texture_buffers{};
526 u32 enabled_compute_texture_buffers = 0;
527 u32 written_compute_texture_buffers = 0;
528 u32 image_compute_texture_buffers = 0;
529
530 std::array<u32, 16> uniform_cache_hits{};
531 std::array<u32, 16> uniform_cache_shots{};
532
533 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
534
535 bool has_deleted_buffers = false;
536
537 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
538 dirty_uniform_buffers{};
539 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
540 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
541 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
542 uniform_buffer_binding_sizes{};
543
544 MemoryTracker memory_tracker; 547 MemoryTracker memory_tracker;
545 IntervalSet uncommitted_ranges; 548 IntervalSet uncommitted_ranges;
546 IntervalSet common_ranges; 549 IntervalSet common_ranges;