author      2023-05-27 15:45:36 +0100
committer   2023-05-27 17:04:18 +0100
commit      b0bea13ed8422119cc4f09763be095cbc3762795 (patch)
tree        c800abba1887e405774011fffcbe7fde60fa5e46 /src/video_core/buffer_cache
parent      Merge pull request #10414 from liamwhite/anv-push-descriptor (diff)
download    yuzu-b0bea13ed8422119cc4f09763be095cbc3762795.tar.gz
            yuzu-b0bea13ed8422119cc4f09763be095cbc3762795.tar.xz
            yuzu-b0bea13ed8422119cc4f09763be095cbc3762795.zip
Move buffer bindings to per-channel state
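
The subject line summarizes the whole refactor: bindings that previously lived as direct members of BufferCache (index_buffer, vertex_buffers, uniform_buffers, has_deleted_buffers, and so on) move into a BufferCacheChannelInfo object, and the cache reaches them through its channel_state pointer. A minimal sketch of the pattern — the names BufferCacheChannelInfo and channel_state come from this diff, but the scaffolding below is illustrative, not the actual yuzu classes:

struct ChannelBindingsSketch {
    // Stand-in for BufferCacheChannelInfo: everything per-channel lives here.
    bool has_deleted_buffers = false;
    // ... index_buffer, vertex_buffers, uniform_buffers, etc. (see the diff below)
};

class BufferCacheSketch {
public:
    // Stand-in for the ChannelSetupCaches base selecting the active channel.
    void BindChannel(ChannelBindingsSketch& bindings) {
        channel_state = &bindings;
    }

    // Mirrors UpdateComputeBuffers() in the diff: state is read through
    // channel_state-> instead of a direct member access.
    void UpdateComputeBuffers() {
        do {
            channel_state->has_deleted_buffers = false;
            // DoUpdateComputeBuffers();
        } while (channel_state->has_deleted_buffers);
    }

private:
    ChannelBindingsSketch* channel_state = nullptr;
};

Switching the active channel then only repoints channel_state, so each GPU channel keeps an independent set of bindings without any copying.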
Diffstat (limited to 'src/video_core/buffer_cache')
 -rw-r--r--   src/video_core/buffer_cache/buffer_cache.cpp        4
 -rw-r--r--   src/video_core/buffer_cache/buffer_cache.h        291
 -rw-r--r--   src/video_core/buffer_cache/buffer_cache_base.h   141
3 files changed, 227 insertions, 209 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index 40db243d2..4b4f7061b 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #include "common/microprofile.h"
+#include "video_core/buffer_cache/buffer_cache_base.h"
+#include "video_core/control/channel_state_cache.inc"
 
 namespace VideoCommon {
 
@@ -9,4 +11,6 @@ MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 12
 MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128));
 MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128));
 
+template class VideoCommon::ChannelSetupCaches<VideoCommon::BufferCacheChannelInfo>;
+
 } // namespace VideoCommon
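
Two details make this .cpp change work together: channel_state_cache.inc supplies the template member definitions, and the `template class ...;` line explicitly instantiates ChannelSetupCaches for BufferCacheChannelInfo once in this translation unit, so every other file only needs the declaration. A generic sketch of that idiom, with hypothetical file and type names (not yuzu's):

// widget_cache.h (hypothetical) - declaration only; members are not defined here.
template <class Info>
class WidgetCache {
public:
    void Bind(Info* info);
private:
    Info* current = nullptr;
};

// widget_cache.inc (hypothetical) - member definitions, included only where
// an explicit instantiation will be emitted.
template <class Info>
void WidgetCache<Info>::Bind(Info* info) {
    current = info;
}

// widget_cache.cpp (hypothetical) - compiles the members exactly once,
// mirroring: template class VideoCommon::ChannelSetupCaches<...>;
template class WidgetCache<int>;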
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 65494097b..c336be707 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -64,17 +64,22 @@ void BufferCache<P>::RunGarbageCollector() {
 template <class P>
 void BufferCache<P>::TickFrame() {
     // Calculate hits and shots and move hit bits to the right
-    const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
-    const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
-    std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
-                uniform_cache_hits.begin() + 1);
-    std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
-                uniform_cache_shots.begin() + 1);
-    uniform_cache_hits[0] = 0;
-    uniform_cache_shots[0] = 0;
+
+    const u32 hits = std::reduce(channel_state->uniform_cache_hits.begin(),
+                                 channel_state->uniform_cache_hits.end());
+    const u32 shots = std::reduce(channel_state->uniform_cache_shots.begin(),
+                                  channel_state->uniform_cache_shots.end());
+    std::copy_n(channel_state->uniform_cache_hits.begin(),
+                channel_state->uniform_cache_hits.size() - 1,
+                channel_state->uniform_cache_hits.begin() + 1);
+    std::copy_n(channel_state->uniform_cache_shots.begin(),
+                channel_state->uniform_cache_shots.size() - 1,
+                channel_state->uniform_cache_shots.begin() + 1);
+    channel_state->uniform_cache_hits[0] = 0;
+    channel_state->uniform_cache_shots[0] = 0;
 
     const bool skip_preferred = hits * 256 < shots * 251;
-    uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
+    channel_state->uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
 
     // If we can obtain the memory info, use it instead of the estimate.
     if (runtime.CanReportMemoryUsage()) {
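
The rewritten TickFrame keeps the existing heuristic, now tracked per channel: hits and shots accumulate over a small sliding window, and `hits * 256 < shots * 251` is an integer-only way of asking whether the hit rate is below 251/256 (about 98%); only then is the skip-cache path enabled, sized by DEFAULT_SKIP_CACHE_SIZE (4 KiB per the header diff further down). A standalone illustration of the arithmetic, not the yuzu class:

#include <cstdint>
#include <cstdio>

int main() {
    // Example window: 1000 uniform bind attempts, 975 served by the cache.
    const std::uint32_t shots = 1000;
    const std::uint32_t hits = 975;
    // Integer form of: hits / shots < 251 / 256 (~0.9805), no floating point.
    const bool skip_preferred = hits * 256 < shots * 251;
    const std::uint32_t skip_cache_size = skip_preferred ? 4096 : 0;
    std::printf("hit rate %.4f -> uniform_buffer_skip_cache_size = %u\n",
                static_cast<double>(hits) / shots, skip_cache_size);
    // Prints: hit rate 0.9750 -> uniform_buffer_skip_cache_size = 4096
    return 0;
}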
@@ -164,10 +169,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     BufferId buffer_a;
     BufferId buffer_b;
     do {
-        has_deleted_buffers = false;
+        channel_state->has_deleted_buffers = false;
         buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount));
         buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount));
-    } while (has_deleted_buffers);
+    } while (channel_state->has_deleted_buffers);
     auto& src_buffer = slot_buffers[buffer_a];
     auto& dest_buffer = slot_buffers[buffer_b];
     SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
@@ -272,30 +277,30 @@ void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr
         .size = size,
         .buffer_id = BufferId{},
     };
-    uniform_buffers[stage][index] = binding;
+    channel_state->uniform_buffers[stage][index] = binding;
 }
 
 template <class P>
 void BufferCache<P>::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
-    uniform_buffers[stage][index] = NULL_BINDING;
+    channel_state->uniform_buffers[stage][index] = NULL_BINDING;
 }
 
 template <class P>
 void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) {
     MICROPROFILE_SCOPE(GPU_PrepareBuffers);
     do {
-        has_deleted_buffers = false;
+        channel_state->has_deleted_buffers = false;
         DoUpdateGraphicsBuffers(is_indexed);
-    } while (has_deleted_buffers);
+    } while (channel_state->has_deleted_buffers);
 }
 
 template <class P>
 void BufferCache<P>::UpdateComputeBuffers() {
     MICROPROFILE_SCOPE(GPU_PrepareBuffers);
     do {
-        has_deleted_buffers = false;
+        channel_state->has_deleted_buffers = false;
         DoUpdateComputeBuffers();
-    } while (has_deleted_buffers);
+    } while (channel_state->has_deleted_buffers);
 }
 
 template <class P>
@@ -338,98 +343,102 @@ template <class P>
 void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
                                             const UniformBufferSizes* sizes) {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        if (enabled_uniform_buffer_masks != mask) {
+        if (channel_state->enabled_uniform_buffer_masks != mask) {
             if constexpr (IS_OPENGL) {
-                fast_bound_uniform_buffers.fill(0);
+                channel_state->fast_bound_uniform_buffers.fill(0);
             }
-            dirty_uniform_buffers.fill(~u32{0});
-            uniform_buffer_binding_sizes.fill({});
+            channel_state->dirty_uniform_buffers.fill(~u32{0});
+            channel_state->uniform_buffer_binding_sizes.fill({});
         }
     }
-    enabled_uniform_buffer_masks = mask;
-    uniform_buffer_sizes = sizes;
+    channel_state->enabled_uniform_buffer_masks = mask;
+    channel_state->uniform_buffer_sizes = sizes;
 }
 
 template <class P>
 void BufferCache<P>::SetComputeUniformBufferState(u32 mask,
                                                   const ComputeUniformBufferSizes* sizes) {
-    enabled_compute_uniform_buffer_mask = mask;
-    compute_uniform_buffer_sizes = sizes;
+    channel_state->enabled_compute_uniform_buffer_mask = mask;
+    channel_state->compute_uniform_buffer_sizes = sizes;
 }
 
 template <class P>
 void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) {
-    enabled_storage_buffers[stage] = 0;
-    written_storage_buffers[stage] = 0;
+    channel_state->enabled_storage_buffers[stage] = 0;
+    channel_state->written_storage_buffers[stage] = 0;
 }
 
 template <class P>
 void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index,
                                                u32 cbuf_offset, bool is_written) {
-    enabled_storage_buffers[stage] |= 1U << ssbo_index;
-    written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
+    channel_state->enabled_storage_buffers[stage] |= 1U << ssbo_index;
+    channel_state->written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
 
     const auto& cbufs = maxwell3d->state.shader_stages[stage];
     const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
-    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
+    channel_state->storage_buffers[stage][ssbo_index] =
+        StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
 }
 
 template <class P>
 void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
-    enabled_texture_buffers[stage] = 0;
-    written_texture_buffers[stage] = 0;
-    image_texture_buffers[stage] = 0;
+    channel_state->enabled_texture_buffers[stage] = 0;
+    channel_state->written_texture_buffers[stage] = 0;
+    channel_state->image_texture_buffers[stage] = 0;
 }
 
 template <class P>
 void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
                                                u32 size, PixelFormat format, bool is_written,
                                                bool is_image) {
-    enabled_texture_buffers[stage] |= 1U << tbo_index;
-    written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
+    channel_state->enabled_texture_buffers[stage] |= 1U << tbo_index;
+    channel_state->written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
     if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
-        image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
+        channel_state->image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
     }
-    texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+    channel_state->texture_buffers[stage][tbo_index] =
+        GetTextureBufferBinding(gpu_addr, size, format);
 }
 
 template <class P>
 void BufferCache<P>::UnbindComputeStorageBuffers() {
-    enabled_compute_storage_buffers = 0;
-    written_compute_storage_buffers = 0;
-    image_compute_texture_buffers = 0;
+    channel_state->enabled_compute_storage_buffers = 0;
+    channel_state->written_compute_storage_buffers = 0;
+    channel_state->image_compute_texture_buffers = 0;
 }
 
 template <class P>
 void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
                                               bool is_written) {
-    enabled_compute_storage_buffers |= 1U << ssbo_index;
-    written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
+    channel_state->enabled_compute_storage_buffers |= 1U << ssbo_index;
+    channel_state->written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
 
     const auto& launch_desc = kepler_compute->launch_description;
     ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
 
     const auto& cbufs = launch_desc.const_buffer_config;
     const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset;
-    compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
+    channel_state->compute_storage_buffers[ssbo_index] =
+        StorageBufferBinding(ssbo_addr, cbuf_index, is_written);
 }
 
 template <class P>
 void BufferCache<P>::UnbindComputeTextureBuffers() {
-    enabled_compute_texture_buffers = 0;
-    written_compute_texture_buffers = 0;
-    image_compute_texture_buffers = 0;
+    channel_state->enabled_compute_texture_buffers = 0;
+    channel_state->written_compute_texture_buffers = 0;
+    channel_state->image_compute_texture_buffers = 0;
 }
 
 template <class P>
 void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
                                               PixelFormat format, bool is_written, bool is_image) {
-    enabled_compute_texture_buffers |= 1U << tbo_index;
-    written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
+    channel_state->enabled_compute_texture_buffers |= 1U << tbo_index;
+    channel_state->written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
     if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
-        image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
+        channel_state->image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
     }
-    compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
+    channel_state->compute_texture_buffers[tbo_index] =
+        GetTextureBufferBinding(gpu_addr, size, format);
 }
 
 template <class P>
@@ -672,10 +681,10 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
 
 template <class P>
 void BufferCache<P>::BindHostIndexBuffer() {
-    Buffer& buffer = slot_buffers[index_buffer.buffer_id];
-    TouchBuffer(buffer, index_buffer.buffer_id);
-    const u32 offset = buffer.Offset(index_buffer.cpu_addr);
-    const u32 size = index_buffer.size;
+    Buffer& buffer = slot_buffers[channel_state->index_buffer.buffer_id];
+    TouchBuffer(buffer, channel_state->index_buffer.buffer_id);
+    const u32 offset = buffer.Offset(channel_state->index_buffer.cpu_addr);
+    const u32 size = channel_state->index_buffer.size;
     const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
     if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
         if constexpr (USE_MEMORY_MAPS) {
@@ -689,7 +698,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
         }
     } else {
-        SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
+        SynchronizeBuffer(buffer, channel_state->index_buffer.cpu_addr, size);
     }
     if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
         const u32 new_offset =
@@ -706,7 +715,7 @@ template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
     auto& flags = maxwell3d->dirty.flags;
     for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
-        const Binding& binding = vertex_buffers[index];
+        const Binding& binding = channel_state->vertex_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
@@ -729,19 +738,19 @@ void BufferCache<P>::BindHostDrawIndirectBuffers() {
         SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
     };
     if (current_draw_indirect->include_count) {
-        bind_buffer(count_buffer_binding);
+        bind_buffer(channel_state->count_buffer_binding);
     }
-    bind_buffer(indirect_buffer_binding);
+    bind_buffer(channel_state->indirect_buffer_binding);
 }
 
 template <class P>
 void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
     u32 dirty = ~0U;
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        dirty = std::exchange(dirty_uniform_buffers[stage], 0);
+        dirty = std::exchange(channel_state->dirty_uniform_buffers[stage], 0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
+    ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
         const bool needs_bind = ((dirty >> index) & 1) != 0;
         BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind);
         if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
@@ -753,13 +762,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) {
 template <class P>
 void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index,
                                                    bool needs_bind) {
-    const Binding& binding = uniform_buffers[stage][index];
+    const Binding& binding = channel_state->uniform_buffers[stage][index];
     const VAddr cpu_addr = binding.cpu_addr;
-    const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
+    const u32 size = std::min(binding.size, (*channel_state->uniform_buffer_sizes)[stage][index]);
     Buffer& buffer = slot_buffers[binding.buffer_id];
     TouchBuffer(buffer, binding.buffer_id);
     const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
-                                 size <= uniform_buffer_skip_cache_size &&
+                                 size <= channel_state->uniform_buffer_skip_cache_size &&
                                  !memory_tracker.IsRegionGpuModified(cpu_addr, size);
     if (use_fast_buffer) {
         if constexpr (IS_OPENGL) {
@@ -767,11 +776,11 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                 // Fast path for Nvidia
                 const bool should_fast_bind =
                     !HasFastUniformBufferBound(stage, binding_index) ||
-                    uniform_buffer_binding_sizes[stage][binding_index] != size;
+                    channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
                 if (should_fast_bind) {
                     // We only have to bind when the currently bound buffer is not the fast version
-                    fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-                    uniform_buffer_binding_sizes[stage][binding_index] = size;
+                    channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+                    channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
                     runtime.BindFastUniformBuffer(stage, binding_index, size);
                 }
                 const auto span = ImmediateBufferWithData(cpu_addr, size);
@@ -780,8 +789,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
         }
     }
     if constexpr (IS_OPENGL) {
-        fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-        uniform_buffer_binding_sizes[stage][binding_index] = size;
+        channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+        channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
     // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
     const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
@@ -791,15 +800,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     // Classic cached path
     const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
     if (sync_cached) {
-        ++uniform_cache_hits[0];
+        ++channel_state->uniform_cache_hits[0];
     }
-    ++uniform_cache_shots[0];
+    ++channel_state->uniform_cache_shots[0];
 
     // Skip binding if it's not needed and if the bound buffer is not the fast version
     // This exists to avoid instances where the fast buffer is bound and a GPU write happens
     needs_bind |= HasFastUniformBufferBound(stage, binding_index);
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size;
+        needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
     }
     if (!needs_bind) {
         return;
@@ -807,14 +816,14 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     const u32 offset = buffer.Offset(cpu_addr);
     if constexpr (IS_OPENGL) {
        // Fast buffer will be unbound
-        fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+        channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
 
         // Mark the index as dirty if offset doesn't match
         const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
-        dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
+        channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
     }
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-        uniform_buffer_binding_sizes[stage][binding_index] = size;
+        channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
     if constexpr (NEEDS_BIND_UNIFORM_INDEX) {
         runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size);
@@ -826,15 +835,15 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
 template <class P>
 void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
-        const Binding& binding = storage_buffers[stage][index];
+    ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
+        const Binding& binding = channel_state->storage_buffers[stage][index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
-        const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0;
+        const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -846,8 +855,8 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
-        const TextureBufferBinding& binding = texture_buffers[stage][index];
+    ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
+        const TextureBufferBinding& binding = channel_state->texture_buffers[stage][index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -855,7 +864,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
-            if (((image_texture_buffers[stage] >> index) & 1) != 0) {
+            if (((channel_state->image_texture_buffers[stage] >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
             } else {
                 runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -872,7 +881,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         return;
     }
     for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
-        const Binding& binding = transform_feedback_buffers[index];
+        const Binding& binding = channel_state->transform_feedback_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
@@ -887,15 +896,16 @@ template <class P>
 void BufferCache<P>::BindHostComputeUniformBuffers() {
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         // Mark all uniform buffers as dirty
-        dirty_uniform_buffers.fill(~u32{0});
-        fast_bound_uniform_buffers.fill(0);
+        channel_state->dirty_uniform_buffers.fill(~u32{0});
+        channel_state->fast_bound_uniform_buffers.fill(0);
     }
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
-        const Binding& binding = compute_uniform_buffers[index];
+    ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
+        const Binding& binding = channel_state->compute_uniform_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
-        const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
+        const u32 size =
+            std::min(binding.size, (*channel_state->compute_uniform_buffer_sizes)[index]);
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
@@ -911,15 +921,16 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
 template <class P>
 void BufferCache<P>::BindHostComputeStorageBuffers() {
     u32 binding_index = 0;
-    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
-        const Binding& binding = compute_storage_buffers[index];
+    ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
+        const Binding& binding = channel_state->compute_storage_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         TouchBuffer(buffer, binding.buffer_id);
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
-        const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0;
+        const bool is_written =
+            ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -931,8 +942,8 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
 
 template <class P>
 void BufferCache<P>::BindHostComputeTextureBuffers() {
-    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
-        const TextureBufferBinding& binding = compute_texture_buffers[index];
+    ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
+        const TextureBufferBinding& binding = channel_state->compute_texture_buffers[index];
         Buffer& buffer = slot_buffers[binding.buffer_id];
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
@@ -940,7 +951,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
-            if (((image_compute_texture_buffers >> index) & 1) != 0) {
+            if (((channel_state->image_compute_texture_buffers >> index) & 1) != 0) {
                 runtime.BindImageBuffer(buffer, offset, size, format);
             } else {
                 runtime.BindTextureBuffer(buffer, offset, size, format);
@@ -954,7 +965,7 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
 template <class P>
 void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
     do {
-        has_deleted_buffers = false;
+        channel_state->has_deleted_buffers = false;
         if (is_indexed) {
             UpdateIndexBuffer();
         }
@@ -968,7 +979,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) {
         if (current_draw_indirect) {
             UpdateDrawIndirect();
         }
-    } while (has_deleted_buffers);
+    } while (channel_state->has_deleted_buffers);
 }
 
 template <class P>
@@ -999,7 +1010,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
             slot_buffers.erase(inline_buffer_id);
             inline_buffer_id = CreateBuffer(0, buffer_size);
         }
-        index_buffer = Binding{
+        channel_state->index_buffer = Binding{
             .cpu_addr = 0,
             .size = inline_index_size,
             .buffer_id = inline_buffer_id,
@@ -1015,10 +1026,10 @@ void BufferCache<P>::UpdateIndexBuffer() {
         (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
     const u32 size = std::min(address_size, draw_size);
     if (size == 0 || !cpu_addr) {
-        index_buffer = NULL_BINDING;
+        channel_state->index_buffer = NULL_BINDING;
         return;
     }
-    index_buffer = Binding{
+    channel_state->index_buffer = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
         .buffer_id = FindBuffer(*cpu_addr, size),
@@ -1051,13 +1062,13 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
     u32 size = address_size; // TODO: Analyze stride and number of vertices
     if (array.enable == 0 || size == 0 || !cpu_addr) {
-        vertex_buffers[index] = NULL_BINDING;
+        channel_state->vertex_buffers[index] = NULL_BINDING;
         return;
     }
     if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
         size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
     }
-    vertex_buffers[index] = Binding{
+    channel_state->vertex_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
         .buffer_id = FindBuffer(*cpu_addr, size),
@@ -1079,23 +1090,24 @@ void BufferCache<P>::UpdateDrawIndirect() {
         };
     };
     if (current_draw_indirect->include_count) {
-        update(current_draw_indirect->count_start_address, sizeof(u32), count_buffer_binding);
+        update(current_draw_indirect->count_start_address, sizeof(u32),
+               channel_state->count_buffer_binding);
     }
     update(current_draw_indirect->indirect_start_address, current_draw_indirect->buffer_size,
-           indirect_buffer_binding);
+           channel_state->indirect_buffer_binding);
 }
 
 template <class P>
 void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) {
-        Binding& binding = uniform_buffers[stage][index];
+    ForEachEnabledBit(channel_state->enabled_uniform_buffer_masks[stage], [&](u32 index) {
+        Binding& binding = channel_state->uniform_buffers[stage][index];
         if (binding.buffer_id) {
             // Already updated
             return;
         }
         // Mark as dirty
         if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
-            dirty_uniform_buffers[stage] |= 1U << index;
+            channel_state->dirty_uniform_buffers[stage] |= 1U << index;
         }
         // Resolve buffer
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
@@ -1104,10 +1116,10 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
-    const u32 written_mask = written_storage_buffers[stage];
-    ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
+    const u32 written_mask = channel_state->written_storage_buffers[stage];
+    ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
         // Resolve buffer
-        Binding& binding = storage_buffers[stage][index];
+        Binding& binding = channel_state->storage_buffers[stage][index];
         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         binding.buffer_id = buffer_id;
         // Mark buffer as written if needed
@@ -1119,11 +1131,11 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
-    ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
-        Binding& binding = texture_buffers[stage][index];
+    ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
+        Binding& binding = channel_state->texture_buffers[stage][index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         // Mark buffer as written if needed
-        if (((written_texture_buffers[stage] >> index) & 1) != 0) {
+        if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
             MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
         }
     });
@@ -1146,11 +1158,11 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
     const u32 size = binding.size;
     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (binding.enable == 0 || size == 0 || !cpu_addr) {
-        transform_feedback_buffers[index] = NULL_BINDING;
+        channel_state->transform_feedback_buffers[index] = NULL_BINDING;
         return;
     }
     const BufferId buffer_id = FindBuffer(*cpu_addr, size);
-    transform_feedback_buffers[index] = Binding{
+    channel_state->transform_feedback_buffers[index] = Binding{
         .cpu_addr = *cpu_addr,
         .size = size,
         .buffer_id = buffer_id,
@@ -1160,8 +1172,8 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
 
 template <class P>
 void BufferCache<P>::UpdateComputeUniformBuffers() {
-    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
-        Binding& binding = compute_uniform_buffers[index];
+    ForEachEnabledBit(channel_state->enabled_compute_uniform_buffer_mask, [&](u32 index) {
+        Binding& binding = channel_state->compute_uniform_buffers[index];
         binding = NULL_BINDING;
         const auto& launch_desc = kepler_compute->launch_description;
         if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
@@ -1178,12 +1190,12 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateComputeStorageBuffers() {
-    ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
+    ForEachEnabledBit(channel_state->enabled_compute_storage_buffers, [&](u32 index) {
         // Resolve buffer
-        Binding& binding = compute_storage_buffers[index];
+        Binding& binding = channel_state->compute_storage_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         // Mark as written if needed
-        if (((written_compute_storage_buffers >> index) & 1) != 0) {
+        if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
             MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
         }
     });
@@ -1191,11 +1203,11 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
 
 template <class P>
 void BufferCache<P>::UpdateComputeTextureBuffers() {
-    ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
-        Binding& binding = compute_texture_buffers[index];
+    ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
+        Binding& binding = channel_state->compute_texture_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         // Mark as written if needed
-        if (((written_compute_texture_buffers >> index) & 1) != 0) {
+        if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
             MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
         }
     });
| @@ -1610,13 +1622,13 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { | |||
| 1610 | const auto replace = [scalar_replace](std::span<Binding> bindings) { | 1622 | const auto replace = [scalar_replace](std::span<Binding> bindings) { |
| 1611 | std::ranges::for_each(bindings, scalar_replace); | 1623 | std::ranges::for_each(bindings, scalar_replace); |
| 1612 | }; | 1624 | }; |
| 1613 | scalar_replace(index_buffer); | 1625 | scalar_replace(channel_state->index_buffer); |
| 1614 | replace(vertex_buffers); | 1626 | replace(channel_state->vertex_buffers); |
| 1615 | std::ranges::for_each(uniform_buffers, replace); | 1627 | std::ranges::for_each(channel_state->uniform_buffers, replace); |
| 1616 | std::ranges::for_each(storage_buffers, replace); | 1628 | std::ranges::for_each(channel_state->storage_buffers, replace); |
| 1617 | replace(transform_feedback_buffers); | 1629 | replace(channel_state->transform_feedback_buffers); |
| 1618 | replace(compute_uniform_buffers); | 1630 | replace(channel_state->compute_uniform_buffers); |
| 1619 | replace(compute_storage_buffers); | 1631 | replace(channel_state->compute_storage_buffers); |
| 1620 | 1632 | ||
| 1621 | // Mark the whole buffer as CPU written to stop tracking CPU writes | 1633 | // Mark the whole buffer as CPU written to stop tracking CPU writes |
| 1622 | if (!do_not_mark) { | 1634 | if (!do_not_mark) { |
| @@ -1634,8 +1646,8 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { | |||
| 1634 | template <class P> | 1646 | template <class P> |
| 1635 | void BufferCache<P>::NotifyBufferDeletion() { | 1647 | void BufferCache<P>::NotifyBufferDeletion() { |
| 1636 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | 1648 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 1637 | dirty_uniform_buffers.fill(~u32{0}); | 1649 | channel_state->dirty_uniform_buffers.fill(~u32{0}); |
| 1638 | uniform_buffer_binding_sizes.fill({}); | 1650 | channel_state->uniform_buffer_binding_sizes.fill({}); |
| 1639 | } | 1651 | } |
| 1640 | auto& flags = maxwell3d->dirty.flags; | 1652 | auto& flags = maxwell3d->dirty.flags; |
| 1641 | flags[Dirty::IndexBuffer] = true; | 1653 | flags[Dirty::IndexBuffer] = true; |
| @@ -1643,13 +1655,12 @@ void BufferCache<P>::NotifyBufferDeletion() { | |||
| 1643 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | 1655 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 1644 | flags[Dirty::VertexBuffer0 + index] = true; | 1656 | flags[Dirty::VertexBuffer0 + index] = true; |
| 1645 | } | 1657 | } |
| 1646 | has_deleted_buffers = true; | 1658 | channel_state->has_deleted_buffers = true; |
| 1647 | } | 1659 | } |
| 1648 | 1660 | ||
| 1649 | template <class P> | 1661 | template <class P> |
| 1650 | typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, | 1662 | Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, |
| 1651 | u32 cbuf_index, | 1663 | bool is_written) const { |
| 1652 | bool is_written) const { | ||
| 1653 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); | 1664 | const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr); |
| 1654 | const auto size = [&]() { | 1665 | const auto size = [&]() { |
| 1655 | const bool is_nvn_cbuf = cbuf_index == 0; | 1666 | const bool is_nvn_cbuf = cbuf_index == 0; |
| @@ -1681,8 +1692,8 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 1681 | } | 1692 | } |
| 1682 | 1693 | ||
| 1683 | template <class P> | 1694 | template <class P> |
| 1684 | typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( | 1695 | TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, |
| 1685 | GPUVAddr gpu_addr, u32 size, PixelFormat format) { | 1696 | PixelFormat format) { |
| 1686 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | 1697 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |
| 1687 | TextureBufferBinding binding; | 1698 | TextureBufferBinding binding; |
| 1688 | if (!cpu_addr || size == 0) { | 1699 | if (!cpu_addr || size == 0) { |
| @@ -1721,7 +1732,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { | |||
| 1721 | template <class P> | 1732 | template <class P> |
| 1722 | bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { | 1733 | bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { |
| 1723 | if constexpr (IS_OPENGL) { | 1734 | if constexpr (IS_OPENGL) { |
| 1724 | return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; | 1735 | return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; |
| 1725 | } else { | 1736 | } else { |
| 1726 | // Only OpenGL has fast uniform buffers | 1737 | // Only OpenGL has fast uniform buffers |
| 1727 | return false; | 1738 | return false; |
| @@ -1730,14 +1741,14 @@ bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) | |||
| 1730 | 1741 | ||
| 1731 | template <class P> | 1742 | template <class P> |
| 1732 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { | 1743 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectCount() { |
| 1733 | auto& buffer = slot_buffers[count_buffer_binding.buffer_id]; | 1744 | auto& buffer = slot_buffers[channel_state->count_buffer_binding.buffer_id]; |
| 1734 | return std::make_pair(&buffer, buffer.Offset(count_buffer_binding.cpu_addr)); | 1745 | return std::make_pair(&buffer, buffer.Offset(channel_state->count_buffer_binding.cpu_addr)); |
| 1735 | } | 1746 | } |
| 1736 | 1747 | ||
| 1737 | template <class P> | 1748 | template <class P> |
| 1738 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { | 1749 | std::pair<typename BufferCache<P>::Buffer*, u32> BufferCache<P>::GetDrawIndirectBuffer() { |
| 1739 | auto& buffer = slot_buffers[indirect_buffer_binding.buffer_id]; | 1750 | auto& buffer = slot_buffers[channel_state->indirect_buffer_binding.buffer_id]; |
| 1740 | return std::make_pair(&buffer, buffer.Offset(indirect_buffer_binding.cpu_addr)); | 1751 | return std::make_pair(&buffer, buffer.Offset(channel_state->indirect_buffer_binding.cpu_addr)); |
| 1741 | } | 1752 | } |
| 1742 | 1753 | ||
| 1743 | } // namespace VideoCommon | 1754 | } // namespace VideoCommon |
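Every accessor change in this file follows the same pattern: binding state that used to be a direct member of `BufferCache` is now reached through the `channel_state` pointer supplied by the `ChannelSetupCaches` base, so each GPU channel carries its own bindings. A minimal sketch of that pattern, under the assumption that the base keeps `channel_state` pointed at the currently bound channel's info block; `ChannelCache` and `Info` are hypothetical stand-ins, not the real types:

```cpp
#include <cstdint>
#include <memory>
#include <unordered_map>

struct Info {
    std::uint32_t indirect_buffer_binding = 0;
};

class ChannelCache {
public:
    // Bind (creating on first use) the state block for channel 'id'.
    void BindChannel(int id) {
        auto& slot = channels[id];
        if (!slot) {
            slot = std::make_unique<Info>();
        }
        channel_state = slot.get();
    }

    // Accessors dereference channel_state instead of touching members
    // owned by the cache itself, mirroring the diff above.
    std::uint32_t GetIndirectBinding() const {
        return channel_state->indirect_buffer_binding;
    }

private:
    std::unordered_map<int, std::unique_ptr<Info>> channels;
    Info* channel_state = nullptr;
};
```

Switching channels then amounts to repointing `channel_state`; no binding data is copied or invalidated.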
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index ac00d4d9d..c689fe06b 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -86,8 +86,78 @@ enum class ObtainBufferOperation : u32 { | |||
| 86 | MarkQuery = 3, | 86 | MarkQuery = 3, |
| 87 | }; | 87 | }; |
| 88 | 88 | ||
| 89 | template <typename P> | 89 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 90 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 90 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); |
| 91 | |||
| 92 | struct Binding { | ||
| 93 | VAddr cpu_addr{}; | ||
| 94 | u32 size{}; | ||
| 95 | BufferId buffer_id; | ||
| 96 | }; | ||
| 97 | |||
| 98 | struct TextureBufferBinding : Binding { | ||
| 99 | PixelFormat format; | ||
| 100 | }; | ||
| 101 | |||
| 102 | static constexpr Binding NULL_BINDING{ | ||
| 103 | .cpu_addr = 0, | ||
| 104 | .size = 0, | ||
| 105 | .buffer_id = NULL_BUFFER_ID, | ||
| 106 | }; | ||
| 107 | |||
| 108 | class BufferCacheChannelInfo : public ChannelInfo { | ||
| 109 | public: | ||
| 110 | BufferCacheChannelInfo() = delete; | ||
| 111 | BufferCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept : ChannelInfo(state) {} | ||
| 112 | BufferCacheChannelInfo(const BufferCacheChannelInfo& state) = delete; | ||
| 113 | BufferCacheChannelInfo& operator=(const BufferCacheChannelInfo&) = delete; | ||
| 114 | |||
| 115 | Binding index_buffer; | ||
| 116 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 117 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 118 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 119 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 120 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 121 | Binding count_buffer_binding; | ||
| 122 | Binding indirect_buffer_binding; | ||
| 123 | |||
| 124 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 125 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 126 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 127 | |||
| 128 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 129 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 130 | |||
| 131 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 132 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 133 | |||
| 134 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 135 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 136 | u32 enabled_compute_storage_buffers = 0; | ||
| 137 | u32 written_compute_storage_buffers = 0; | ||
| 138 | |||
| 139 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 140 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 141 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 142 | u32 enabled_compute_texture_buffers = 0; | ||
| 143 | u32 written_compute_texture_buffers = 0; | ||
| 144 | u32 image_compute_texture_buffers = 0; | ||
| 145 | |||
| 146 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 147 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 148 | |||
| 149 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 150 | |||
| 151 | bool has_deleted_buffers = false; | ||
| 152 | |||
| 153 | std::array<u32, NUM_STAGES> dirty_uniform_buffers{}; | ||
| 154 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | ||
| 155 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> | ||
| 156 | uniform_buffer_binding_sizes{}; | ||
| 157 | }; | ||
| 158 | |||
| 159 | template <class P> | ||
| 160 | class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> { | ||
| 91 | // Page size for caching purposes. | 161 | // Page size for caching purposes. |
| 92 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. | 162 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. |
| 93 | static constexpr u32 CACHING_PAGEBITS = 16; | 163 | static constexpr u32 CACHING_PAGEBITS = 16; |
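Most of the per-slot bookkeeping in the new `BufferCacheChannelInfo` above (`enabled_*`, `written_*`, `fast_bound_uniform_buffers`, and so on) packs one bit per binding slot into a `u32`, which is what makes tests like the shift-and-mask in `HasFastUniformBufferBound` cheap. A hedged sketch of that convention, with illustrative stage and slot counts rather than the real `NUM_STAGES`:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

constexpr std::size_t kStages = 5; // illustrative, not the real constant

// One u32 per stage, one bit per binding index within the stage.
std::array<std::uint32_t, kStages> fast_bound{};

void MarkFastBound(std::size_t stage, std::uint32_t binding_index) {
    fast_bound[stage] |= 1u << binding_index;
}

bool IsFastBound(std::size_t stage, std::uint32_t binding_index) {
    // Same shift-and-mask shape as HasFastUniformBufferBound in the diff.
    return ((fast_bound[stage] >> binding_index) & 1u) != 0;
}
```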
| @@ -104,8 +174,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI | |||
| 104 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | 174 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; |
| 105 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | 175 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; |
| 106 | 176 | ||
| 107 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 108 | |||
| 109 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | 177 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; |
| 110 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | 178 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; |
| 111 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | 179 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; |
| @@ -149,8 +217,6 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI | |||
| 149 | using OverlapSection = boost::icl::inter_section<int>; | 217 | using OverlapSection = boost::icl::inter_section<int>; |
| 150 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 218 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; |
| 151 | 219 | ||
| 152 | struct Empty {}; | ||
| 153 | |||
| 154 | struct OverlapResult { | 220 | struct OverlapResult { |
| 155 | std::vector<BufferId> ids; | 221 | std::vector<BufferId> ids; |
| 156 | VAddr begin; | 222 | VAddr begin; |
| @@ -158,25 +224,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI | |||
| 158 | bool has_stream_leap = false; | 224 | bool has_stream_leap = false; |
| 159 | }; | 225 | }; |
| 160 | 226 | ||
| 161 | struct Binding { | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | u32 size{}; | ||
| 164 | BufferId buffer_id; | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct TextureBufferBinding : Binding { | ||
| 168 | PixelFormat format; | ||
| 169 | }; | ||
| 170 | |||
| 171 | static constexpr Binding NULL_BINDING{ | ||
| 172 | .cpu_addr = 0, | ||
| 173 | .size = 0, | ||
| 174 | .buffer_id = NULL_BUFFER_ID, | ||
| 175 | }; | ||
| 176 | |||
| 177 | public: | 227 | public: |
| 178 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 179 | |||
| 180 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 228 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 181 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | 229 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); |
| 182 | 230 | ||
| @@ -496,51 +544,6 @@ private: | |||
| 496 | 544 | ||
| 497 | u32 last_index_count = 0; | 545 | u32 last_index_count = 0; |
| 498 | 546 | ||
| 499 | Binding index_buffer; | ||
| 500 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 501 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 502 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 503 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 504 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 505 | Binding count_buffer_binding; | ||
| 506 | Binding indirect_buffer_binding; | ||
| 507 | |||
| 508 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 509 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 510 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 511 | |||
| 512 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 513 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 514 | |||
| 515 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 516 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 517 | |||
| 518 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 519 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 520 | u32 enabled_compute_storage_buffers = 0; | ||
| 521 | u32 written_compute_storage_buffers = 0; | ||
| 522 | |||
| 523 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 524 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 525 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 526 | u32 enabled_compute_texture_buffers = 0; | ||
| 527 | u32 written_compute_texture_buffers = 0; | ||
| 528 | u32 image_compute_texture_buffers = 0; | ||
| 529 | |||
| 530 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 531 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 532 | |||
| 533 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 534 | |||
| 535 | bool has_deleted_buffers = false; | ||
| 536 | |||
| 537 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 538 | dirty_uniform_buffers{}; | ||
| 539 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 540 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 541 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 542 | uniform_buffer_binding_sizes{}; | ||
| 543 | |||
| 544 | MemoryTracker memory_tracker; | 547 | MemoryTracker memory_tracker; |
| 545 | IntervalSet uncommitted_ranges; | 548 | IntervalSet uncommitted_ranges; |
| 546 | IntervalSet common_ranges; | 549 | IntervalSet common_ranges; |
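The deleted tail of this hunk also retires the `Empty` trick: the old cache compiled out backend-specific members with `std::conditional_t<..., Empty>`, whereas `BufferCacheChannelInfo` declares `dirty_uniform_buffers`, `fast_bound_uniform_buffers`, and `uniform_buffer_binding_sizes` unconditionally, plausibly because the channel-info class is no longer templated on `P` and so cannot see the backend flags. A sketch of the removed idiom, with illustrative names and sizes:

```cpp
#include <array>
#include <cstdint>
#include <type_traits>

struct Empty {};

template <bool kIsOpenGL>
struct CacheState {
    // Real storage only when the backend can use it; otherwise an empty
    // tag type that carries no data (though it still occupies one byte).
    std::conditional_t<kIsOpenGL, std::array<std::uint32_t, 5>, Empty>
        fast_bound_uniform_buffers{};
};

// The OpenGL instantiation pays for the array; the other one does not.
static_assert(sizeof(CacheState<false>) < sizeof(CacheState<true>));
```

The cost of dropping the idiom is a few always-present words per channel; the gain is that the channel info stays a plain non-template class that every backend can share.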