diff options
| author | 2023-08-20 17:53:08 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:30 +0200 | |
| commit | c8237d5c312485394389b2520451ef720604ea9a (patch) | |
| tree | 1a1064ed38a7a53bd61e4c04bf4571cdebfce2ec /src | |
| parent | Query Cache: Fix guest side sample counting (diff) | |
| download | yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.gz yuzu-c8237d5c312485394389b2520451ef720604ea9a.tar.xz yuzu-c8237d5c312485394389b2520451ef720604ea9a.zip | |
Query Cache: Implement host side sample counting.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/host_shaders/queries_prefix_scan_sum.comp | 124 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 147 |
5 files changed, 348 insertions, 48 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index fb24b6532..8218ec4c8 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -41,6 +41,7 @@ set(SHADER_FILES | |||
| 41 | pitch_unswizzle.comp | 41 | pitch_unswizzle.comp |
| 42 | present_bicubic.frag | 42 | present_bicubic.frag |
| 43 | present_gaussian.frag | 43 | present_gaussian.frag |
| 44 | queries_prefix_scan_sum.comp | ||
| 44 | resolve_conditional_render.comp | 45 | resolve_conditional_render.comp |
| 45 | smaa_edge_detection.vert | 46 | smaa_edge_detection.vert |
| 46 | smaa_edge_detection.frag | 47 | smaa_edge_detection.frag |
diff --git a/src/video_core/host_shaders/queries_prefix_scan_sum.comp b/src/video_core/host_shaders/queries_prefix_scan_sum.comp new file mode 100644 index 000000000..dce1279fe --- /dev/null +++ b/src/video_core/host_shaders/queries_prefix_scan_sum.comp | |||
| @@ -0,0 +1,124 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2015 Graham Sellers, Richard Wright Jr. and Nicholas Haemel | ||
| 2 | // SPDX-License-Identifier: MIT | ||
| 3 | |||
| 4 | // Code obtained from OpenGL SuperBible, Seventh Edition by Graham Sellers, Richard Wright Jr. and | ||
| 5 | // Nicholas Haemel. Modified to suit needs and optimize for subgroup | ||
| 6 | |||
#version 460 core

// Backend abstraction: Vulkan passes the two parameters as push constants, OpenGL as plain
// uniforms with explicit locations.
#ifdef VULKAN

#extension GL_KHR_shader_subgroup_arithmetic : enable
#define HAS_EXTENDED_TYPES 1
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS                                                                         \
    }                                                                                              \
    ;
#define UNIFORM(n)
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 1

#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv

#extension GL_KHR_shader_subgroup_arithmetic : enable
#extension GL_NV_gpu_shader5 : enable
#ifdef GL_NV_gpu_shader5
#define HAS_EXTENDED_TYPES 1
#else
#define HAS_EXTENDED_TYPES 0
#endif
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout(location = n) uniform
#define BINDING_INPUT_BUFFER 0
#define BINDING_OUTPUT_IMAGE 0

#endif

BEGIN_PUSH_CONSTANTS
// Entries with an index below this value get the carried-over accumulation added to them.
UNIFORM(0) uint max_accumulation_base;
// Index of the entry used to update the carried-over accumulation.
UNIFORM(1) uint accumulation_limit;
END_PUSH_CONSTANTS

layout(local_size_x = 32) in;

// Every invocation handles two entries, so a workgroup touches gl_WorkGroupSize.x * 2 entries.
// The arrays are runtime-sized because a fixed size of gl_WorkGroupSize.x would make the
// accesses at indices [gl_WorkGroupSize.x, gl_WorkGroupSize.x * 2) out of bounds.

// 64-bit values to scan, stored as (low word, high word) pairs.
layout(std430, binding = 0) readonly buffer block1 {
    uvec2 input_data[];
};

// Receives the scanned values.
layout(std430, binding = 1) writeonly coherent buffer block2 {
    uvec2 output_data[];
};

// Single 64-bit value that is read at the start and rewritten at the end of the shader.
layout(std430, binding = 2) coherent buffer block3 {
    uvec2 accumulated_data;
};

// Scratch storage for the scan, two entries per invocation.
shared uvec2 shared_data[gl_WorkGroupSize.x * 2];
| 58 | |||
// Adds two unsigned 64-bit integers stored as (low word, high word) pairs.
// The result wraps around modulo 2^64.
uvec2 AddUint64(uvec2 value_1, uvec2 value_2) {
    uint low_overflow = 0;
    // uaddCarry reports through its out parameter whether the low words overflowed.
    uint low_word = uaddCarry(value_1.x, value_2.x, low_overflow);
    uint high_word = value_1.y + value_2.y + low_overflow;
    return uvec2(low_word, high_word);
}
| 66 | |||
// Subtracts two unsigned 64-bit integers stored as (low word, high word) pairs and returns
// value_1 - value_2 modulo 2^64. Negating each 32-bit half of value_2 and adding it is not
// equivalent: that yields a high word that is one too large whenever value_2.x is non-zero.
uvec2 SubUint64(uvec2 value_1, uvec2 value_2) {
    uint borrow = 0;
    uvec2 result;
    result.x = usubBorrow(value_1.x, value_2.x, borrow);
    result.y = value_1.y - value_2.y - borrow;
    return result;
}

// Computes an inclusive prefix sum over gl_WorkGroupSize.x * 2 64-bit entries of input_data,
// adds the previously accumulated value to every entry whose index is below
// max_accumulation_base, writes the result to output_data and finally updates accumulated_data.
void main(void) {
    uint id = gl_LocalInvocationID.x;
    // Read the carried-over accumulation up front; invocation 0 overwrites it at the very end.
    uvec2 base_value_1 = (id * 2) < max_accumulation_base ? accumulated_data : uvec2(0);
    uvec2 base_value_2 = (id * 2 + 1) < max_accumulation_base ? accumulated_data : uvec2(0);
    uint work_size = gl_WorkGroupSize.x;
    uint rd_id;
    uint wr_id;
    uint mask;
    uvec2 input_1 = input_data[id * 2];
    uvec2 input_2 = input_data[id * 2 + 1];
    // The number of steps is the log base 2 of the number of scanned elements
    // (work group size * 2), which should be a power of 2. findMSB keeps this in integer
    // arithmetic; truncating a floating point log2 could lose a step if the result is inexact.
    const uint steps = uint(findMSB(work_size)) + 1;
    uint step = 0;

    // Each invocation is responsible for the content of
    // two elements of the output array
    shared_data[id * 2] = input_1;
    shared_data[id * 2 + 1] = input_2;
    // Synchronize to make sure that everyone has initialized
    // their elements of shared_data[] with data loaded from
    // the input arrays
    barrier();
    memoryBarrierShared();
    // For each step...
    for (step = 0; step < steps; step++) {
        // Calculate the read and write index in the
        // shared array
        mask = (1 << step) - 1;
        rd_id = ((id >> step) << (step + 1)) + mask;
        wr_id = rd_id + 1 + (id & mask);
        // Accumulate the read data into our element

        shared_data[wr_id] = AddUint64(shared_data[rd_id], shared_data[wr_id]);
        // Synchronize again to make sure that everyone
        // has caught up with us
        barrier();
        memoryBarrierShared();
    }
    // Add the accumulation
    shared_data[id * 2] = AddUint64(shared_data[id * 2], base_value_1);
    shared_data[id * 2 + 1] = AddUint64(shared_data[id * 2 + 1], base_value_2);
    barrier();
    memoryBarrierShared();

    // Finally write our data back to the output buffer
    output_data[id * 2] = shared_data[id * 2];
    output_data[id * 2 + 1] = shared_data[id * 2 + 1];
    // A single invocation updates the value carried over to the next dispatch
    if (id == 0) {
        if (max_accumulation_base >= accumulation_limit + 1) {
            // The entry at accumulation_limit already includes the carried-over base
            accumulated_data = shared_data[accumulation_limit];
            return;
        }
        uvec2 value_1 = shared_data[max_accumulation_base];
        uvec2 value_2 = shared_data[accumulation_limit];
        // NOTE(review): the operand order is kept exactly as written (value_1 - value_2);
        // only the 64-bit arithmetic of the subtraction is corrected here.
        accumulated_data = SubUint64(value_1, value_2);
    }
}
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 039dc95e1..a1af08cda 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" |
| 15 | #include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" | ||
| 15 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | 16 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" |
| 16 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 17 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 17 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 18 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| @@ -58,6 +59,30 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SE | |||
| 58 | }, | 59 | }, |
| 59 | }}; | 60 | }}; |
| 60 | 61 | ||
| 62 | constexpr std::array<VkDescriptorSetLayoutBinding, 3> QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS{{ | ||
| 63 | { | ||
| 64 | .binding = 0, | ||
| 65 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 66 | .descriptorCount = 1, | ||
| 67 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 68 | .pImmutableSamplers = nullptr, | ||
| 69 | }, | ||
| 70 | { | ||
| 71 | .binding = 1, | ||
| 72 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 73 | .descriptorCount = 1, | ||
| 74 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 75 | .pImmutableSamplers = nullptr, | ||
| 76 | }, | ||
| 77 | { | ||
| 78 | .binding = 2, | ||
| 79 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 80 | .descriptorCount = 1, | ||
| 81 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 82 | .pImmutableSamplers = nullptr, | ||
| 83 | }, | ||
| 84 | }}; | ||
| 85 | |||
| 61 | constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ | 86 | constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ |
| 62 | .uniform_buffers = 0, | 87 | .uniform_buffers = 0, |
| 63 | .storage_buffers = 2, | 88 | .storage_buffers = 2, |
| @@ -68,6 +93,16 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ | |||
| 68 | .score = 2, | 93 | .score = 2, |
| 69 | }; | 94 | }; |
| 70 | 95 | ||
| 96 | constexpr DescriptorBankInfo QUERIES_SCAN_BANK_INFO{ | ||
| 97 | .uniform_buffers = 0, | ||
| 98 | .storage_buffers = 3, | ||
| 99 | .texture_buffers = 0, | ||
| 100 | .image_buffers = 0, | ||
| 101 | .textures = 0, | ||
| 102 | .images = 0, | ||
| 103 | .score = 3, | ||
| 104 | }; | ||
| 105 | |||
| 71 | constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{ | 106 | constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{ |
| 72 | { | 107 | { |
| 73 | .binding = ASTC_BINDING_INPUT_BUFFER, | 108 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| @@ -104,6 +139,15 @@ constexpr VkDescriptorUpdateTemplateEntry INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLAT | |||
| 104 | .stride = sizeof(DescriptorUpdateEntry), | 139 | .stride = sizeof(DescriptorUpdateEntry), |
| 105 | }; | 140 | }; |
| 106 | 141 | ||
| 142 | constexpr VkDescriptorUpdateTemplateEntry QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE{ | ||
| 143 | .dstBinding = 0, | ||
| 144 | .dstArrayElement = 0, | ||
| 145 | .descriptorCount = 3, | ||
| 146 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 147 | .offset = 0, | ||
| 148 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 149 | }; | ||
| 150 | |||
| 107 | constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS> | 151 | constexpr std::array<VkDescriptorUpdateTemplateEntry, ASTC_NUM_BINDINGS> |
| 108 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ | 152 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ |
| 109 | { | 153 | { |
| @@ -132,6 +176,11 @@ struct AstcPushConstants { | |||
| 132 | u32 block_height; | 176 | u32 block_height; |
| 133 | u32 block_height_mask; | 177 | u32 block_height_mask; |
| 134 | }; | 178 | }; |
| 179 | |||
| 180 | struct QueriesPrefixScanPushConstants { | ||
| 181 | u32 max_accumulation_base; | ||
| 182 | u32 accumulation_limit; | ||
| 183 | }; | ||
| 135 | } // Anonymous namespace | 184 | } // Anonymous namespace |
| 136 | 185 | ||
| 137 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | 186 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, |
| @@ -313,8 +362,6 @@ ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( | |||
| 313 | 362 | ||
| 314 | void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, | 363 | void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 315 | u32 src_offset, bool compare_to_zero) { | 364 | u32 src_offset, bool compare_to_zero) { |
| 316 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 317 | |||
| 318 | const size_t compare_size = compare_to_zero ? 8 : 24; | 365 | const size_t compare_size = compare_to_zero ? 8 : 24; |
| 319 | 366 | ||
| 320 | compute_pass_descriptor_queue.Acquire(); | 367 | compute_pass_descriptor_queue.Acquire(); |
| @@ -327,7 +374,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ | |||
| 327 | static constexpr VkMemoryBarrier read_barrier{ | 374 | static constexpr VkMemoryBarrier read_barrier{ |
| 328 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 375 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 329 | .pNext = nullptr, | 376 | .pNext = nullptr, |
| 330 | .srcAccessMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | 377 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| 331 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | 378 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| 332 | }; | 379 | }; |
| 333 | static constexpr VkMemoryBarrier write_barrier{ | 380 | static constexpr VkMemoryBarrier write_barrier{ |
| @@ -349,6 +396,63 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ | |||
| 349 | }); | 396 | }); |
| 350 | } | 397 | } |
| 351 | 398 | ||
| 399 | QueriesPrefixScanPass::QueriesPrefixScanPass( | ||
| 400 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | ||
| 401 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | ||
| 402 | : ComputePass(device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, | ||
| 403 | QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, | ||
| 404 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>, | ||
| 405 | QUERIES_PREFIX_SCAN_SUM_COMP_SPV), | ||
| 406 | scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | ||
| 407 | |||
| 408 | void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, | ||
| 409 | VkBuffer src_buffer, size_t number_of_sums, | ||
| 410 | size_t max_accumulation_limit) { | ||
| 411 | size_t aligned_runs = Common::AlignUp(number_of_sums, 32); | ||
| 412 | |||
| 413 | compute_pass_descriptor_queue.Acquire(); | ||
| 414 | compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, aligned_runs * sizeof(u64)); | ||
| 415 | compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, aligned_runs * sizeof(u64)); | ||
| 416 | compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64)); | ||
| 417 | const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; | ||
| 418 | |||
| 419 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 420 | scheduler.Record([this, descriptor_data, max_accumulation_limit, number_of_sums, | ||
| 421 | aligned_runs](vk::CommandBuffer cmdbuf) { | ||
| 422 | static constexpr VkMemoryBarrier read_barrier{ | ||
| 423 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 424 | .pNext = nullptr, | ||
| 425 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 426 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 427 | }; | ||
| 428 | static constexpr VkMemoryBarrier write_barrier{ | ||
| 429 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 430 | .pNext = nullptr, | ||
| 431 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 432 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT | | ||
| 433 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | | ||
| 434 | VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | | ||
| 435 | VK_ACCESS_UNIFORM_READ_BIT | | ||
| 436 | VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT, | ||
| 437 | }; | ||
| 438 | const QueriesPrefixScanPushConstants uniforms{ | ||
| 439 | .max_accumulation_base = static_cast<u32>(max_accumulation_limit), | ||
| 440 | .accumulation_limit = static_cast<u32>(number_of_sums - 1), | ||
| 441 | }; | ||
| 442 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 443 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||
| 444 | |||
| 445 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 446 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier); | ||
| 447 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 448 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 449 | cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 450 | cmdbuf.Dispatch(static_cast<u32>(aligned_runs / 32U), 1, 1); | ||
| 451 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 452 | VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT, 0, write_barrier); | ||
| 453 | }); | ||
| 454 | } | ||
| 455 | |||
| 352 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 456 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
| 353 | DescriptorPool& descriptor_pool_, | 457 | DescriptorPool& descriptor_pool_, |
| 354 | StagingBufferPool& staging_buffer_pool_, | 458 | StagingBufferPool& staging_buffer_pool_, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index c62f30d30..e6ff86e9a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -95,6 +95,20 @@ private: | |||
| 95 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | 95 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; |
| 96 | }; | 96 | }; |
| 97 | 97 | ||
| 98 | class QueriesPrefixScanPass final : public ComputePass { | ||
| 99 | public: | ||
| 100 | explicit QueriesPrefixScanPass(const Device& device_, Scheduler& scheduler_, | ||
| 101 | DescriptorPool& descriptor_pool_, | ||
| 102 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_); | ||
| 103 | |||
| 104 | void Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, VkBuffer src_buffer, | ||
| 105 | size_t number_of_sums, size_t max_accumulation_limit); | ||
| 106 | |||
| 107 | private: | ||
| 108 | Scheduler& scheduler; | ||
| 109 | ComputePassDescriptorQueue& compute_pass_descriptor_queue; | ||
| 110 | }; | ||
| 111 | |||
| 98 | class ASTCDecoderPass final : public ComputePass { | 112 | class ASTCDecoderPass final : public ComputePass { |
| 99 | public: | 113 | public: |
| 100 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, | 114 | explicit ASTCDecoderPass(const Device& device_, Scheduler& scheduler_, |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 2147776f8..ded190ae0 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <utility> | 11 | #include <utility> |
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | 13 | ||
| 14 | #include "common/bit_util.h" | ||
| 14 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 15 | #include "core/memory.h" | 16 | #include "core/memory.h" |
| 16 | #include "video_core/engines/draw_manager.h" | 17 | #include "video_core/engines/draw_manager.h" |
| @@ -112,14 +113,34 @@ class SamplesStreamer : public BaseStreamer { | |||
| 112 | public: | 113 | public: |
| 113 | explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, | 114 | explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, |
| 114 | VideoCore::RasterizerInterface* rasterizer_, const Device& device_, | 115 | VideoCore::RasterizerInterface* rasterizer_, const Device& device_, |
| 115 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) | 116 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, |
| 117 | ComputePassDescriptorQueue& compute_pass_descriptor_queue, | ||
| 118 | DescriptorPool& descriptor_pool) | ||
| 116 | : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, | 119 | : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, |
| 117 | scheduler{scheduler_}, memory_allocator{memory_allocator_} { | 120 | scheduler{scheduler_}, memory_allocator{memory_allocator_} { |
| 118 | BuildResolveBuffer(); | ||
| 119 | current_bank = nullptr; | 121 | current_bank = nullptr; |
| 120 | current_query = nullptr; | 122 | current_query = nullptr; |
| 121 | ammend_value = 0; | 123 | ammend_value = 0; |
| 122 | acumulation_value = 0; | 124 | acumulation_value = 0; |
| 125 | queries_prefix_scan_pass = std::make_unique<QueriesPrefixScanPass>( | ||
| 126 | device, scheduler, descriptor_pool, compute_pass_descriptor_queue); | ||
| 127 | |||
| 128 | const VkBufferCreateInfo buffer_ci = { | ||
| 129 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 130 | .pNext = nullptr, | ||
| 131 | .flags = 0, | ||
| 132 | .size = 8, | ||
| 133 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | ||
| 134 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | ||
| 135 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 136 | .queueFamilyIndexCount = 0, | ||
| 137 | .pQueueFamilyIndices = nullptr, | ||
| 138 | }; | ||
| 139 | accumulation_buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 140 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 141 | scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) { | ||
| 142 | cmdbuf.FillBuffer(buffer, 0, 8, 0); | ||
| 143 | }); | ||
| 123 | } | 144 | } |
| 124 | 145 | ||
| 125 | ~SamplesStreamer() = default; | 146 | ~SamplesStreamer() = default; |
| @@ -159,6 +180,8 @@ public: | |||
| 159 | acumulation_value = 0; | 180 | acumulation_value = 0; |
| 160 | }); | 181 | }); |
| 161 | rasterizer->SyncOperation(std::move(func)); | 182 | rasterizer->SyncOperation(std::move(func)); |
| 183 | accumulation_since_last_sync = false; | ||
| 184 | last_accumulation_checkpoint = std::min(last_accumulation_checkpoint, num_slots_used); | ||
| 162 | } | 185 | } |
| 163 | 186 | ||
| 164 | void CloseCounter() override { | 187 | void CloseCounter() override { |
| @@ -175,7 +198,8 @@ public: | |||
| 175 | } | 198 | } |
| 176 | 199 | ||
| 177 | for (size_t i = 0; i < sync_values_stash.size(); i++) { | 200 | for (size_t i = 0; i < sync_values_stash.size(); i++) { |
| 178 | runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], *resolve_buffers[i]); | 201 | runtime.template SyncValues<HostSyncValues>(sync_values_stash[i], |
| 202 | *buffers[resolve_buffers[i]]); | ||
| 179 | } | 203 | } |
| 180 | 204 | ||
| 181 | sync_values_stash.clear(); | 205 | sync_values_stash.clear(); |
| @@ -189,36 +213,21 @@ public: | |||
| 189 | sync_values_stash.clear(); | 213 | sync_values_stash.clear(); |
| 190 | sync_values_stash.emplace_back(); | 214 | sync_values_stash.emplace_back(); |
| 191 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); | 215 | std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); |
| 192 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | 216 | sync_values->reserve(num_slots_used); |
| 193 | std::unordered_map<size_t, std::pair<size_t, size_t>> offsets; | 217 | std::unordered_map<size_t, std::pair<size_t, size_t>> offsets; |
| 194 | size_t this_bank_slot = std::numeric_limits<size_t>::max(); | 218 | resolve_buffers.clear(); |
| 195 | size_t resolve_slots_remaining = resolve_slots; | 219 | size_t resolve_buffer_index = ObtainBuffer<true>(num_slots_used); |
| 196 | size_t resolve_buffer_index = 0; | 220 | resolve_buffers.push_back(resolve_buffer_index); |
| 221 | size_t base_offset = 0; | ||
| 222 | |||
| 197 | ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start, | 223 | ApplyBanksWideOp<true>(pending_sync, [&](SamplesQueryBank* bank, size_t start, |
| 198 | size_t amount) { | 224 | size_t amount) { |
| 199 | size_t bank_id = bank->GetIndex(); | 225 | size_t bank_id = bank->GetIndex(); |
| 200 | if (this_bank_slot != bank_id) { | 226 | auto& resolve_buffer = buffers[resolve_buffer_index]; |
| 201 | this_bank_slot = bank_id; | ||
| 202 | if (resolve_slots_remaining == 0) { | ||
| 203 | resolve_buffer_index++; | ||
| 204 | if (resolve_buffer_index >= resolve_buffers.size()) { | ||
| 205 | BuildResolveBuffer(); | ||
| 206 | } | ||
| 207 | resolve_slots_remaining = resolve_slots; | ||
| 208 | sync_values_stash.emplace_back(); | ||
| 209 | sync_values = &sync_values_stash.back(); | ||
| 210 | sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); | ||
| 211 | } | ||
| 212 | resolve_slots_remaining--; | ||
| 213 | } | ||
| 214 | auto& resolve_buffer = resolve_buffers[resolve_buffer_index]; | ||
| 215 | const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * | ||
| 216 | (resolve_slots - resolve_slots_remaining - 1); | ||
| 217 | VkQueryPool query_pool = bank->GetInnerPool(); | 227 | VkQueryPool query_pool = bank->GetInnerPool(); |
| 218 | scheduler.RequestOutsideRenderPassOperationContext(); | 228 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 219 | scheduler.Record([start, amount, base_offset, query_pool, | 229 | scheduler.Record([start, amount, base_offset, query_pool, |
| 220 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { | 230 | buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { |
| 221 | size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; | ||
| 222 | const VkBufferMemoryBarrier copy_query_pool_barrier{ | 231 | const VkBufferMemoryBarrier copy_query_pool_barrier{ |
| 223 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 232 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, |
| 224 | .pNext = nullptr, | 233 | .pNext = nullptr, |
| @@ -227,39 +236,60 @@ public: | |||
| 227 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 236 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 228 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 237 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 229 | .buffer = buffer, | 238 | .buffer = buffer, |
| 230 | .offset = final_offset, | 239 | .offset = base_offset, |
| 231 | .size = amount * SamplesQueryBank::QUERY_SIZE, | 240 | .size = amount * SamplesQueryBank::QUERY_SIZE, |
| 232 | }; | 241 | }; |
| 233 | 242 | ||
| 234 | cmdbuf.CopyQueryPoolResults( | 243 | cmdbuf.CopyQueryPoolResults( |
| 235 | query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer, | 244 | query_pool, static_cast<u32>(start), static_cast<u32>(amount), buffer, |
| 236 | static_cast<u32>(final_offset), SamplesQueryBank::QUERY_SIZE, | 245 | static_cast<u32>(base_offset), SamplesQueryBank::QUERY_SIZE, |
| 237 | VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); | 246 | VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT); |
| 238 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | 247 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 239 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier); | 248 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, copy_query_pool_barrier); |
| 240 | }); | 249 | }); |
| 241 | offsets[bank_id] = {sync_values_stash.size() - 1, base_offset}; | 250 | offsets[bank_id] = {start, base_offset}; |
| 251 | base_offset += amount * SamplesQueryBank::QUERY_SIZE; | ||
| 242 | }); | 252 | }); |
| 243 | 253 | ||
| 244 | // Convert queries | 254 | // Convert queries |
| 255 | bool has_multi_queries = false; | ||
| 245 | for (auto q : pending_sync) { | 256 | for (auto q : pending_sync) { |
| 246 | auto* query = GetQuery(q); | 257 | auto* query = GetQuery(q); |
| 258 | size_t sync_value_slot = 0; | ||
| 247 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { | 259 | if (True(query->flags & VideoCommon::QueryFlagBits::IsRewritten)) { |
| 248 | continue; | 260 | continue; |
| 249 | } | 261 | } |
| 250 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { | 262 | if (True(query->flags & VideoCommon::QueryFlagBits::IsInvalidated)) { |
| 251 | continue; | 263 | continue; |
| 252 | } | 264 | } |
| 253 | if (query->size_slots > 1) { | 265 | if (accumulation_since_last_sync || query->size_slots > 1) { |
| 254 | // This is problematic. | 266 | if (!has_multi_queries) { |
| 255 | // UNIMPLEMENTED(); | 267 | has_multi_queries = true; |
| 268 | sync_values_stash.emplace_back(); | ||
| 269 | } | ||
| 270 | sync_value_slot = 1; | ||
| 256 | } | 271 | } |
| 257 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | 272 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; |
| 258 | auto loc_data = offsets[query->start_bank_id]; | 273 | auto loc_data = offsets[query->start_bank_id]; |
| 259 | sync_values_stash[loc_data.first].emplace_back(HostSyncValues{ | 274 | sync_values_stash[sync_value_slot].emplace_back(HostSyncValues{ |
| 260 | .address = query->guest_address, | 275 | .address = query->guest_address, |
| 261 | .size = SamplesQueryBank::QUERY_SIZE, | 276 | .size = SamplesQueryBank::QUERY_SIZE, |
| 262 | .offset = loc_data.second + query->start_slot * SamplesQueryBank::QUERY_SIZE, | 277 | .offset = |
| 278 | loc_data.second + (query->start_slot - loc_data.first + query->size_slots - 1) * | ||
| 279 | SamplesQueryBank::QUERY_SIZE, | ||
| 280 | }); | ||
| 281 | } | ||
| 282 | |||
| 283 | if (has_multi_queries) { | ||
| 284 | size_t intermediary_buffer_index = ObtainBuffer<false>(num_slots_used); | ||
| 285 | resolve_buffers.push_back(intermediary_buffer_index); | ||
| 286 | queries_prefix_scan_pass->Run(*accumulation_buffer, *buffers[intermediary_buffer_index], | ||
| 287 | *buffers[resolve_buffer_index], num_slots_used, | ||
| 288 | std::min(last_accumulation_checkpoint, num_slots_used)); | ||
| 289 | } else { | ||
| 290 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 291 | scheduler.Record([buffer = *accumulation_buffer](vk::CommandBuffer cmdbuf) { | ||
| 292 | cmdbuf.FillBuffer(buffer, 0, 8, 0); | ||
| 263 | }); | 293 | }); |
| 264 | } | 294 | } |
| 265 | 295 | ||
| @@ -267,6 +297,9 @@ public: | |||
| 267 | std::function<void()> func([this] { ammend_value = acumulation_value; }); | 297 | std::function<void()> func([this] { ammend_value = acumulation_value; }); |
| 268 | rasterizer->SyncOperation(std::move(func)); | 298 | rasterizer->SyncOperation(std::move(func)); |
| 269 | AbandonCurrentQuery(); | 299 | AbandonCurrentQuery(); |
| 300 | num_slots_used = 0; | ||
| 301 | last_accumulation_checkpoint = std::numeric_limits<size_t>::max(); | ||
| 302 | accumulation_since_last_sync = has_multi_queries; | ||
| 270 | pending_sync.clear(); | 303 | pending_sync.clear(); |
| 271 | } | 304 | } |
| 272 | 305 | ||
| @@ -400,6 +433,7 @@ private: | |||
| 400 | void ReserveHostQuery() { | 433 | void ReserveHostQuery() { |
| 401 | size_t new_slot = ReserveBankSlot(); | 434 | size_t new_slot = ReserveBankSlot(); |
| 402 | current_bank->AddReference(1); | 435 | current_bank->AddReference(1); |
| 436 | num_slots_used++; | ||
| 403 | if (current_query) { | 437 | if (current_query) { |
| 404 | size_t bank_id = current_query->start_bank_id; | 438 | size_t bank_id = current_query->start_bank_id; |
| 405 | size_t banks_set = current_query->size_banks - 1; | 439 | size_t banks_set = current_query->size_banks - 1; |
| @@ -470,32 +504,50 @@ private: | |||
| 470 | }); | 504 | }); |
| 471 | } | 505 | } |
| 472 | 506 | ||
| 473 | void BuildResolveBuffer() { | 507 | template <bool is_resolve> |
| 508 | size_t ObtainBuffer(size_t num_needed) { | ||
| 509 | const size_t log_2 = std::max<size_t>(6U, Common::Log2Ceil64(num_needed)); | ||
| 510 | if constexpr (is_resolve) { | ||
| 511 | if (resolve_table[log_2] != 0) { | ||
| 512 | return resolve_table[log_2] - 1; | ||
| 513 | } | ||
| 514 | } else { | ||
| 515 | if (intermediary_table[log_2] != 0) { | ||
| 516 | return intermediary_table[log_2] - 1; | ||
| 517 | } | ||
| 518 | } | ||
| 474 | const VkBufferCreateInfo buffer_ci = { | 519 | const VkBufferCreateInfo buffer_ci = { |
| 475 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 520 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 476 | .pNext = nullptr, | 521 | .pNext = nullptr, |
| 477 | .flags = 0, | 522 | .flags = 0, |
| 478 | .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, | 523 | .size = SamplesQueryBank::QUERY_SIZE * (1ULL << log_2), |
| 479 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | 524 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | |
| 480 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, | 525 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 481 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 526 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 482 | .queueFamilyIndexCount = 0, | 527 | .queueFamilyIndexCount = 0, |
| 483 | .pQueueFamilyIndices = nullptr, | 528 | .pQueueFamilyIndices = nullptr, |
| 484 | }; | 529 | }; |
| 485 | resolve_buffers.emplace_back( | 530 | buffers.emplace_back(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)); |
| 486 | memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)); | 531 | if constexpr (is_resolve) { |
| 532 | resolve_table[log_2] = buffers.size(); | ||
| 533 | } else { | ||
| 534 | intermediary_table[log_2] = buffers.size(); | ||
| 535 | } | ||
| 536 | return buffers.size() - 1; | ||
| 487 | } | 537 | } |
| 488 | 538 | ||
| 489 | static constexpr size_t resolve_slots = 8; | ||
| 490 | |||
| 491 | QueryCacheRuntime& runtime; | 539 | QueryCacheRuntime& runtime; |
| 492 | VideoCore::RasterizerInterface* rasterizer; | 540 | VideoCore::RasterizerInterface* rasterizer; |
| 493 | const Device& device; | 541 | const Device& device; |
| 494 | Scheduler& scheduler; | 542 | Scheduler& scheduler; |
| 495 | const MemoryAllocator& memory_allocator; | 543 | const MemoryAllocator& memory_allocator; |
| 496 | VideoCommon::BankPool<SamplesQueryBank> bank_pool; | 544 | VideoCommon::BankPool<SamplesQueryBank> bank_pool; |
| 497 | std::deque<vk::Buffer> resolve_buffers; | 545 | std::deque<vk::Buffer> buffers; |
| 546 | std::array<size_t, 32> resolve_table{}; | ||
| 547 | std::array<size_t, 32> intermediary_table{}; | ||
| 548 | vk::Buffer accumulation_buffer; | ||
| 498 | std::deque<std::vector<HostSyncValues>> sync_values_stash; | 549 | std::deque<std::vector<HostSyncValues>> sync_values_stash; |
| 550 | std::vector<size_t> resolve_buffers; | ||
| 499 | 551 | ||
| 500 | // syncing queue | 552 | // syncing queue |
| 501 | std::vector<size_t> pending_sync; | 553 | std::vector<size_t> pending_sync; |
| @@ -510,10 +562,14 @@ private: | |||
| 510 | SamplesQueryBank* current_bank; | 562 | SamplesQueryBank* current_bank; |
| 511 | VkQueryPool current_query_pool; | 563 | VkQueryPool current_query_pool; |
| 512 | size_t current_query_id; | 564 | size_t current_query_id; |
| 565 | size_t num_slots_used{}; | ||
| 566 | size_t last_accumulation_checkpoint{}; | ||
| 567 | bool accumulation_since_last_sync{}; | ||
| 513 | VideoCommon::HostQueryBase* current_query; | 568 | VideoCommon::HostQueryBase* current_query; |
| 514 | bool has_started{}; | 569 | bool has_started{}; |
| 515 | bool current_unset{}; | ||
| 516 | std::mutex flush_guard; | 570 | std::mutex flush_guard; |
| 571 | |||
| 572 | std::unique_ptr<QueriesPrefixScanPass> queries_prefix_scan_pass; | ||
| 517 | }; | 573 | }; |
| 518 | 574 | ||
| 519 | // Transform feedback queries | 575 | // Transform feedback queries |
| @@ -1090,7 +1146,8 @@ struct QueryCacheRuntimeImpl { | |||
| 1090 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | 1146 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, |
| 1091 | guest_streamer(0, runtime), | 1147 | guest_streamer(0, runtime), |
| 1092 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer, | 1148 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer, |
| 1093 | device, scheduler, memory_allocator), | 1149 | device, scheduler, memory_allocator, compute_pass_descriptor_queue, |
| 1150 | descriptor_pool), | ||
| 1094 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | 1151 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, |
| 1095 | scheduler, memory_allocator, staging_pool), | 1152 | scheduler, memory_allocator, staging_pool), |
| 1096 | primitives_succeeded_streamer( | 1153 | primitives_succeeded_streamer( |
| @@ -1319,10 +1376,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1319 | return true; | 1376 | return true; |
| 1320 | } | 1377 | } |
| 1321 | } | 1378 | } |
| 1322 | if (!is_in_bc[0] && !is_in_bc[1]) { | 1379 | /*if (!is_in_bc[0] && !is_in_bc[1]) { |
| 1323 | // Both queries are in query cache, it's best to just flush. | 1380 | // Both queries are in query cache, it's best to just flush. |
| 1324 | return false; | 1381 | return true; |
| 1325 | } | 1382 | }*/ |
| 1326 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); | 1383 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); |
| 1327 | return true; | 1384 | return true; |
| 1328 | } | 1385 | } |