diff options
| author | 2023-08-22 12:28:25 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:30 +0200 | |
| commit | a07c88e686fb9b65924876d472a8184f1f1849df (patch) | |
| tree | 1eb5015652f00ba728217d16a71ecbed67faa24b /src/video_core/renderer_vulkan | |
| parent | Query Cache: Implement host side sample counting. (diff) | |
| download | yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.tar.gz yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.tar.xz yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.zip | |
Query Cache: Simplify Prefix Sum compute shader
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 27 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 4 |
3 files changed, 26 insertions, 9 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index a1af08cda..44ec5a032 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/div_ceil.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" | 14 | #include "video_core/host_shaders/astc_decoder_comp_spv.h" |
| 15 | #include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" | 15 | #include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" |
| 16 | #include "video_core/host_shaders/queries_prefix_scan_sum_nosubgroups_comp_spv.h" | ||
| 16 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" | 17 | #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" |
| 17 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 18 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 18 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 19 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| @@ -187,7 +188,8 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | |||
| 187 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 188 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 188 | vk::Span<VkDescriptorUpdateTemplateEntry> templates, | 189 | vk::Span<VkDescriptorUpdateTemplateEntry> templates, |
| 189 | const DescriptorBankInfo& bank_info, | 190 | const DescriptorBankInfo& bank_info, |
| 190 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) | 191 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code, |
| 192 | std::optional<u32> optional_subgroup_size) | ||
| 191 | : device{device_} { | 193 | : device{device_} { |
| 192 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | 194 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ |
| 193 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 195 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| @@ -228,13 +230,19 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, | |||
| 228 | .pCode = code.data(), | 230 | .pCode = code.data(), |
| 229 | }); | 231 | }); |
| 230 | device.SaveShader(code); | 232 | device.SaveShader(code); |
| 233 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 234 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 235 | .pNext = nullptr, | ||
| 236 | .requiredSubgroupSize = optional_subgroup_size ? *optional_subgroup_size : 32U, | ||
| 237 | }; | ||
| 238 | bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size; | ||
| 231 | pipeline = device.GetLogical().CreateComputePipeline({ | 239 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 232 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 240 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 233 | .pNext = nullptr, | 241 | .pNext = nullptr, |
| 234 | .flags = 0, | 242 | .flags = 0, |
| 235 | .stage{ | 243 | .stage{ |
| 236 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 244 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 237 | .pNext = nullptr, | 245 | .pNext = use_setup_size ? &subgroup_size_ci : nullptr, |
| 238 | .flags = 0, | 246 | .flags = 0, |
| 239 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 247 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 240 | .module = *module, | 248 | .module = *module, |
| @@ -399,10 +407,17 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ | |||
| 399 | QueriesPrefixScanPass::QueriesPrefixScanPass( | 407 | QueriesPrefixScanPass::QueriesPrefixScanPass( |
| 400 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, | 408 | const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 401 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_) | 409 | ComputePassDescriptorQueue& compute_pass_descriptor_queue_) |
| 402 | : ComputePass(device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, | 410 | : ComputePass( |
| 403 | QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, | 411 | device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, |
| 404 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>, | 412 | QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, |
| 405 | QUERIES_PREFIX_SCAN_SUM_COMP_SPV), | 413 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>, |
| 414 | device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) && | ||
| 415 | device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) && | ||
| 416 | device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) && | ||
| 417 | device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) | ||
| 418 | ? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV) | ||
| 419 | : std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV), | ||
| 420 | {32}), | ||
| 406 | scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} | 421 | scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} |
| 407 | 422 | ||
| 408 | void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, | 423 | void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index e6ff86e9a..68ffb1b82 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <optional> | ||
| 6 | #include <span> | 7 | #include <span> |
| 7 | #include <utility> | 8 | #include <utility> |
| 8 | 9 | ||
| @@ -31,7 +32,8 @@ public: | |||
| 31 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 32 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 32 | vk::Span<VkDescriptorUpdateTemplateEntry> templates, | 33 | vk::Span<VkDescriptorUpdateTemplateEntry> templates, |
| 33 | const DescriptorBankInfo& bank_info, | 34 | const DescriptorBankInfo& bank_info, |
| 34 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); | 35 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code, |
| 36 | std::optional<u32> optional_subgroup_size = std::nullopt); | ||
| 35 | ~ComputePass(); | 37 | ~ComputePass(); |
| 36 | 38 | ||
| 37 | protected: | 39 | protected: |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ded190ae0..825e1a72e 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
| @@ -1376,10 +1376,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1376 | return true; | 1376 | return true; |
| 1377 | } | 1377 | } |
| 1378 | } | 1378 | } |
| 1379 | /*if (!is_in_bc[0] && !is_in_bc[1]) { | 1379 | if (!is_in_bc[0] && !is_in_bc[1]) { |
| 1380 | // Both queries are in query cache, it's best to just flush. | 1380 | // Both queries are in query cache, it's best to just flush. |
| 1381 | return true; | 1381 | return true; |
| 1382 | }*/ | 1382 | } |
| 1383 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); | 1383 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); |
| 1384 | return true; | 1384 | return true; |
| 1385 | } | 1385 | } |