summary | refs | log | tree | commit | diff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-08-22 12:28:25 +0200
committer: Fernando Sahmkow, 2023-09-23 23:05:30 +0200
commit: a07c88e686fb9b65924876d472a8184f1f1849df (patch)
tree: 1eb5015652f00ba728217d16a71ecbed67faa24b /src/video_core/renderer_vulkan
parent: Query Cache: Implement host side sample counting. (diff)
download: yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.tar.gz
yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.tar.xz
yuzu-a07c88e686fb9b65924876d472a8184f1f1849df.zip
Query Cache: Simplify Prefix Sum compute shader
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp 27
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp 4
3 files changed, 26 insertions, 9 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index a1af08cda..44ec5a032 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -13,6 +13,7 @@
13#include "common/div_ceil.h" 13#include "common/div_ceil.h"
14#include "video_core/host_shaders/astc_decoder_comp_spv.h" 14#include "video_core/host_shaders/astc_decoder_comp_spv.h"
15#include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" 15#include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h"
16#include "video_core/host_shaders/queries_prefix_scan_sum_nosubgroups_comp_spv.h"
16#include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" 17#include "video_core/host_shaders/resolve_conditional_render_comp_spv.h"
17#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" 18#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
18#include "video_core/host_shaders/vulkan_uint8_comp_spv.h" 19#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
@@ -187,7 +188,8 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
187 vk::Span<VkDescriptorSetLayoutBinding> bindings, 188 vk::Span<VkDescriptorSetLayoutBinding> bindings,
188 vk::Span<VkDescriptorUpdateTemplateEntry> templates, 189 vk::Span<VkDescriptorUpdateTemplateEntry> templates,
189 const DescriptorBankInfo& bank_info, 190 const DescriptorBankInfo& bank_info,
190 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) 191 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code,
192 std::optional<u32> optional_subgroup_size)
191 : device{device_} { 193 : device{device_} {
192 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 194 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
193 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 195 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -228,13 +230,19 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
228 .pCode = code.data(), 230 .pCode = code.data(),
229 }); 231 });
230 device.SaveShader(code); 232 device.SaveShader(code);
233 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
234 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
235 .pNext = nullptr,
236 .requiredSubgroupSize = optional_subgroup_size ? *optional_subgroup_size : 32U,
237 };
238 bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size;
231 pipeline = device.GetLogical().CreateComputePipeline({ 239 pipeline = device.GetLogical().CreateComputePipeline({
232 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 240 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
233 .pNext = nullptr, 241 .pNext = nullptr,
234 .flags = 0, 242 .flags = 0,
235 .stage{ 243 .stage{
236 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 244 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
237 .pNext = nullptr, 245 .pNext = use_setup_size ? &subgroup_size_ci : nullptr,
238 .flags = 0, 246 .flags = 0,
239 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 247 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
240 .module = *module, 248 .module = *module,
@@ -399,10 +407,17 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
399QueriesPrefixScanPass::QueriesPrefixScanPass( 407QueriesPrefixScanPass::QueriesPrefixScanPass(
400 const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, 408 const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
401 ComputePassDescriptorQueue& compute_pass_descriptor_queue_) 409 ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
402 : ComputePass(device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, 410 : ComputePass(
403 QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, 411 device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS,
404 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>, 412 QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO,
405 QUERIES_PREFIX_SCAN_SUM_COMP_SPV), 413 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(QueriesPrefixScanPushConstants)>,
414 device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) &&
415 device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) &&
416 device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) &&
417 device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
418 ? std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_COMP_SPV)
419 : std::span<const u32>(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV),
420 {32}),
406 scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} 421 scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {}
407 422
408void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, 423void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index e6ff86e9a..68ffb1b82 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -3,6 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <optional>
6#include <span> 7#include <span>
7#include <utility> 8#include <utility>
8 9
@@ -31,7 +32,8 @@ public:
31 vk::Span<VkDescriptorSetLayoutBinding> bindings, 32 vk::Span<VkDescriptorSetLayoutBinding> bindings,
32 vk::Span<VkDescriptorUpdateTemplateEntry> templates, 33 vk::Span<VkDescriptorUpdateTemplateEntry> templates,
33 const DescriptorBankInfo& bank_info, 34 const DescriptorBankInfo& bank_info,
34 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); 35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code,
36 std::optional<u32> optional_subgroup_size = std::nullopt);
35 ~ComputePass(); 37 ~ComputePass();
36 38
37protected: 39protected:
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ded190ae0..825e1a72e 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1376,10 +1376,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
1376 return true; 1376 return true;
1377 } 1377 }
1378 } 1378 }
1379 /*if (!is_in_bc[0] && !is_in_bc[1]) { 1379 if (!is_in_bc[0] && !is_in_bc[1]) {
1380 // Both queries are in query cache, it's best to just flush. 1380 // Both queries are in query cache, it's best to just flush.
1381 return true; 1381 return true;
1382 }*/ 1382 }
1383 HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); 1383 HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
1384 return true; 1384 return true;
1385} 1385}