| author | 2021-02-16 20:52:12 -0300 |
|---|---|
| committer | 2021-07-22 21:51:22 -0400 |
| commit | c67d64365a712830fe140dd36e24e2efd9b8a812 (patch) |
| tree | 9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/renderer_vulkan |
| parent | shader: Add XMAD multiplication folding optimization (diff) |
| download | yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip |
shader: Remove old shader management
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/blit_image.cpp | 1 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 136 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pipeline.h | 47 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 484 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 103 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 375 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 91 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 361 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 47 |
9 files changed, 41 insertions, 1604 deletions
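The diff below does not port the old SPIR-V-decompiler plumbing: ComputePipeline, Shader, and PipelineCache are reduced to placeholders (GetComputePipeline currently just throws), and VKGraphicsPipeline is deleted outright; the stubs presumably serve as scaffolding for the replacement shader management. For orientation, here is a condensed sketch of the stubbed compute-pipeline interface, taken from the new side of vk_compute_pipeline.h below (not a complete listing of the header):

```cpp
// Condensed from the new side of vk_compute_pipeline.h: the descriptor layouts,
// update templates, and shader modules of the old decompiler path are removed,
// leaving an empty placeholder type.
namespace Vulkan {

class ComputePipeline {
public:
    explicit ComputePipeline();
    ~ComputePipeline();
};

} // namespace Vulkan
```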
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c0d5c7f4 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
| 323 | cmdbuf.SetScissor(0, scissor); | 323 | cmdbuf.SetScissor(0, scissor); |
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); |
| 325 | } | 325 | } |
| 326 | |||
| 327 | } // Anonymous namespace | 326 | } // Anonymous namespace |
| 328 | 327 | ||
| 329 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | 328 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..7a3660496 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -8,146 +8,14 @@
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 13 | #include "video_core/vulkan_common/vulkan_device.h" | 12 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 14 | ||
| 16 | namespace Vulkan { | 15 | namespace Vulkan { |
| 17 | 16 | ||
| 18 | VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 17 | ComputePipeline::ComputePipeline() = default; |
| 19 | VKDescriptorPool& descriptor_pool_, | ||
| 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 21 | const SPIRVShader& shader_) | ||
| 22 | : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, | ||
| 23 | descriptor_set_layout{CreateDescriptorSetLayout()}, | ||
| 24 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | ||
| 25 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 26 | descriptor_template{CreateDescriptorUpdateTemplate()}, | ||
| 27 | shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} | ||
| 28 | 18 | ||
| 29 | VKComputePipeline::~VKComputePipeline() = default; | 19 | ComputePipeline::~ComputePipeline() = default; |
| 30 | |||
| 31 | VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { | ||
| 32 | if (!descriptor_template) { | ||
| 33 | return {}; | ||
| 34 | } | ||
| 35 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 36 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 37 | return set; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | ||
| 41 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 42 | u32 binding = 0; | ||
| 43 | const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { | ||
| 44 | // TODO(Rodrigo): Maybe make individual bindings here? | ||
| 45 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | ||
| 46 | bindings.push_back({ | ||
| 47 | .binding = binding++, | ||
| 48 | .descriptorType = descriptor_type, | ||
| 49 | .descriptorCount = 1, | ||
| 50 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 51 | .pImmutableSamplers = nullptr, | ||
| 52 | }); | ||
| 53 | } | ||
| 54 | }; | ||
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | ||
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | ||
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | ||
| 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 60 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | ||
| 61 | |||
| 62 | return device.GetLogical().CreateDescriptorSetLayout({ | ||
| 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 64 | .pNext = nullptr, | ||
| 65 | .flags = 0, | ||
| 66 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 67 | .pBindings = bindings.data(), | ||
| 68 | }); | ||
| 69 | } | ||
| 70 | |||
| 71 | vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { | ||
| 72 | return device.GetLogical().CreatePipelineLayout({ | ||
| 73 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 74 | .pNext = nullptr, | ||
| 75 | .flags = 0, | ||
| 76 | .setLayoutCount = 1, | ||
| 77 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 78 | .pushConstantRangeCount = 0, | ||
| 79 | .pPushConstantRanges = nullptr, | ||
| 80 | }); | ||
| 81 | } | ||
| 82 | |||
| 83 | vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { | ||
| 84 | std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; | ||
| 85 | u32 binding = 0; | ||
| 86 | u32 offset = 0; | ||
| 87 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); | ||
| 88 | if (template_entries.empty()) { | ||
| 89 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 90 | return {}; | ||
| 91 | } | ||
| 92 | |||
| 93 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||
| 94 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 95 | .pNext = nullptr, | ||
| 96 | .flags = 0, | ||
| 97 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 98 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 99 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 100 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 101 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 102 | .pipelineLayout = *layout, | ||
| 103 | .set = DESCRIPTOR_SET, | ||
| 104 | }); | ||
| 105 | } | ||
| 106 | |||
| 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | ||
| 108 | device.SaveShader(code); | ||
| 109 | |||
| 110 | return device.GetLogical().CreateShaderModule({ | ||
| 111 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 112 | .pNext = nullptr, | ||
| 113 | .flags = 0, | ||
| 114 | .codeSize = code.size() * sizeof(u32), | ||
| 115 | .pCode = code.data(), | ||
| 116 | }); | ||
| 117 | } | ||
| 118 | |||
| 119 | vk::Pipeline VKComputePipeline::CreatePipeline() const { | ||
| 120 | |||
| 121 | VkComputePipelineCreateInfo ci{ | ||
| 122 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 123 | .pNext = nullptr, | ||
| 124 | .flags = 0, | ||
| 125 | .stage = | ||
| 126 | { | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 128 | .pNext = nullptr, | ||
| 129 | .flags = 0, | ||
| 130 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 131 | .module = *shader_module, | ||
| 132 | .pName = "main", | ||
| 133 | .pSpecializationInfo = nullptr, | ||
| 134 | }, | ||
| 135 | .layout = *layout, | ||
| 136 | .basePipelineHandle = nullptr, | ||
| 137 | .basePipelineIndex = 0, | ||
| 138 | }; | ||
| 139 | |||
| 140 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 141 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 142 | .pNext = nullptr, | ||
| 143 | .requiredSubgroupSize = GuestWarpSize, | ||
| 144 | }; | ||
| 145 | |||
| 146 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { | ||
| 147 | ci.stage.pNext = &subgroup_size_ci; | ||
| 148 | } | ||
| 149 | |||
| 150 | return device.GetLogical().CreateComputePipeline(ci); | ||
| 151 | } | ||
| 152 | 20 | ||
| 153 | } // namespace Vulkan | 21 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..433d8bb3d 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -6,7 +6,6 @@
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 10 | ||
| 12 | namespace Vulkan { | 11 | namespace Vulkan { |
@@ -15,50 +14,10 @@ class Device;
| 15 | class VKScheduler; | 14 | class VKScheduler; |
| 16 | class VKUpdateDescriptorQueue; | 15 | class VKUpdateDescriptorQueue; |
| 17 | 16 | ||
| 18 | class VKComputePipeline final { | 17 | class ComputePipeline { |
| 19 | public: | 18 | public: |
| 20 | explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 19 | explicit ComputePipeline(); |
| 21 | VKDescriptorPool& descriptor_pool_, | 20 | ~ComputePipeline(); |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 23 | const SPIRVShader& shader_); | ||
| 24 | ~VKComputePipeline(); | ||
| 25 | |||
| 26 | VkDescriptorSet CommitDescriptorSet(); | ||
| 27 | |||
| 28 | VkPipeline GetHandle() const { | ||
| 29 | return *pipeline; | ||
| 30 | } | ||
| 31 | |||
| 32 | VkPipelineLayout GetLayout() const { | ||
| 33 | return *layout; | ||
| 34 | } | ||
| 35 | |||
| 36 | const ShaderEntries& GetEntries() const { | ||
| 37 | return entries; | ||
| 38 | } | ||
| 39 | |||
| 40 | private: | ||
| 41 | vk::DescriptorSetLayout CreateDescriptorSetLayout() const; | ||
| 42 | |||
| 43 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 44 | |||
| 45 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; | ||
| 46 | |||
| 47 | vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; | ||
| 48 | |||
| 49 | vk::Pipeline CreatePipeline() const; | ||
| 50 | |||
| 51 | const Device& device; | ||
| 52 | VKScheduler& scheduler; | ||
| 53 | ShaderEntries entries; | ||
| 54 | |||
| 55 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 56 | DescriptorAllocator descriptor_allocator; | ||
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 58 | vk::PipelineLayout layout; | ||
| 59 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 60 | vk::ShaderModule shader_module; | ||
| 61 | vk::Pipeline pipeline; | ||
| 62 | }; | 21 | }; |
| 63 | 22 | ||
| 64 | } // namespace Vulkan | 23 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
deleted file mode 100644
index fc6dd83eb..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ /dev/null
@@ -1,484 +0,0 @@
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstring> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/microprofile.h" | ||
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | |||
| 24 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||
| 25 | |||
| 26 | namespace { | ||
| 27 | |||
| 28 | template <class StencilFace> | ||
| 29 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | ||
| 30 | return { | ||
| 31 | .failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()), | ||
| 32 | .passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()), | ||
| 33 | .depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()), | ||
| 34 | .compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()), | ||
| 35 | .compareMask = 0, | ||
| 36 | .writeMask = 0, | ||
| 37 | .reference = 0, | ||
| 38 | }; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { | ||
| 42 | static constexpr std::array unsupported_topologies = { | ||
| 43 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, | ||
| 44 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, | ||
| 45 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | ||
| 46 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, | ||
| 47 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, | ||
| 48 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; | ||
| 49 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | ||
| 50 | topology) == std::end(unsupported_topologies); | ||
| 51 | } | ||
| 52 | |||
| 53 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | ||
| 54 | union Swizzle { | ||
| 55 | u32 raw; | ||
| 56 | BitField<0, 3, Maxwell::ViewportSwizzle> x; | ||
| 57 | BitField<4, 3, Maxwell::ViewportSwizzle> y; | ||
| 58 | BitField<8, 3, Maxwell::ViewportSwizzle> z; | ||
| 59 | BitField<12, 3, Maxwell::ViewportSwizzle> w; | ||
| 60 | }; | ||
| 61 | const Swizzle unpacked{swizzle}; | ||
| 62 | |||
| 63 | return { | ||
| 64 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), | ||
| 65 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), | ||
| 66 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), | ||
| 67 | .w = MaxwellToVK::ViewportSwizzle(unpacked.w), | ||
| 68 | }; | ||
| 69 | } | ||
| 70 | |||
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 72 | switch (msaa_mode) { | ||
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 74 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | } // Anonymous namespace | ||
| 96 | |||
| 97 | VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | ||
| 98 | VKDescriptorPool& descriptor_pool_, | ||
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 100 | const GraphicsPipelineCacheKey& key, | ||
| 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||
| 102 | const SPIRVProgram& program, u32 num_color_buffers) | ||
| 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, | ||
| 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | ||
| 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | ||
| 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, | ||
| 108 | modules(CreateShaderModules(program)), | ||
| 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} | ||
| 110 | |||
| 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||
| 112 | |||
| 113 | VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | ||
| 114 | if (!descriptor_template) { | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 118 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 119 | return set; | ||
| 120 | } | ||
| 121 | |||
| 122 | vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | ||
| 123 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const { | ||
| 124 | const VkDescriptorSetLayoutCreateInfo ci{ | ||
| 125 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 126 | .pNext = nullptr, | ||
| 127 | .flags = 0, | ||
| 128 | .bindingCount = bindings.size(), | ||
| 129 | .pBindings = bindings.data(), | ||
| 130 | }; | ||
| 131 | return device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 132 | } | ||
| 133 | |||
| 134 | vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | ||
| 135 | const VkPipelineLayoutCreateInfo ci{ | ||
| 136 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 137 | .pNext = nullptr, | ||
| 138 | .flags = 0, | ||
| 139 | .setLayoutCount = 1, | ||
| 140 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 141 | .pushConstantRangeCount = 0, | ||
| 142 | .pPushConstantRanges = nullptr, | ||
| 143 | }; | ||
| 144 | return device.GetLogical().CreatePipelineLayout(ci); | ||
| 145 | } | ||
| 146 | |||
| 147 | vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | ||
| 148 | const SPIRVProgram& program) const { | ||
| 149 | std::vector<VkDescriptorUpdateTemplateEntry> template_entries; | ||
| 150 | u32 binding = 0; | ||
| 151 | u32 offset = 0; | ||
| 152 | for (const auto& stage : program) { | ||
| 153 | if (stage) { | ||
| 154 | FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); | ||
| 155 | } | ||
| 156 | } | ||
| 157 | if (template_entries.empty()) { | ||
| 158 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 159 | return {}; | ||
| 160 | } | ||
| 161 | |||
| 162 | const VkDescriptorUpdateTemplateCreateInfoKHR ci{ | ||
| 163 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 164 | .pNext = nullptr, | ||
| 165 | .flags = 0, | ||
| 166 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 167 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 168 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 169 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 170 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 171 | .pipelineLayout = *layout, | ||
| 172 | .set = DESCRIPTOR_SET, | ||
| 173 | }; | ||
| 174 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 175 | } | ||
| 176 | |||
| 177 | std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | ||
| 178 | const SPIRVProgram& program) const { | ||
| 179 | VkShaderModuleCreateInfo ci{ | ||
| 180 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | ||
| 181 | .pNext = nullptr, | ||
| 182 | .flags = 0, | ||
| 183 | .codeSize = 0, | ||
| 184 | .pCode = nullptr, | ||
| 185 | }; | ||
| 186 | |||
| 187 | std::vector<vk::ShaderModule> shader_modules; | ||
| 188 | shader_modules.reserve(Maxwell::MaxShaderStage); | ||
| 189 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | ||
| 190 | const auto& stage = program[i]; | ||
| 191 | if (!stage) { | ||
| 192 | continue; | ||
| 193 | } | ||
| 194 | |||
| 195 | device.SaveShader(stage->code); | ||
| 196 | |||
| 197 | ci.codeSize = stage->code.size() * sizeof(u32); | ||
| 198 | ci.pCode = stage->code.data(); | ||
| 199 | shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); | ||
| 200 | } | ||
| 201 | return shader_modules; | ||
| 202 | } | ||
| 203 | |||
| 204 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | ||
| 205 | VkRenderPass renderpass, | ||
| 206 | u32 num_color_buffers) const { | ||
| 207 | const auto& state = cache_key.fixed_state; | ||
| 208 | const auto& viewport_swizzles = state.viewport_swizzles; | ||
| 209 | |||
| 210 | FixedPipelineState::DynamicState dynamic; | ||
| 211 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 212 | // Insert dummy values, as long as they are valid they don't matter as extended dynamic | ||
| 213 | // state is ignored | ||
| 214 | dynamic.raw1 = 0; | ||
| 215 | dynamic.raw2 = 0; | ||
| 216 | dynamic.vertex_strides.fill(0); | ||
| 217 | } else { | ||
| 218 | dynamic = state.dynamic_state; | ||
| 219 | } | ||
| 220 | |||
| 221 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | ||
| 222 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | ||
| 223 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 224 | const bool instanced = state.binding_divisors[index] != 0; | ||
| 225 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 226 | vertex_bindings.push_back({ | ||
| 227 | .binding = static_cast<u32>(index), | ||
| 228 | .stride = dynamic.vertex_strides[index], | ||
| 229 | .inputRate = rate, | ||
| 230 | }); | ||
| 231 | if (instanced) { | ||
| 232 | vertex_binding_divisors.push_back({ | ||
| 233 | .binding = static_cast<u32>(index), | ||
| 234 | .divisor = state.binding_divisors[index], | ||
| 235 | }); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; | ||
| 240 | const auto& input_attributes = program[0]->entries.attributes; | ||
| 241 | for (std::size_t index = 0; index < state.attributes.size(); ++index) { | ||
| 242 | const auto& attribute = state.attributes[index]; | ||
| 243 | if (!attribute.enabled) { | ||
| 244 | continue; | ||
| 245 | } | ||
| 246 | if (!input_attributes.contains(static_cast<u32>(index))) { | ||
| 247 | // Skip attributes not used by the vertex shaders. | ||
| 248 | continue; | ||
| 249 | } | ||
| 250 | vertex_attributes.push_back({ | ||
| 251 | .location = static_cast<u32>(index), | ||
| 252 | .binding = attribute.buffer, | ||
| 253 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 254 | .offset = attribute.offset, | ||
| 255 | }); | ||
| 256 | } | ||
| 257 | |||
| 258 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | ||
| 259 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | ||
| 260 | .pNext = nullptr, | ||
| 261 | .flags = 0, | ||
| 262 | .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()), | ||
| 263 | .pVertexBindingDescriptions = vertex_bindings.data(), | ||
| 264 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), | ||
| 265 | .pVertexAttributeDescriptions = vertex_attributes.data(), | ||
| 266 | }; | ||
| 267 | |||
| 268 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ | ||
| 269 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, | ||
| 270 | .pNext = nullptr, | ||
| 271 | .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()), | ||
| 272 | .pVertexBindingDivisors = vertex_binding_divisors.data(), | ||
| 273 | }; | ||
| 274 | if (!vertex_binding_divisors.empty()) { | ||
| 275 | vertex_input_ci.pNext = &input_divisor_ci; | ||
| 276 | } | ||
| 277 | |||
| 278 | const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); | ||
| 279 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | ||
| 280 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | ||
| 281 | .pNext = nullptr, | ||
| 282 | .flags = 0, | ||
| 283 | .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), | ||
| 284 | .primitiveRestartEnable = state.primitive_restart_enable != 0 && | ||
| 285 | SupportsPrimitiveRestart(input_assembly_topology), | ||
| 286 | }; | ||
| 287 | |||
| 288 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ | ||
| 289 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, | ||
| 290 | .pNext = nullptr, | ||
| 291 | .flags = 0, | ||
| 292 | .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, | ||
| 293 | }; | ||
| 294 | |||
| 295 | VkPipelineViewportStateCreateInfo viewport_ci{ | ||
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 297 | .pNext = nullptr, | ||
| 298 | .flags = 0, | ||
| 299 | .viewportCount = Maxwell::NumViewports, | ||
| 300 | .pViewports = nullptr, | ||
| 301 | .scissorCount = Maxwell::NumViewports, | ||
| 302 | .pScissors = nullptr, | ||
| 303 | }; | ||
| 304 | |||
| 305 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | ||
| 306 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); | ||
| 307 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | ||
| 308 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | ||
| 309 | .pNext = nullptr, | ||
| 310 | .flags = 0, | ||
| 311 | .viewportCount = Maxwell::NumViewports, | ||
| 312 | .pViewportSwizzles = swizzles.data(), | ||
| 313 | }; | ||
| 314 | if (device.IsNvViewportSwizzleSupported()) { | ||
| 315 | viewport_ci.pNext = &swizzle_ci; | ||
| 316 | } | ||
| 317 | |||
| 318 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | ||
| 319 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | ||
| 320 | .pNext = nullptr, | ||
| 321 | .flags = 0, | ||
| 322 | .depthClampEnable = | ||
| 323 | static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), | ||
| 324 | .rasterizerDiscardEnable = | ||
| 325 | static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), | ||
| 326 | .polygonMode = VK_POLYGON_MODE_FILL, | ||
| 327 | .cullMode = static_cast<VkCullModeFlags>( | ||
| 328 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), | ||
| 329 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), | ||
| 330 | .depthBiasEnable = state.depth_bias_enable, | ||
| 331 | .depthBiasConstantFactor = 0.0f, | ||
| 332 | .depthBiasClamp = 0.0f, | ||
| 333 | .depthBiasSlopeFactor = 0.0f, | ||
| 334 | .lineWidth = 1.0f, | ||
| 335 | }; | ||
| 336 | |||
| 337 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ | ||
| 338 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | ||
| 339 | .pNext = nullptr, | ||
| 340 | .flags = 0, | ||
| 341 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), | ||
| 342 | .sampleShadingEnable = VK_FALSE, | ||
| 343 | .minSampleShading = 0.0f, | ||
| 344 | .pSampleMask = nullptr, | ||
| 345 | .alphaToCoverageEnable = VK_FALSE, | ||
| 346 | .alphaToOneEnable = VK_FALSE, | ||
| 347 | }; | ||
| 348 | |||
| 349 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ | ||
| 350 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | ||
| 351 | .pNext = nullptr, | ||
| 352 | .flags = 0, | ||
| 353 | .depthTestEnable = dynamic.depth_test_enable, | ||
| 354 | .depthWriteEnable = dynamic.depth_write_enable, | ||
| 355 | .depthCompareOp = dynamic.depth_test_enable | ||
| 356 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) | ||
| 357 | : VK_COMPARE_OP_ALWAYS, | ||
| 358 | .depthBoundsTestEnable = dynamic.depth_bounds_enable, | ||
| 359 | .stencilTestEnable = dynamic.stencil_enable, | ||
| 360 | .front = GetStencilFaceState(dynamic.front), | ||
| 361 | .back = GetStencilFaceState(dynamic.back), | ||
| 362 | .minDepthBounds = 0.0f, | ||
| 363 | .maxDepthBounds = 0.0f, | ||
| 364 | }; | ||
| 365 | |||
| 366 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | ||
| 367 | for (std::size_t index = 0; index < num_color_buffers; ++index) { | ||
| 368 | static constexpr std::array COMPONENT_TABLE{ | ||
| 369 | VK_COLOR_COMPONENT_R_BIT, | ||
| 370 | VK_COLOR_COMPONENT_G_BIT, | ||
| 371 | VK_COLOR_COMPONENT_B_BIT, | ||
| 372 | VK_COLOR_COMPONENT_A_BIT, | ||
| 373 | }; | ||
| 374 | const auto& blend = state.attachments[index]; | ||
| 375 | |||
| 376 | VkColorComponentFlags color_components = 0; | ||
| 377 | for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { | ||
| 378 | if (blend.Mask()[i]) { | ||
| 379 | color_components |= COMPONENT_TABLE[i]; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | cb_attachments[index] = { | ||
| 384 | .blendEnable = blend.enable != 0, | ||
| 385 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), | ||
| 386 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), | ||
| 387 | .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()), | ||
| 388 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), | ||
| 389 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), | ||
| 390 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), | ||
| 391 | .colorWriteMask = color_components, | ||
| 392 | }; | ||
| 393 | } | ||
| 394 | |||
| 395 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | ||
| 396 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | ||
| 397 | .pNext = nullptr, | ||
| 398 | .flags = 0, | ||
| 399 | .logicOpEnable = VK_FALSE, | ||
| 400 | .logicOp = VK_LOGIC_OP_COPY, | ||
| 401 | .attachmentCount = num_color_buffers, | ||
| 402 | .pAttachments = cb_attachments.data(), | ||
| 403 | .blendConstants = {}, | ||
| 404 | }; | ||
| 405 | |||
| 406 | std::vector dynamic_states{ | ||
| 407 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, | ||
| 408 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | ||
| 409 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | ||
| 410 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, | ||
| 411 | }; | ||
| 412 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 413 | static constexpr std::array extended{ | ||
| 414 | VK_DYNAMIC_STATE_CULL_MODE_EXT, | ||
| 415 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, | ||
| 416 | VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, | ||
| 417 | VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, | ||
| 418 | VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, | ||
| 419 | VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, | ||
| 420 | VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT, | ||
| 421 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, | ||
| 422 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, | ||
| 423 | }; | ||
| 424 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); | ||
| 425 | } | ||
| 426 | |||
| 427 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | ||
| 428 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | ||
| 429 | .pNext = nullptr, | ||
| 430 | .flags = 0, | ||
| 431 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | ||
| 432 | .pDynamicStates = dynamic_states.data(), | ||
| 433 | }; | ||
| 434 | |||
| 435 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 436 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||
| 437 | .pNext = nullptr, | ||
| 438 | .requiredSubgroupSize = GuestWarpSize, | ||
| 439 | }; | ||
| 440 | |||
| 441 | std::vector<VkPipelineShaderStageCreateInfo> shader_stages; | ||
| 442 | std::size_t module_index = 0; | ||
| 443 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 444 | if (!program[stage]) { | ||
| 445 | continue; | ||
| 446 | } | ||
| 447 | |||
| 448 | VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); | ||
| 449 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; | ||
| 450 | stage_ci.pNext = nullptr; | ||
| 451 | stage_ci.flags = 0; | ||
| 452 | stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); | ||
| 453 | stage_ci.module = *modules[module_index++]; | ||
| 454 | stage_ci.pName = "main"; | ||
| 455 | stage_ci.pSpecializationInfo = nullptr; | ||
| 456 | |||
| 457 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | ||
| 458 | stage_ci.pNext = &subgroup_size_ci; | ||
| 459 | } | ||
| 460 | } | ||
| 461 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | ||
| 462 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | ||
| 463 | .pNext = nullptr, | ||
| 464 | .flags = 0, | ||
| 465 | .stageCount = static_cast<u32>(shader_stages.size()), | ||
| 466 | .pStages = shader_stages.data(), | ||
| 467 | .pVertexInputState = &vertex_input_ci, | ||
| 468 | .pInputAssemblyState = &input_assembly_ci, | ||
| 469 | .pTessellationState = &tessellation_ci, | ||
| 470 | .pViewportState = &viewport_ci, | ||
| 471 | .pRasterizationState = &rasterization_ci, | ||
| 472 | .pMultisampleState = &multisample_ci, | ||
| 473 | .pDepthStencilState = &depth_stencil_ci, | ||
| 474 | .pColorBlendState = &color_blend_ci, | ||
| 475 | .pDynamicState = &dynamic_state_ci, | ||
| 476 | .layout = *layout, | ||
| 477 | .renderPass = renderpass, | ||
| 478 | .subpass = 0, | ||
| 479 | .basePipelineHandle = nullptr, | ||
| 480 | .basePipelineIndex = 0, | ||
| 481 | }); | ||
| 482 | } | ||
| 483 | |||
| 484 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
deleted file mode 100644
index 8b6a98fe0..000000000
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ /dev/null
@@ -1,103 +0,0 @@
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 17 | |||
| 18 | namespace Vulkan { | ||
| 19 | |||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 21 | |||
| 22 | struct GraphicsPipelineCacheKey { | ||
| 23 | VkRenderPass renderpass; | ||
| 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | ||
| 25 | FixedPipelineState fixed_state; | ||
| 26 | |||
| 27 | std::size_t Hash() const noexcept; | ||
| 28 | |||
| 29 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | ||
| 30 | |||
| 31 | bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 32 | return !operator==(rhs); | ||
| 33 | } | ||
| 34 | |||
| 35 | std::size_t Size() const noexcept { | ||
| 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); | ||
| 37 | } | ||
| 38 | }; | ||
| 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | ||
| 40 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | ||
| 41 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | ||
| 42 | |||
| 43 | class Device; | ||
| 44 | class VKDescriptorPool; | ||
| 45 | class VKScheduler; | ||
| 46 | class VKUpdateDescriptorQueue; | ||
| 47 | |||
| 48 | using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; | ||
| 49 | |||
| 50 | class VKGraphicsPipeline final { | ||
| 51 | public: | ||
| 52 | explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | ||
| 53 | VKDescriptorPool& descriptor_pool, | ||
| 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 55 | const GraphicsPipelineCacheKey& key, | ||
| 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||
| 57 | const SPIRVProgram& program, u32 num_color_buffers); | ||
| 58 | ~VKGraphicsPipeline(); | ||
| 59 | |||
| 60 | VkDescriptorSet CommitDescriptorSet(); | ||
| 61 | |||
| 62 | VkPipeline GetHandle() const { | ||
| 63 | return *pipeline; | ||
| 64 | } | ||
| 65 | |||
| 66 | VkPipelineLayout GetLayout() const { | ||
| 67 | return *layout; | ||
| 68 | } | ||
| 69 | |||
| 70 | GraphicsPipelineCacheKey GetCacheKey() const { | ||
| 71 | return cache_key; | ||
| 72 | } | ||
| 73 | |||
| 74 | private: | ||
| 75 | vk::DescriptorSetLayout CreateDescriptorSetLayout( | ||
| 76 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const; | ||
| 77 | |||
| 78 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 79 | |||
| 80 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | ||
| 81 | const SPIRVProgram& program) const; | ||
| 82 | |||
| 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | ||
| 84 | |||
| 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, | ||
| 86 | u32 num_color_buffers) const; | ||
| 87 | |||
| 88 | const Device& device; | ||
| 89 | VKScheduler& scheduler; | ||
| 90 | const GraphicsPipelineCacheKey cache_key; | ||
| 91 | const u64 hash; | ||
| 92 | |||
| 93 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 94 | DescriptorAllocator descriptor_allocator; | ||
| 95 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 96 | vk::PipelineLayout layout; | ||
| 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 98 | std::vector<vk::ShaderModule> modules; | ||
| 99 | |||
| 100 | vk::Pipeline pipeline; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..7d0ba1180 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -19,49 +19,27 @@
| 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 23 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 25 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 27 | #include "video_core/shader/compiler_settings.h" | ||
| 28 | #include "video_core/shader/memory_util.h" | ||
| 29 | #include "video_core/shader_cache.h" | 26 | #include "video_core/shader_cache.h" |
| 30 | #include "video_core/shader_notify.h" | 27 | #include "video_core/shader_notify.h" |
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | 28 | #include "video_core/vulkan_common/vulkan_device.h" |
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 29 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 33 | 30 | ||
| 34 | namespace Vulkan { | 31 | namespace Vulkan { |
| 35 | |||
| 36 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 32 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 37 | 33 | ||
| 38 | using Tegra::Engines::ShaderType; | 34 | using Tegra::Engines::ShaderType; |
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 42 | using VideoCommon::Shader::ProgramCode; | ||
| 43 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 44 | 35 | ||
| 45 | namespace { | 36 | namespace { |
| 46 | 37 | size_t StageFromProgram(size_t program) { | |
| 47 | constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | ||
| 48 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 49 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | ||
| 50 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | ||
| 51 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | ||
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | ||
| 53 | |||
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | ||
| 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, | ||
| 56 | .disable_else_derivation = true, | ||
| 57 | }; | ||
| 58 | |||
| 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | ||
| 60 | return program == 0 ? 0 : program - 1; | 38 | return program == 0 ? 0 : program - 1; |
| 61 | } | 39 | } |
| 62 | 40 | ||
| 63 | constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | 41 | ShaderType StageFromProgram(Maxwell::ShaderProgram program) { |
| 64 | return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); | 42 | return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program))); |
| 65 | } | 43 | } |
| 66 | 44 | ||
| 67 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | 45 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { |
@@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
| 81 | return ShaderType::Vertex; | 59 | return ShaderType::Vertex; |
| 82 | } | 60 | } |
| 83 | } | 61 | } |
| 84 | |||
| 85 | template <VkDescriptorType descriptor_type, class Container> | ||
| 86 | void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, | ||
| 87 | VkShaderStageFlags stage_flags, const Container& container) { | ||
| 88 | const u32 num_entries = static_cast<u32>(std::size(container)); | ||
| 89 | for (std::size_t i = 0; i < num_entries; ++i) { | ||
| 90 | u32 count = 1; | ||
| 91 | if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { | ||
| 92 | // Combined image samplers can be arrayed. | ||
| 93 | count = container[i].size; | ||
| 94 | } | ||
| 95 | bindings.push_back({ | ||
| 96 | .binding = binding++, | ||
| 97 | .descriptorType = descriptor_type, | ||
| 98 | .descriptorCount = count, | ||
| 99 | .stageFlags = stage_flags, | ||
| 100 | .pImmutableSamplers = nullptr, | ||
| 101 | }); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | u32 FillDescriptorLayout(const ShaderEntries& entries, | ||
| 106 | std::vector<VkDescriptorSetLayoutBinding>& bindings, | ||
| 107 | Maxwell::ShaderProgram program_type, u32 base_binding) { | ||
| 108 | const ShaderType stage = GetStageFromProgram(program_type); | ||
| 109 | const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); | ||
| 110 | |||
| 111 | u32 binding = base_binding; | ||
| 112 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | ||
| 113 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | ||
| 114 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); | ||
| 115 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | ||
| 116 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 117 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | ||
| 118 | return binding; | ||
| 119 | } | ||
| 120 | |||
| 121 | } // Anonymous namespace | 62 | } // Anonymous namespace |
| 122 | 63 | ||
| 123 | std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { | 64 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| 124 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); | ||
| 125 | return static_cast<std::size_t>(hash); | ||
| 126 | } | ||
| 127 | |||
| 128 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||
| 129 | return std::memcmp(&rhs, this, Size()) == 0; | ||
| 130 | } | ||
| 131 | |||
| 132 | std::size_t ComputePipelineCacheKey::Hash() const noexcept { | ||
| 133 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | 65 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 134 | return static_cast<std::size_t>(hash); | 66 | return static_cast<size_t>(hash); |
| 135 | } | 67 | } |
| 136 | 68 | ||
| 137 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | 69 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { |
| 138 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 70 | return std::memcmp(&rhs, this, sizeof *this) == 0; |
| 139 | } | 71 | } |
| 140 | 72 | ||
| 141 | Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, | 73 | Shader::Shader() = default; |
| 142 | GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) | ||
| 143 | : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), | ||
| 144 | shader_ir(program_code, main_offset_, compiler_settings, registry), | ||
| 145 | entries(GenerateShaderEntries(shader_ir)) {} | ||
| 146 | 74 | ||
| 147 | Shader::~Shader() = default; | 75 | Shader::~Shader() = default; |
| 148 | 76 | ||
| 149 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 77 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, |
| 150 | Tegra::Engines::Maxwell3D& maxwell3d_, | 78 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 151 | Tegra::Engines::KeplerCompute& kepler_compute_, | 79 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 152 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 80 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | 81 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, |
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 82 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | 83 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, |
| 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | 84 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, |
| 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | 85 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ |
| 158 | update_descriptor_queue_} {} | 86 | update_descriptor_queue_} {} |
| 159 | 87 | ||
| 160 | VKPipelineCache::~VKPipelineCache() = default; | 88 | PipelineCache::~PipelineCache() = default; |
| 161 | |||
| 162 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | ||
| 163 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; | ||
| 164 | |||
| 165 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 166 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 167 | |||
| 168 | // Skip stages that are not enabled | ||
| 169 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | |||
| 173 | const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; | ||
| 174 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 175 | ASSERT(cpu_addr); | ||
| 176 | |||
| 177 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 178 | if (!result) { | ||
| 179 | const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; | ||
| 180 | |||
| 181 | // No shader found - create a new one | ||
| 182 | static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | ||
| 183 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); | ||
| 184 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); | ||
| 185 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 186 | |||
| 187 | auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, | ||
| 188 | std::move(code), stage_offset); | ||
| 189 | result = shader.get(); | ||
| 190 | |||
| 191 | if (cpu_addr) { | ||
| 192 | Register(std::move(shader), *cpu_addr, size_in_bytes); | ||
| 193 | } else { | ||
| 194 | null_shader = std::move(shader); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | shaders[index] = result; | ||
| 198 | } | ||
| 199 | return last_shaders = shaders; | ||
| 200 | } | ||
| 201 | |||
| 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | ||
| 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, | ||
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||
| 206 | |||
| 207 | if (last_graphics_pipeline && last_graphics_key == key) { | ||
| 208 | return last_graphics_pipeline; | ||
| 209 | } | ||
| 210 | last_graphics_key = key; | ||
| 211 | |||
| 212 | if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { | ||
| 213 | std::unique_lock lock{pipeline_cache}; | ||
| 214 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 215 | if (is_cache_miss) { | ||
| 216 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | ||
| 220 | update_descriptor_queue, bindings, program, key, | ||
| 221 | num_color_buffers); | ||
| 222 | } | ||
| 223 | last_graphics_pipeline = pair->second.get(); | ||
| 224 | return last_graphics_pipeline; | ||
| 225 | } | ||
| 226 | |||
| 227 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 228 | auto& entry = pair->second; | ||
| 229 | if (is_cache_miss) { | ||
| 230 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | ||
| 234 | update_descriptor_queue, key, bindings, | ||
| 235 | program, num_color_buffers); | ||
| 236 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 237 | } | ||
| 238 | last_graphics_pipeline = entry.get(); | ||
| 239 | return last_graphics_pipeline; | ||
| 240 | } | ||
| 241 | 89 | ||
| 242 | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | 90 | ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { |
| 243 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 91 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 244 | 92 | ||
| 245 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | 93 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); |
@@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
| 248 | return *entry; | 96 | return *entry; |
| 249 | } | 97 | } |
| 250 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 98 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |
| 251 | 99 | throw "Bad"; | |
| 252 | const GPUVAddr gpu_addr = key.shader; | ||
| 253 | |||
| 254 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 255 | ASSERT(cpu_addr); | ||
| 256 | |||
| 257 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); | ||
| 258 | if (!shader) { | ||
| 259 | // No shader found - create a new one | ||
| 260 | const auto host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 261 | |||
| 262 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); | ||
| 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 264 | |||
| 265 | auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, | ||
| 266 | *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); | ||
| 267 | shader = shader_info.get(); | ||
| 268 | |||
| 269 | if (cpu_addr) { | ||
| 270 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); | ||
| 271 | } else { | ||
| 272 | null_kernel = std::move(shader_info); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | const Specialization specialization{ | ||
| 277 | .base_binding = 0, | ||
| 278 | .workgroup_size = key.workgroup_size, | ||
| 279 | .shared_memory_size = key.shared_memory_size, | ||
| 280 | .point_size = std::nullopt, | ||
| 281 | .enabled_attributes = {}, | ||
| 282 | .attribute_types = {}, | ||
| 283 | .ndc_minus_one_to_one = false, | ||
| 284 | }; | ||
| 285 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, | ||
| 286 | shader->GetRegistry(), specialization), | ||
| 287 | shader->GetEntries()}; | ||
| 288 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | ||
| 289 | update_descriptor_queue, spirv_shader); | ||
| 290 | return *entry; | ||
| 291 | } | ||
| 292 | |||
| 293 | void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | ||
| 294 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 295 | std::unique_lock lock{pipeline_cache}; | ||
| 296 | graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); | ||
| 297 | } | ||
| 298 | |||
| 299 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { | ||
| 300 | bool finished = false; | ||
| 301 | const auto Finish = [&] { | ||
| 302 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | ||
| 303 | // flush. | ||
| 304 | if (finished) { | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | finished = true; | ||
| 308 | scheduler.Finish(); | ||
| 309 | }; | ||
| 310 | |||
| 311 | const GPUVAddr invalidated_addr = shader->GetGpuAddr(); | ||
| 312 | for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { | ||
| 313 | auto& entry = it->first; | ||
| 314 | if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == | ||
| 315 | entry.shaders.end()) { | ||
| 316 | ++it; | ||
| 317 | continue; | ||
| 318 | } | ||
| 319 | Finish(); | ||
| 320 | it = graphics_cache.erase(it); | ||
| 321 | } | ||
| 322 | for (auto it = compute_cache.begin(); it != compute_cache.end();) { | ||
| 323 | auto& entry = it->first; | ||
| 324 | if (entry.shader != invalidated_addr) { | ||
| 325 | ++it; | ||
| 326 | continue; | ||
| 327 | } | ||
| 328 | Finish(); | ||
| 329 | it = compute_cache.erase(it); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | |||
| 333 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | ||
| 334 | VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { | ||
| 335 | Specialization specialization; | ||
| 336 | if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 337 | float point_size; | ||
| 338 | std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); | ||
| 339 | specialization.point_size = point_size; | ||
| 340 | ASSERT(point_size != 0.0f); | ||
| 341 | } | ||
| 342 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | ||
| 343 | const auto& attribute = fixed_state.attributes[i]; | ||
| 344 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 345 | specialization.attribute_types[i] = attribute.Type(); | ||
| 346 | } | ||
| 347 | specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; | ||
| 348 | specialization.early_fragment_tests = fixed_state.early_z; | ||
| 349 | |||
| 350 | // Alpha test | ||
| 351 | specialization.alpha_test_func = | ||
| 352 | FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); | ||
| 353 | specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); | ||
| 354 | |||
| 355 | SPIRVProgram program; | ||
| 356 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 357 | |||
| 358 | for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 359 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | ||
| 360 | // Skip stages that are not enabled | ||
| 361 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 362 | continue; | ||
| 363 | } | ||
| 364 | const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); | ||
| 365 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 366 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 367 | |||
| 368 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | ||
| 369 | const ShaderType program_type = GetShaderType(program_enum); | ||
| 370 | const auto& entries = shader->GetEntries(); | ||
| 371 | program[stage] = { | ||
| 372 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | ||
| 373 | entries, | ||
| 374 | }; | ||
| 375 | |||
| 376 | const u32 old_binding = specialization.base_binding; | ||
| 377 | specialization.base_binding = | ||
| 378 | FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); | ||
| 379 | ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); | ||
| 380 | } | ||
| 381 | return {std::move(program), std::move(bindings)}; | ||
| 382 | } | 100 | } |
| 383 | 101 | ||
| 384 | template <VkDescriptorType descriptor_type, class Container> | 102 | void PipelineCache::OnShaderRemoval(Shader*) {} |
| 385 | void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, | ||
| 386 | u32& offset, const Container& container) { | ||
| 387 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | ||
| 388 | const u32 count = static_cast<u32>(std::size(container)); | ||
| 389 | |||
| 390 | if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { | ||
| 391 | for (u32 i = 0; i < count; ++i) { | ||
| 392 | const u32 num_samplers = container[i].size; | ||
| 393 | template_entries.push_back({ | ||
| 394 | .dstBinding = binding, | ||
| 395 | .dstArrayElement = 0, | ||
| 396 | .descriptorCount = num_samplers, | ||
| 397 | .descriptorType = descriptor_type, | ||
| 398 | .offset = offset, | ||
| 399 | .stride = entry_size, | ||
| 400 | }); | ||
| 401 | |||
| 402 | ++binding; | ||
| 403 | offset += num_samplers * entry_size; | ||
| 404 | } | ||
| 405 | return; | ||
| 406 | } | ||
| 407 | |||
| 408 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || | ||
| 409 | descriptor_type == STORAGE_TEXEL_BUFFER) { | ||
| 410 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. | ||
| 411 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | ||
| 412 | for (u32 i = 0; i < count; ++i) { | ||
| 413 | template_entries.push_back({ | ||
| 414 | .dstBinding = binding + i, | ||
| 415 | .dstArrayElement = 0, | ||
| 416 | .descriptorCount = 1, | ||
| 417 | .descriptorType = descriptor_type, | ||
| 418 | .offset = static_cast<std::size_t>(offset + i * entry_size), | ||
| 419 | .stride = entry_size, | ||
| 420 | }); | ||
| 421 | } | ||
| 422 | } else if (count > 0) { | ||
| 423 | template_entries.push_back({ | ||
| 424 | .dstBinding = binding, | ||
| 425 | .dstArrayElement = 0, | ||
| 426 | .descriptorCount = count, | ||
| 427 | .descriptorType = descriptor_type, | ||
| 428 | .offset = offset, | ||
| 429 | .stride = entry_size, | ||
| 430 | }); | ||
| 431 | } | ||
| 432 | offset += count * entry_size; | ||
| 433 | binding += count; | ||
| 434 | } | ||
| 435 | |||
| 436 | void FillDescriptorUpdateTemplateEntries( | ||
| 437 | const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 438 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | ||
| 439 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); | ||
| 440 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); | ||
| 441 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); | ||
| 442 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); | ||
| 443 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); | ||
| 444 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); | ||
| 445 | } | ||
| 446 | 103 | ||
| 447 | } // namespace Vulkan | 104 | } // namespace Vulkan |
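The AddEntry/FillDescriptorUpdateTemplateEntries helpers removed above pack one VkDescriptorUpdateTemplateEntry per range of bindings while advancing a shared byte offset, so a single flat payload buffer can back the whole descriptor update template. A minimal standalone sketch of that packing pattern (simplified: no per-sampler arrays, no per-texel-buffer splitting, and stand-in names rather than yuzu's actual types):

```cpp
// Illustrative sketch only. Simplified stand-ins for the removed AddEntry /
// FillDescriptorUpdateTemplateEntries helpers; not yuzu's actual types.
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

using u32 = std::uint32_t;

// Stand-in for the per-descriptor payload written by the update queue.
union DescriptorUpdateEntry {
    VkDescriptorImageInfo image;
    VkDescriptorBufferInfo buffer;
    VkBufferView texel_buffer;
};

// Appends one template entry covering `count` consecutive descriptors of the
// given type, then advances the running binding index and byte offset so the
// next entry class lands directly after this one in the flat payload buffer.
void AddEntries(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries,
                u32& binding, u32& offset, VkDescriptorType type, u32 count) {
    constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
    if (count > 0) {
        template_entries.push_back({
            .dstBinding = binding,
            .dstArrayElement = 0,
            .descriptorCount = count,
            .descriptorType = type,
            .offset = offset,
            .stride = entry_size,
        });
    }
    offset += count * entry_size;
    binding += count;
}
```

In the removed code the same bookkeeping is additionally specialized for combined image samplers (one entry per sampler array) and split into single-descriptor entries for texel buffers to work around an old Nvidia driver crash, but the offset/binding accounting follows the pattern above.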
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..e3e63340d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -15,15 +15,8 @@ | |||
| 15 | #include <boost/functional/hash.hpp> | 15 | #include <boost/functional/hash.hpp> |
| 16 | 16 | ||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 23 | #include "video_core/shader/async_shaders.h" | ||
| 24 | #include "video_core/shader/memory_util.h" | ||
| 25 | #include "video_core/shader/registry.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader_cache.h" | 20 | #include "video_core/shader_cache.h" |
| 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 29 | 22 | ||
| @@ -35,7 +28,7 @@ namespace Vulkan { | |||
| 35 | 28 | ||
| 36 | class Device; | 29 | class Device; |
| 37 | class RasterizerVulkan; | 30 | class RasterizerVulkan; |
| 38 | class VKComputePipeline; | 31 | class ComputePipeline; |
| 39 | class VKDescriptorPool; | 32 | class VKDescriptorPool; |
| 40 | class VKScheduler; | 33 | class VKScheduler; |
| 41 | class VKUpdateDescriptorQueue; | 34 | class VKUpdateDescriptorQueue; |
| @@ -47,7 +40,7 @@ struct ComputePipelineCacheKey { | |||
| 47 | u32 shared_memory_size; | 40 | u32 shared_memory_size; |
| 48 | std::array<u32, 3> workgroup_size; | 41 | std::array<u32, 3> workgroup_size; |
| 49 | 42 | ||
| 50 | std::size_t Hash() const noexcept; | 43 | size_t Hash() const noexcept; |
| 51 | 44 | ||
| 52 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; | 45 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; |
| 53 | 46 | ||
| @@ -64,15 +57,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | |||
| 64 | namespace std { | 57 | namespace std { |
| 65 | 58 | ||
| 66 | template <> | 59 | template <> |
| 67 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 68 | std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 69 | return k.Hash(); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | template <> | ||
| 74 | struct hash<Vulkan::ComputePipelineCacheKey> { | 60 | struct hash<Vulkan::ComputePipelineCacheKey> { |
| 75 | std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | 61 | size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { |
| 76 | return k.Hash(); | 62 | return k.Hash(); |
| 77 | } | 63 | } |
| 78 | }; | 64 | }; |
| @@ -83,66 +69,26 @@ namespace Vulkan { | |||
| 83 | 69 | ||
| 84 | class Shader { | 70 | class Shader { |
| 85 | public: | 71 | public: |
| 86 | explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, | 72 | explicit Shader(); |
| 87 | Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, | ||
| 88 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); | ||
| 89 | ~Shader(); | 73 | ~Shader(); |
| 90 | |||
| 91 | GPUVAddr GetGpuAddr() const { | ||
| 92 | return gpu_addr; | ||
| 93 | } | ||
| 94 | |||
| 95 | VideoCommon::Shader::ShaderIR& GetIR() { | ||
| 96 | return shader_ir; | ||
| 97 | } | ||
| 98 | |||
| 99 | const VideoCommon::Shader::ShaderIR& GetIR() const { | ||
| 100 | return shader_ir; | ||
| 101 | } | ||
| 102 | |||
| 103 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 104 | return registry; | ||
| 105 | } | ||
| 106 | |||
| 107 | const ShaderEntries& GetEntries() const { | ||
| 108 | return entries; | ||
| 109 | } | ||
| 110 | |||
| 111 | private: | ||
| 112 | GPUVAddr gpu_addr{}; | ||
| 113 | VideoCommon::Shader::ProgramCode program_code; | ||
| 114 | VideoCommon::Shader::Registry registry; | ||
| 115 | VideoCommon::Shader::ShaderIR shader_ir; | ||
| 116 | ShaderEntries entries; | ||
| 117 | }; | 74 | }; |
| 118 | 75 | ||
| 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 76 | class PipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 120 | public: | 77 | public: |
| 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | 78 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, |
| 122 | Tegra::Engines::Maxwell3D& maxwell3d, | 79 | Tegra::Engines::Maxwell3D& maxwell3d, |
| 123 | Tegra::Engines::KeplerCompute& kepler_compute, | 80 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 124 | Tegra::MemoryManager& gpu_memory, const Device& device, | 81 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | 82 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, |
| 126 | VKUpdateDescriptorQueue& update_descriptor_queue); | 83 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 127 | ~VKPipelineCache() override; | 84 | ~PipelineCache() override; |
| 128 | |||
| 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | ||
| 130 | 85 | ||
| 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 86 | ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| 132 | u32 num_color_buffers, | ||
| 133 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 134 | |||
| 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | ||
| 136 | |||
| 137 | void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); | ||
| 138 | 87 | ||
| 139 | protected: | 88 | protected: |
| 140 | void OnShaderRemoval(Shader* shader) final; | 89 | void OnShaderRemoval(Shader* shader) final; |
| 141 | 90 | ||
| 142 | private: | 91 | private: |
| 143 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | ||
| 144 | const FixedPipelineState& fixed_state); | ||
| 145 | |||
| 146 | Tegra::GPU& gpu; | 92 | Tegra::GPU& gpu; |
| 147 | Tegra::Engines::Maxwell3D& maxwell3d; | 93 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 148 | Tegra::Engines::KeplerCompute& kepler_compute; | 94 | Tegra::Engines::KeplerCompute& kepler_compute; |
| @@ -158,17 +104,8 @@ private: | |||
| 158 | 104 | ||
| 159 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | 105 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 160 | 106 | ||
| 161 | GraphicsPipelineCacheKey last_graphics_key; | ||
| 162 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||
| 163 | |||
| 164 | std::mutex pipeline_cache; | 107 | std::mutex pipeline_cache; |
| 165 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | 108 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 166 | graphics_cache; | ||
| 167 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||
| 168 | }; | 109 | }; |
| 169 | 110 | ||
| 170 | void FillDescriptorUpdateTemplateEntries( | ||
| 171 | const ShaderEntries& entries, u32& binding, u32& offset, | ||
| 172 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); | ||
| 173 | |||
| 174 | } // namespace Vulkan | 111 | } // namespace Vulkan |
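The trimmed header keeps ComputePipelineCacheKey as a trivially constructible key with a member Hash(), an operator==, and a std::hash specialization forwarding to Hash(), which is what lets the compute_cache unordered_map declared further down use the key directly. A self-contained sketch of that pattern (the FNV-1a byte hash here is an illustrative stand-in; the hunk does not show the real hash implementation):

```cpp
// Sketch of the cache-key pattern kept in vk_pipeline_cache.h: a plain key with
// a member Hash(), an operator==, and a std::hash specialization forwarding to
// Hash(). The FNV-1a hash is a stand-in, not yuzu's hash function.
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <unordered_map>

struct ComputePipelineCacheKey {
    std::uint64_t shader;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup_size;

    std::size_t Hash() const noexcept {
        // Hash the raw bytes of the key; this layout (8 + 4 + 12 bytes) has no
        // internal padding, so every byte is meaningful.
        std::uint64_t hash = 1469598103934665603ull;  // FNV-1a offset basis
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        for (std::size_t i = 0; i < sizeof(*this); ++i) {
            hash = (hash ^ bytes[i]) * 1099511628211ull;  // FNV-1a prime
        }
        return static_cast<std::size_t>(hash);
    }

    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof(*this)) == 0;
    }
};

namespace std {
template <>
struct hash<ComputePipelineCacheKey> {
    size_t operator()(const ComputePipelineCacheKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

// With the specialization above, a cache map compiles without naming a hasher
// (int stands in here for the mapped pipeline type).
std::unordered_map<ComputePipelineCacheKey, int> compute_cache;
```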
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..f152297d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 29 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
| 97 | return scissor; | 96 | return scissor; |
| 98 | } | 97 | } |
| 99 | 98 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 101 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | ||
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 103 | for (size_t i = 0; i < std::size(addresses); ++i) { | ||
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 105 | } | ||
| 106 | return addresses; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct TextureHandle { | 99 | struct TextureHandle { |
| 110 | constexpr TextureHandle(u32 data, bool via_header_index) { | 100 | constexpr TextureHandle(u32 data, bool via_header_index) { |
| 111 | const Tegra::Texture::TextureHandle handle{data}; | 101 | const Tegra::Texture::TextureHandle handle{data}; |
| @@ -117,98 +107,6 @@ struct TextureHandle { | |||
| 117 | u32 sampler; | 107 | u32 sampler; |
| 118 | }; | 108 | }; |
| 119 | 109 | ||
| 120 | template <typename Engine, typename Entry> | ||
| 121 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 122 | size_t stage, size_t index = 0) { | ||
| 123 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 124 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 125 | if (entry.is_separated) { | ||
| 126 | const u32 buffer_1 = entry.buffer; | ||
| 127 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 128 | const u32 offset_1 = entry.offset; | ||
| 129 | const u32 offset_2 = entry.secondary_offset; | ||
| 130 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 131 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 132 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | if (entry.is_bindless) { | ||
| 136 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 137 | return TextureHandle(raw, via_header_index); | ||
| 138 | } | ||
| 139 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 140 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 145 | if (entry.is_buffer) { | ||
| 146 | return ImageViewType::e2D; | ||
| 147 | } | ||
| 148 | switch (entry.type) { | ||
| 149 | case Tegra::Shader::TextureType::Texture1D: | ||
| 150 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 151 | case Tegra::Shader::TextureType::Texture2D: | ||
| 152 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 153 | case Tegra::Shader::TextureType::Texture3D: | ||
| 154 | return ImageViewType::e3D; | ||
| 155 | case Tegra::Shader::TextureType::TextureCube: | ||
| 156 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 157 | } | ||
| 158 | UNREACHABLE(); | ||
| 159 | return ImageViewType::e2D; | ||
| 160 | } | ||
| 161 | |||
| 162 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 163 | switch (entry.type) { | ||
| 164 | case Tegra::Shader::ImageType::Texture1D: | ||
| 165 | return ImageViewType::e1D; | ||
| 166 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 167 | return ImageViewType::e1DArray; | ||
| 168 | case Tegra::Shader::ImageType::Texture2D: | ||
| 169 | return ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 171 | return ImageViewType::e2DArray; | ||
| 172 | case Tegra::Shader::ImageType::Texture3D: | ||
| 173 | return ImageViewType::e3D; | ||
| 174 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 175 | return ImageViewType::Buffer; | ||
| 176 | } | ||
| 177 | UNREACHABLE(); | ||
| 178 | return ImageViewType::e2D; | ||
| 179 | } | ||
| 180 | |||
| 181 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 183 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 184 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 185 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 186 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 187 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 188 | } | ||
| 189 | for (const auto& entry : entries.samplers) { | ||
| 190 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 191 | const VkSampler sampler = *sampler_ptr++; | ||
| 192 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 193 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 194 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 195 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 199 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 200 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 201 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 202 | } | ||
| 203 | for (const auto& entry : entries.images) { | ||
| 204 | // TODO: Mark as modified | ||
| 205 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 206 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 207 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 208 | update_descriptor_queue.AddImage(handle); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | 110 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 213 | bool is_indexed) { | 111 | bool is_indexed) { |
| 214 | DrawParams params{ | 112 | DrawParams params{ |
| @@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 253 | descriptor_pool, update_descriptor_queue), | 151 | descriptor_pool, update_descriptor_queue), |
| 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | 152 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |
| 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 153 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 154 | wfi_event(device.GetLogical().CreateEvent()) { |
| 257 | scheduler.SetQueryCache(query_cache); | 155 | scheduler.SetQueryCache(query_cache); |
| 258 | if (device.UseAsynchronousShaders()) { | ||
| 259 | async_shaders.AllocateWorkers(); | ||
| 260 | } | ||
| 261 | } | 156 | } |
| 262 | 157 | ||
| 263 | RasterizerVulkan::~RasterizerVulkan() = default; | 158 | RasterizerVulkan::~RasterizerVulkan() = default; |
| 264 | 159 | ||
| 265 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | 160 | void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { |
| 266 | MICROPROFILE_SCOPE(Vulkan_Drawing); | 161 | UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced); |
| 267 | |||
| 268 | SCOPE_EXIT({ gpu.TickWork(); }); | ||
| 269 | FlushWork(); | ||
| 270 | |||
| 271 | query_cache.UpdateCounters(); | ||
| 272 | |||
| 273 | graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); | ||
| 274 | |||
| 275 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 276 | |||
| 277 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 278 | texture_cache.UpdateRenderTargets(false); | ||
| 279 | |||
| 280 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 281 | graphics_key.shaders = GetShaderAddresses(shaders); | ||
| 282 | |||
| 283 | SetupShaderDescriptors(shaders, is_indexed); | ||
| 284 | |||
| 285 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||
| 286 | graphics_key.renderpass = framebuffer->RenderPass(); | ||
| 287 | |||
| 288 | VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( | ||
| 289 | graphics_key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 290 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | ||
| 291 | // Async graphics pipeline was not ready. | ||
| 292 | return; | ||
| 293 | } | ||
| 294 | |||
| 295 | BeginTransformFeedback(); | ||
| 296 | |||
| 297 | scheduler.RequestRenderpass(framebuffer); | ||
| 298 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 299 | UpdateDynamicStates(); | ||
| 300 | |||
| 301 | const auto& regs = maxwell3d.regs; | ||
| 302 | const u32 num_instances = maxwell3d.mme_draw.instance_count; | ||
| 303 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||
| 304 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||
| 305 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 306 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||
| 307 | if (descriptor_set) { | ||
| 308 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||
| 309 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 310 | } | ||
| 311 | if (draw_params.is_indexed) { | ||
| 312 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 313 | draw_params.base_vertex, draw_params.base_instance); | ||
| 314 | } else { | ||
| 315 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 316 | draw_params.base_vertex, draw_params.base_instance); | ||
| 317 | } | ||
| 318 | }); | ||
| 319 | |||
| 320 | EndTransformFeedback(); | ||
| 321 | } | 162 | } |
| 322 | 163 | ||
| 323 | void RasterizerVulkan::Clear() { | 164 | void RasterizerVulkan::Clear() { |
| @@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() { | |||
| 395 | }); | 236 | }); |
| 396 | } | 237 | } |
| 397 | 238 | ||
| 398 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 239 | void RasterizerVulkan::DispatchCompute() { |
| 399 | MICROPROFILE_SCOPE(Vulkan_Compute); | 240 | UNREACHABLE_MSG("Not implemented"); |
| 400 | |||
| 401 | query_cache.UpdateCounters(); | ||
| 402 | |||
| 403 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 404 | auto& pipeline = pipeline_cache.GetComputePipeline({ | ||
| 405 | .shader = code_addr, | ||
| 406 | .shared_memory_size = launch_desc.shared_alloc, | ||
| 407 | .workgroup_size{ | ||
| 408 | launch_desc.block_dim_x, | ||
| 409 | launch_desc.block_dim_y, | ||
| 410 | launch_desc.block_dim_z, | ||
| 411 | }, | ||
| 412 | }); | ||
| 413 | |||
| 414 | // Compute dispatches can't be executed inside a renderpass | ||
| 415 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 416 | |||
| 417 | image_view_indices.clear(); | ||
| 418 | sampler_handles.clear(); | ||
| 419 | |||
| 420 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 421 | |||
| 422 | const auto& entries = pipeline.GetEntries(); | ||
| 423 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 424 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 425 | u32 ssbo_index = 0; | ||
| 426 | for (const auto& buffer : entries.global_buffers) { | ||
| 427 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 428 | buffer.is_written); | ||
| 429 | ++ssbo_index; | ||
| 430 | } | ||
| 431 | buffer_cache.UpdateComputeBuffers(); | ||
| 432 | |||
| 433 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 434 | |||
| 435 | SetupComputeUniformTexels(entries); | ||
| 436 | SetupComputeTextures(entries); | ||
| 437 | SetupComputeStorageTexels(entries); | ||
| 438 | SetupComputeImages(entries); | ||
| 439 | |||
| 440 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 441 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 442 | |||
| 443 | update_descriptor_queue.Acquire(); | ||
| 444 | |||
| 445 | buffer_cache.BindHostComputeBuffers(); | ||
| 446 | |||
| 447 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 448 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 449 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 450 | sampler_ptr); | ||
| 451 | |||
| 452 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 453 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 454 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 455 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 456 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, | ||
| 457 | descriptor_set](vk::CommandBuffer cmdbuf) { | ||
| 458 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | ||
| 459 | if (descriptor_set) { | ||
| 460 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, | ||
| 461 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 462 | } | ||
| 463 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | ||
| 464 | }); | ||
| 465 | } | 241 | } |
| 466 | 242 | ||
| 467 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 243 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { |
| @@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | |||
| 716 | return buffer_cache.DMACopy(src_address, dest_address, amount); | 492 | return buffer_cache.DMACopy(src_address, dest_address, amount); |
| 717 | } | 493 | } |
| 718 | 494 | ||
| 719 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 720 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||
| 721 | image_view_indices.clear(); | ||
| 722 | sampler_handles.clear(); | ||
| 723 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 724 | Shader* const shader = shaders[stage + 1]; | ||
| 725 | if (!shader) { | ||
| 726 | continue; | ||
| 727 | } | ||
| 728 | const ShaderEntries& entries = shader->GetEntries(); | ||
| 729 | SetupGraphicsUniformTexels(entries, stage); | ||
| 730 | SetupGraphicsTextures(entries, stage); | ||
| 731 | SetupGraphicsStorageTexels(entries, stage); | ||
| 732 | SetupGraphicsImages(entries, stage); | ||
| 733 | |||
| 734 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 735 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 736 | u32 ssbo_index = 0; | ||
| 737 | for (const auto& buffer : entries.global_buffers) { | ||
| 738 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 739 | buffer.cbuf_offset, buffer.is_written); | ||
| 740 | ++ssbo_index; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 744 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 745 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 746 | |||
| 747 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 748 | |||
| 749 | update_descriptor_queue.Acquire(); | ||
| 750 | |||
| 751 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 752 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 753 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 754 | // Skip VertexA stage | ||
| 755 | Shader* const shader = shaders[stage + 1]; | ||
| 756 | if (!shader) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | buffer_cache.BindHostStageBuffers(stage); | ||
| 760 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, | ||
| 761 | image_view_id_ptr, sampler_ptr); | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | void RasterizerVulkan::UpdateDynamicStates() { | 495 | void RasterizerVulkan::UpdateDynamicStates() { |
| 766 | auto& regs = maxwell3d.regs; | 496 | auto& regs = maxwell3d.regs; |
| 767 | UpdateViewportsState(regs); | 497 | UpdateViewportsState(regs); |
| @@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 810 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 540 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 811 | } | 541 | } |
| 812 | 542 | ||
| 813 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | ||
| 814 | const auto& regs = maxwell3d.regs; | ||
| 815 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 816 | for (const auto& entry : entries.uniform_texels) { | ||
| 817 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 818 | image_view_indices.push_back(handle.image); | ||
| 819 | } | ||
| 820 | } | ||
| 821 | |||
| 822 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | ||
| 823 | const auto& regs = maxwell3d.regs; | ||
| 824 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 825 | for (const auto& entry : entries.samplers) { | ||
| 826 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 827 | const TextureHandle handle = | ||
| 828 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); | ||
| 829 | image_view_indices.push_back(handle.image); | ||
| 830 | |||
| 831 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 832 | sampler_handles.push_back(sampler->Handle()); | ||
| 833 | } | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | ||
| 838 | const auto& regs = maxwell3d.regs; | ||
| 839 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 840 | for (const auto& entry : entries.storage_texels) { | ||
| 841 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 842 | image_view_indices.push_back(handle.image); | ||
| 843 | } | ||
| 844 | } | ||
| 845 | |||
| 846 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | ||
| 847 | const auto& regs = maxwell3d.regs; | ||
| 848 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 849 | for (const auto& entry : entries.images) { | ||
| 850 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 851 | image_view_indices.push_back(handle.image); | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | ||
| 856 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 857 | for (const auto& entry : entries.uniform_texels) { | ||
| 858 | const TextureHandle handle = | ||
| 859 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 860 | image_view_indices.push_back(handle.image); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 865 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 866 | for (const auto& entry : entries.samplers) { | ||
| 867 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 868 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, | ||
| 869 | COMPUTE_SHADER_INDEX, index); | ||
| 870 | image_view_indices.push_back(handle.image); | ||
| 871 | |||
| 872 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 873 | sampler_handles.push_back(sampler->Handle()); | ||
| 874 | } | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 879 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 880 | for (const auto& entry : entries.storage_texels) { | ||
| 881 | const TextureHandle handle = | ||
| 882 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 883 | image_view_indices.push_back(handle.image); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 888 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 889 | for (const auto& entry : entries.images) { | ||
| 890 | const TextureHandle handle = | ||
| 891 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 892 | image_view_indices.push_back(handle.image); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 543 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 897 | if (!state_tracker.TouchViewports()) { | 544 | if (!state_tracker.TouchViewports()) { |
| 898 | return; | 545 | return; |
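Among the helpers removed from vk_rasterizer.cpp are the ImageViewTypeFromEntry overloads, which map a sampler or image entry's texture type (plus its is_buffer/is_array flags) onto the image view type requested from the texture cache. A simplified standalone sketch of that mapping, with stand-in enums in place of Tegra::Shader::TextureType and VideoCommon::ImageViewType:

```cpp
// Sketch of the texture-type to image-view-type mapping performed by the
// removed ImageViewTypeFromEntry helpers. The enums are simplified stand-ins.
#include <cassert>

enum class TextureType { Texture1D, Texture2D, Texture3D, TextureCube };
enum class ImageViewType { e1D, e1DArray, e2D, e2DArray, e3D, Cube, CubeArray };

// Buffer samplers fall back to a 2D view; arrayed textures pick the *Array form.
ImageViewType ViewTypeFromSampler(TextureType type, bool is_buffer, bool is_array) {
    if (is_buffer) {
        return ImageViewType::e2D;
    }
    switch (type) {
    case TextureType::Texture1D:
        return is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
    case TextureType::Texture2D:
        return is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
    case TextureType::Texture3D:
        return ImageViewType::e3D;
    case TextureType::TextureCube:
        return is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
    }
    assert(false && "unknown texture type");
    return ImageViewType::e2D;
}
```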
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..31017dc2b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 31 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 33 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 34 | 33 | ||
| @@ -73,7 +72,7 @@ public: | |||
| 73 | 72 | ||
| 74 | void Draw(bool is_indexed, bool is_instanced) override; | 73 | void Draw(bool is_indexed, bool is_instanced) override; |
| 75 | void Clear() override; | 74 | void Clear() override; |
| 76 | void DispatchCompute(GPUVAddr code_addr) override; | 75 | void DispatchCompute() override; |
| 77 | void ResetCounter(VideoCore::QueryType type) override; | 76 | void ResetCounter(VideoCore::QueryType type) override; |
| 78 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 77 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 79 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 78 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -103,19 +102,6 @@ public: | |||
| 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 102 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 104 | u32 pixel_stride) override; | 103 | u32 pixel_stride) override; |
| 105 | 104 | ||
| 106 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 107 | return async_shaders; | ||
| 108 | } | ||
| 109 | |||
| 110 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 111 | return async_shaders; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 115 | static constexpr size_t MaxConstbufferSize = 0x10000; | ||
| 116 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 117 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 118 | |||
| 119 | private: | 105 | private: |
| 120 | static constexpr size_t MAX_TEXTURES = 192; | 106 | static constexpr size_t MAX_TEXTURES = 192; |
| 121 | static constexpr size_t MAX_IMAGES = 48; | 107 | static constexpr size_t MAX_IMAGES = 48; |
| @@ -125,40 +111,12 @@ private: | |||
| 125 | 111 | ||
| 126 | void FlushWork(); | 112 | void FlushWork(); |
| 127 | 113 | ||
| 128 | /// Setup descriptors in the graphics pipeline. | ||
| 129 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, | ||
| 130 | bool is_indexed); | ||
| 131 | |||
| 132 | void UpdateDynamicStates(); | 114 | void UpdateDynamicStates(); |
| 133 | 115 | ||
| 134 | void BeginTransformFeedback(); | 116 | void BeginTransformFeedback(); |
| 135 | 117 | ||
| 136 | void EndTransformFeedback(); | 118 | void EndTransformFeedback(); |
| 137 | 119 | ||
| 138 | /// Setup uniform texels in the graphics pipeline. | ||
| 139 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 140 | |||
| 141 | /// Setup textures in the graphics pipeline. | ||
| 142 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 143 | |||
| 144 | /// Setup storage texels in the graphics pipeline. | ||
| 145 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 146 | |||
| 147 | /// Setup images in the graphics pipeline. | ||
| 148 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 149 | |||
| 150 | /// Setup texel buffers in the compute pipeline. | ||
| 151 | void SetupComputeUniformTexels(const ShaderEntries& entries); | ||
| 152 | |||
| 153 | /// Setup textures in the compute pipeline. | ||
| 154 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 155 | |||
| 156 | /// Setup storage texels in the compute pipeline. | ||
| 157 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 158 | |||
| 159 | /// Setup images in the compute pipeline. | ||
| 160 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 161 | |||
| 162 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 120 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 163 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 121 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 164 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 122 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -198,13 +156,12 @@ private: | |||
| 198 | TextureCache texture_cache; | 156 | TextureCache texture_cache; |
| 199 | BufferCacheRuntime buffer_cache_runtime; | 157 | BufferCacheRuntime buffer_cache_runtime; |
| 200 | BufferCache buffer_cache; | 158 | BufferCache buffer_cache; |
| 201 | VKPipelineCache pipeline_cache; | 159 | PipelineCache pipeline_cache; |
| 202 | VKQueryCache query_cache; | 160 | VKQueryCache query_cache; |
| 203 | AccelerateDMA accelerate_dma; | 161 | AccelerateDMA accelerate_dma; |
| 204 | VKFenceManager fence_manager; | 162 | VKFenceManager fence_manager; |
| 205 | 163 | ||
| 206 | vk::Event wfi_event; | 164 | vk::Event wfi_event; |
| 207 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 208 | 165 | ||
| 209 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 166 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 210 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 167 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |