author    bunnei 2021-07-25 11:39:04 -0700
committer GitHub 2021-07-25 11:39:04 -0700
commit    98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree      816faa96c2c4d291825063433331a8ea4b3d08f1 /src/video_core/renderer_vulkan
parent    Merge pull request #6699 from lat9nq/common-threads (diff)
parent    shader: Support out of bound local memory reads and immediate writes (diff)
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.cpp  40
-rw-r--r--  src/video_core/renderer_vulkan/blit_image.h  2
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.cpp  92
-rw-r--r--  src/video_core/renderer_vulkan/fixed_pipeline_state.h  79
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp  54
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h  7
-rw-r--r--  src/video_core/renderer_vulkan/pipeline_helper.h  154
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp  60
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp  94
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp  68
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h  22
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp  270
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h  34
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp  296
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h  72
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp  172
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h  70
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp  839
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h  145
-rw-r--r--  src/video_core/renderer_vulkan/vk_master_semaphore.h  6
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp  867
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h  176
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp  8
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp  475
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h  59
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.cpp  96
-rw-r--r--  src/video_core/renderer_vulkan/vk_render_pass_cache.h  55
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.cpp  12
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_pool.h  12
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp  172
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h  38
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp  3166
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h  99
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp  2
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.cpp  56
-rw-r--r--  src/video_core/renderer_vulkan/vk_state_tracker.h  15
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp  59
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h  31
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp  243
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h  80
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp  13
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h  4
42 files changed, 2988 insertions, 5326 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA
     .bindingCount = 1,
     .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
 };
+template <u32 num_textures>
+inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
+    .uniform_buffers = 0,
+    .storage_buffers = 0,
+    .texture_buffers = 0,
+    .image_buffers = 0,
+    .textures = num_textures,
+    .images = 0,
+    .score = 2,
+};
 constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
     .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
     .pNext = nullptr,
@@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
     cmdbuf.SetScissor(0, scissor);
     cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
 }
-
 } // Anonymous namespace
 
 BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
-                                 StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
+                                 StateTracker& state_tracker_, DescriptorPool& descriptor_pool)
     : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
       one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
          ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
       two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
          TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
-      one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
-      two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
+      one_texture_descriptor_allocator{
+          descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)},
+      two_textures_descriptor_allocator{
+          descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)},
       one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
          PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
       two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
@@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
         .operation = operation,
     };
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
-    const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+    const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
     const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
     const VkPipeline pipeline = FindOrEmplacePipeline(key);
-    const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
     scheduler.RequestRenderpass(dst_framebuffer);
-    scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
-                      &device = device](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
+                      src_view](vk::CommandBuffer cmdbuf) {
         // TODO: Barriers
+        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
         UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
@@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
     const VkPipelineLayout layout = *two_textures_pipeline_layout;
     const VkSampler sampler = *nearest_sampler;
     const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
-    const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
     scheduler.RequestRenderpass(dst_framebuffer);
     scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
-                      src_stencil_view, descriptor_set,
-                      &device = device](vk::CommandBuffer cmdbuf) {
+                      src_stencil_view, this](vk::CommandBuffer cmdbuf) {
         // TODO: Barriers
+        const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
         UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
                                        src_stencil_view);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
 
 void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
                                       const ImageView& src_image_view) {
-
     ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
     Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
 }
@@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
 void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                               const ImageView& src_image_view) {
     const VkPipelineLayout layout = *one_texture_pipeline_layout;
-    const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
+    const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
     const VkSampler sampler = *nearest_sampler;
-    const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
     const VkExtent2D extent{
         .width = src_image_view.size.width,
         .height = src_image_view.size.height,
     };
     scheduler.RequestRenderpass(dst_framebuffer);
-    scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
-                      &device = device](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
         const VkOffset2D offset{
             .x = 0,
             .y = 0,
@@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
             .tex_scale = {viewport.width, viewport.height},
             .tex_offset = {0.0f, 0.0f},
         };
+        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
         UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
 
         // TODO: Barriers
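
A minimal standalone sketch of the variable-template pattern used for TEXTURE_DESCRIPTOR_BANK_INFO<N> in the hunk above; the DescriptorBankInfo fields mirror the diff, while the surrounding scaffolding (main and the printed output) is illustrative only and not part of the commit.

// Sketch only: one constexpr bank description is stamped out per texture
// count, so call sites can write TEXTURE_DESCRIPTOR_BANK_INFO<1> or <2>
// instead of building the struct at runtime. Requires C++20.
#include <cstdint>
#include <cstdio>

struct DescriptorBankInfo {
    std::uint32_t uniform_buffers;
    std::uint32_t storage_buffers;
    std::uint32_t texture_buffers;
    std::uint32_t image_buffers;
    std::uint32_t textures;
    std::uint32_t images;
    std::uint32_t score;
};

template <std::uint32_t num_textures>
inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{
    .uniform_buffers = 0,
    .storage_buffers = 0,
    .texture_buffers = 0,
    .image_buffers = 0,
    .textures = num_textures,
    .images = 0,
    .score = 2,
};

int main() {
    static_assert(TEXTURE_DESCRIPTOR_BANK_INFO<2>.textures == 2);
    std::printf("two-texture bank: %u textures, score %u\n",
                static_cast<unsigned>(TEXTURE_DESCRIPTOR_BANK_INFO<2>.textures),
                static_cast<unsigned>(TEXTURE_DESCRIPTOR_BANK_INFO<2>.score));
}
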
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -31,7 +31,7 @@ struct BlitImagePipelineKey {
 class BlitImageHelper {
 public:
     explicit BlitImageHelper(const Device& device, VKScheduler& scheduler,
-                             StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
+                             StateTracker& state_tracker, DescriptorPool& descriptor_pool);
     ~BlitImageHelper();
 
     void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -15,9 +15,7 @@
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
 
 namespace Vulkan {
-
 namespace {
-
 constexpr size_t POINT = 0;
 constexpr size_t LINE = 1;
 constexpr size_t POLYGON = 2;
@@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
     POLYGON, // Patches
 };
 
+void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) {
+    std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) {
+        return VideoCommon::TransformFeedbackState::Layout{
+            .stream = layout.stream,
+            .varying_count = layout.varying_count,
+            .stride = layout.stride,
+        };
+    });
+    state.varyings = regs.tfb_varying_locs;
+}
 } // Anonymous namespace
 
 void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
-                                 bool has_extended_dynamic_state) {
+                                 bool has_extended_dynamic_state, bool has_dynamic_vertex_input) {
     const Maxwell& regs = maxwell3d.regs;
     const std::array enabled_lut{
         regs.polygon_offset_point_enable,
@@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
     const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
 
     raw1 = 0;
+    extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0);
+    dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0);
+    xfb_enabled.Assign(regs.tfb_enabled != 0);
     primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
     depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
     depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
     tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
     logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
     logic_op.Assign(PackLogicOp(regs.logic_op.operation));
-    rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
     topology.Assign(regs.draw.topology);
     msaa_mode.Assign(regs.multisample_mode);
 
     raw2 = 0;
+    rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
     const auto test_func =
         regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always;
     alpha_test_func.Assign(PackComparisonOp(test_func));
     early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
-
+    depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0);
+    depth_format.Assign(static_cast<u32>(regs.zeta.format));
+    y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0);
+    provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0);
+    conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0);
+    smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0);
+
+    for (size_t i = 0; i < regs.rt.size(); ++i) {
+        color_formats[i] = static_cast<u8>(regs.rt[i].format);
+    }
     alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref);
     point_size = Common::BitCast<u32>(regs.point_size);
 
-    if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) {
-        maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false;
-        for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
-            const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
-            binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
-        }
-    }
-    if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) {
-        maxwell3d.dirty.flags[Dirty::VertexAttributes] = false;
-        for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
-            const auto& input = regs.vertex_attrib_format[index];
-            auto& attribute = attributes[index];
-            attribute.raw = 0;
-            attribute.enabled.Assign(input.IsConstant() ? 0 : 1);
-            attribute.buffer.Assign(input.buffer);
-            attribute.offset.Assign(input.offset);
-            attribute.type.Assign(static_cast<u32>(input.type.Value()));
-            attribute.size.Assign(static_cast<u32>(input.size.Value()));
+    if (maxwell3d.dirty.flags[Dirty::VertexInput]) {
+        if (has_dynamic_vertex_input) {
+            // Dirty flag will be reset by the command buffer update
+            static constexpr std::array LUT{
+                0u, // Invalid
+                1u, // SignedNorm
+                1u, // UnsignedNorm
+                2u, // SignedInt
+                3u, // UnsignedInt
+                1u, // UnsignedScaled
+                1u, // SignedScaled
+                1u, // Float
+            };
+            const auto& attrs = regs.vertex_attrib_format;
+            attribute_types = 0;
+            for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+                const u32 mask = attrs[i].constant != 0 ? 0 : 3;
+                const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())];
+                attribute_types |= static_cast<u64>(type & mask) << (i * 2);
+            }
+        } else {
+            maxwell3d.dirty.flags[Dirty::VertexInput] = false;
+            enabled_divisors = 0;
+            for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+                const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index);
+                binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0;
+                enabled_divisors |= (is_enabled ? u64{1} : 0) << index;
+            }
+            for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
+                const auto& input = regs.vertex_attrib_format[index];
+                auto& attribute = attributes[index];
+                attribute.raw = 0;
+                attribute.enabled.Assign(input.constant ? 0 : 1);
+                attribute.buffer.Assign(input.buffer);
+                attribute.offset.Assign(input.offset);
+                attribute.type.Assign(static_cast<u32>(input.type.Value()));
+                attribute.size.Assign(static_cast<u32>(input.size.Value()));
+            }
         }
     }
     if (maxwell3d.dirty.flags[Dirty::Blending]) {
@@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
             return static_cast<u16>(viewport.swizzle.raw);
         });
     }
-    if (!has_extended_dynamic_state) {
-        no_extended_dynamic_state.Assign(1);
+    if (!extended_dynamic_state) {
         dynamic_state.Refresh(regs);
     }
+    if (xfb_enabled) {
+        RefreshXfbState(xfb_state, regs);
+    }
 }
 
 void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -12,6 +12,7 @@
 
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/surface.h"
+#include "video_core/transform_feedback.h"
 
 namespace Vulkan {
 
@@ -60,7 +61,7 @@ struct FixedPipelineState {
 
     void Refresh(const Maxwell& regs, size_t index);
 
-    constexpr std::array<bool, 4> Mask() const noexcept {
+    std::array<bool, 4> Mask() const noexcept {
         return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0};
     }
 
@@ -97,11 +98,11 @@ struct FixedPipelineState {
         BitField<20, 3, u32> type;
         BitField<23, 6, u32> size;
 
-        constexpr Maxwell::VertexAttribute::Type Type() const noexcept {
+        Maxwell::VertexAttribute::Type Type() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Type>(type.Value());
         }
 
-        constexpr Maxwell::VertexAttribute::Size Size() const noexcept {
+        Maxwell::VertexAttribute::Size Size() const noexcept {
             return static_cast<Maxwell::VertexAttribute::Size>(size.Value());
         }
     };
@@ -167,37 +168,53 @@ struct FixedPipelineState {
 
     union {
         u32 raw1;
-        BitField<0, 1, u32> no_extended_dynamic_state;
-        BitField<2, 1, u32> primitive_restart_enable;
-        BitField<3, 1, u32> depth_bias_enable;
-        BitField<4, 1, u32> depth_clamp_disabled;
-        BitField<5, 1, u32> ndc_minus_one_to_one;
-        BitField<6, 2, u32> polygon_mode;
-        BitField<8, 5, u32> patch_control_points_minus_one;
-        BitField<13, 2, u32> tessellation_primitive;
-        BitField<15, 2, u32> tessellation_spacing;
-        BitField<17, 1, u32> tessellation_clockwise;
-        BitField<18, 1, u32> logic_op_enable;
-        BitField<19, 4, u32> logic_op;
-        BitField<23, 1, u32> rasterize_enable;
+        BitField<0, 1, u32> extended_dynamic_state;
+        BitField<1, 1, u32> dynamic_vertex_input;
+        BitField<2, 1, u32> xfb_enabled;
+        BitField<3, 1, u32> primitive_restart_enable;
+        BitField<4, 1, u32> depth_bias_enable;
+        BitField<5, 1, u32> depth_clamp_disabled;
+        BitField<6, 1, u32> ndc_minus_one_to_one;
+        BitField<7, 2, u32> polygon_mode;
+        BitField<9, 5, u32> patch_control_points_minus_one;
+        BitField<14, 2, u32> tessellation_primitive;
+        BitField<16, 2, u32> tessellation_spacing;
+        BitField<18, 1, u32> tessellation_clockwise;
+        BitField<19, 1, u32> logic_op_enable;
+        BitField<20, 4, u32> logic_op;
         BitField<24, 4, Maxwell::PrimitiveTopology> topology;
         BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
     };
     union {
         u32 raw2;
-        BitField<0, 3, u32> alpha_test_func;
-        BitField<3, 1, u32> early_z;
+        BitField<0, 1, u32> rasterize_enable;
+        BitField<1, 3, u32> alpha_test_func;
+        BitField<4, 1, u32> early_z;
+        BitField<5, 1, u32> depth_enabled;
+        BitField<6, 5, u32> depth_format;
+        BitField<11, 1, u32> y_negate;
+        BitField<12, 1, u32> provoking_vertex_last;
+        BitField<13, 1, u32> conservative_raster_enable;
+        BitField<14, 1, u32> smooth_lines;
     };
+    std::array<u8, Maxwell::NumRenderTargets> color_formats;
 
     u32 alpha_test_ref;
     u32 point_size;
-    std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
-    std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
     std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
     std::array<u16, Maxwell::NumViewports> viewport_swizzles;
+    union {
+        u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state
+        u64 enabled_divisors;
+    };
+    std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes;
+    std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
+
     DynamicState dynamic_state;
+    VideoCommon::TransformFeedbackState xfb_state;
 
-    void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
+    void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state,
+                 bool has_dynamic_vertex_input);
 
     size_t Hash() const noexcept;
 
@@ -208,8 +225,24 @@ struct FixedPipelineState {
     }
 
     size_t Size() const noexcept {
-        const size_t total_size = sizeof *this;
-        return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+        if (xfb_enabled) {
+            // When transform feedback is enabled, use the whole struct
+            return sizeof(*this);
+        }
+        if (dynamic_vertex_input) {
+            // Exclude dynamic state and attributes
+            return offsetof(FixedPipelineState, attributes);
+        }
+        if (extended_dynamic_state) {
+            // Exclude dynamic state
+            return offsetof(FixedPipelineState, dynamic_state);
+        }
+        // Default
+        return offsetof(FixedPipelineState, xfb_state);
+    }
+
+    u32 DynamicAttributeType(size_t index) const noexcept {
+        return (attribute_types >> (index * 2)) & 0b11;
     }
 };
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
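
A small self-contained sketch of the 2-bit encoding that attribute_types and DynamicAttributeType() implement in the header above: one type code per vertex attribute, so up to 32 attributes fit in a single u64. The code values follow the LUT in the fixed_pipeline_state.cpp hunk; the main function and printed output are illustrative only.

// Sketch of the 2-bit-per-attribute packing used by attribute_types above.
// Codes: 0 = invalid, 1 = float-like, 2 = signed int, 3 = unsigned int.
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
    std::uint64_t attribute_types = 0;

    // Encode attribute 0 as float-like and attribute 5 as unsigned int.
    attribute_types |= std::uint64_t{1} << (0 * 2);
    attribute_types |= std::uint64_t{3} << (5 * 2);

    // Decode, mirroring DynamicAttributeType(index).
    const auto dynamic_attribute_type = [&](std::size_t index) {
        return static_cast<std::uint32_t>((attribute_types >> (index * 2)) & 0b11);
    };
    std::printf("attr0=%u attr5=%u attr7=%u\n", dynamic_attribute_type(0),
                dynamic_attribute_type(5), dynamic_attribute_type(7));
    return 0;
}
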
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -157,7 +157,7 @@ struct FormatTuple {
     {VK_FORMAT_R32_SFLOAT, Attachable | Storage},    // R32_FLOAT
     {VK_FORMAT_R16_SFLOAT, Attachable | Storage},    // R16_FLOAT
     {VK_FORMAT_R16_UNORM, Attachable | Storage},     // R16_UNORM
-    {VK_FORMAT_UNDEFINED},                           // R16_SNORM
+    {VK_FORMAT_R16_SNORM, Attachable | Storage},     // R16_SNORM
     {VK_FORMAT_R16_UINT, Attachable | Storage},      // R16_UINT
     {VK_FORMAT_UNDEFINED},                           // R16_SINT
     {VK_FORMAT_R16G16_UNORM, Attachable | Storage},  // R16G16_UNORM
@@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
     return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
 }
 
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage) {
     switch (stage) {
-    case Tegra::Engines::ShaderType::Vertex:
+    case Shader::Stage::VertexA:
+    case Shader::Stage::VertexB:
         return VK_SHADER_STAGE_VERTEX_BIT;
-    case Tegra::Engines::ShaderType::TesselationControl:
+    case Shader::Stage::TessellationControl:
         return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
-    case Tegra::Engines::ShaderType::TesselationEval:
+    case Shader::Stage::TessellationEval:
         return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
-    case Tegra::Engines::ShaderType::Geometry:
+    case Shader::Stage::Geometry:
         return VK_SHADER_STAGE_GEOMETRY_BIT;
-    case Tegra::Engines::ShaderType::Fragment:
+    case Shader::Stage::Fragment:
         return VK_SHADER_STAGE_FRAGMENT_BIT;
-    case Tegra::Engines::ShaderType::Compute:
+    case Shader::Stage::Compute:
         return VK_SHADER_STAGE_COMPUTE_BIT;
     }
     UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage);
@@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) {
     return {};
 }
 
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) {
+    switch (polygon_mode) {
+    case Maxwell::PolygonMode::Point:
+        return VK_POLYGON_MODE_POINT;
+    case Maxwell::PolygonMode::Line:
+        return VK_POLYGON_MODE_LINE;
+    case Maxwell::PolygonMode::Fill:
+        return VK_POLYGON_MODE_FILL;
+    }
+    UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode);
+    return {};
+}
+
 VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
     switch (swizzle) {
     case Tegra::Texture::SwizzleSource::Zero:
@@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti
     return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
 }
 
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
+    switch (msaa_mode) {
+    case Tegra::Texture::MsaaMode::Msaa1x1:
+        return VK_SAMPLE_COUNT_1_BIT;
+    case Tegra::Texture::MsaaMode::Msaa2x1:
+    case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
+        return VK_SAMPLE_COUNT_2_BIT;
+    case Tegra::Texture::MsaaMode::Msaa2x2:
+    case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
+    case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
+        return VK_SAMPLE_COUNT_4_BIT;
+    case Tegra::Texture::MsaaMode::Msaa4x2:
+    case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
+    case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
+    case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
+        return VK_SAMPLE_COUNT_8_BIT;
+    case Tegra::Texture::MsaaMode::Msaa4x4:
+        return VK_SAMPLE_COUNT_16_BIT;
+    default:
+        UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
+        return VK_SAMPLE_COUNT_1_BIT;
+    }
+}
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "shader_recompiler/stage.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/surface.h"
 #include "video_core/textures/texture.h"
@@ -45,7 +46,7 @@ struct FormatInfo {
 [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb,
                                        PixelFormat pixel_format);
 
-VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
+VkShaderStageFlagBits ShaderStage(Shader::Stage stage);
 
 VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology);
 
@@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face);
 
 VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face);
 
+VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode);
+
 VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
 
 VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
 
 VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
 
+VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode);
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file mode 100644
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -0,0 +1,154 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+
+#include <boost/container/small_vector.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "shader_recompiler/shader_info.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/texture.h"
+#include "video_core/vulkan_common/vulkan_device.h"
+
+namespace Vulkan {
+
+class DescriptorLayoutBuilder {
+public:
+    DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
+
+    bool CanUsePushDescriptor() const noexcept {
+        return device->IsKhrPushDescriptorSupported() &&
+               num_descriptors <= device->MaxPushDescriptors();
+    }
+
+    vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
+        if (bindings.empty()) {
+            return nullptr;
+        }
+        const VkDescriptorSetLayoutCreateFlags flags =
+            use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
+        return device->GetLogical().CreateDescriptorSetLayout({
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = flags,
+            .bindingCount = static_cast<u32>(bindings.size()),
+            .pBindings = bindings.data(),
+        });
+    }
+
+    vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
+                                                   VkPipelineLayout pipeline_layout,
+                                                   bool use_push_descriptor) const {
+        if (entries.empty()) {
+            return nullptr;
+        }
+        const VkDescriptorUpdateTemplateType type =
+            use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
+                                : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
+        return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
+            .pNext = nullptr,
+            .flags = 0,
+            .descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
+            .pDescriptorUpdateEntries = entries.data(),
+            .templateType = type,
+            .descriptorSetLayout = descriptor_set_layout,
+            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+            .pipelineLayout = pipeline_layout,
+            .set = 0,
+        });
+    }
+
+    vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
+        return device->GetLogical().CreatePipelineLayout({
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .setLayoutCount = descriptor_set_layout ? 1U : 0U,
+            .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout,
+            .pushConstantRangeCount = 0,
+            .pPushConstantRanges = nullptr,
+        });
+    }
+
+    void Add(const Shader::Info& info, VkShaderStageFlags stage) {
+        Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors);
+        Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors);
+        Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors);
+        Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors);
+        Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors);
+        Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors);
+    }
+
+private:
+    template <typename Descriptors>
+    void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) {
+        const size_t num{descriptors.size()};
+        for (size_t i = 0; i < num; ++i) {
+            bindings.push_back({
+                .binding = binding,
+                .descriptorType = type,
+                .descriptorCount = descriptors[i].count,
+                .stageFlags = stage,
+                .pImmutableSamplers = nullptr,
+            });
+            entries.push_back({
+                .dstBinding = binding,
+                .dstArrayElement = 0,
+                .descriptorCount = descriptors[i].count,
+                .descriptorType = type,
+                .offset = offset,
+                .stride = sizeof(DescriptorUpdateEntry),
+            });
+            ++binding;
+            num_descriptors += descriptors[i].count;
+            offset += sizeof(DescriptorUpdateEntry);
+        }
+    }
+
+    const Device* device{};
+    boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
+    boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
+    u32 binding{};
+    u32 num_descriptors{};
+    size_t offset{};
+};
+
+inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers,
+                                 const ImageId*& image_view_ids, TextureCache& texture_cache,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue) {
+    for (const auto& desc : info.texture_buffer_descriptors) {
+        image_view_ids += desc.count;
+    }
+    for (const auto& desc : info.image_buffer_descriptors) {
+        image_view_ids += desc.count;
+    }
+    for (const auto& desc : info.texture_descriptors) {
+        for (u32 index = 0; index < desc.count; ++index) {
+            const VkSampler sampler{*(samplers++)};
+            ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+            const VkImageView vk_image_view{image_view.Handle(desc.type)};
+            update_descriptor_queue.AddSampledImage(vk_image_view, sampler);
+        }
+    }
+    for (const auto& desc : info.image_descriptors) {
+        for (u32 index = 0; index < desc.count; ++index) {
+            ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))};
+            if (desc.is_written) {
+                texture_cache.MarkModification(image_view.image_id);
+            }
+            const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
+            update_descriptor_queue.AddImage(vk_image_view);
+        }
+    }
+}
+
+} // namespace Vulkan
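
PushImageDescriptors in the new header above walks flat arrays of samplers and image-view IDs with raw pointers, skipping the entries already consumed by the texel-buffer bindings before it touches the sampled images. A stripped-down sketch of that consumption pattern, with plain ints standing in for the renderer's handle types; none of the names below belong to the commit.

// Sketch of the pointer-advancing consumption pattern; placeholder types only.
#include <cstdio>
#include <vector>

struct Descriptor {
    unsigned count; // number of array elements bound by this descriptor
};

int main() {
    const std::vector<Descriptor> texture_buffer_descriptors{{1}, {2}}; // 3 ids to skip
    const std::vector<Descriptor> texture_descriptors{{2}};             // 2 ids to bind
    const std::vector<int> image_view_ids{10, 11, 12, 20, 21};

    const int* id_ptr = image_view_ids.data();
    // Texel buffers were written by an earlier pass, so only advance past them.
    for (const Descriptor& desc : texture_buffer_descriptors) {
        id_ptr += desc.count;
    }
    // The remaining ids belong to sampled images and are consumed one by one.
    for (const Descriptor& desc : texture_descriptors) {
        for (unsigned i = 0; i < desc.count; ++i) {
            std::printf("bind sampled image view id=%d\n", *(id_ptr++));
        }
    }
    return 0;
}
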
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bec3a81d9..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     if (!framebuffer) {
         return;
     }
-    const auto& layout = render_window.GetFramebufferLayout();
-    if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) {
-        const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
-        const bool use_accelerated =
-            rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
-        const bool is_srgb = use_accelerated && screen_info.is_srgb;
-        if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) {
-            swapchain.Create(layout.width, layout.height, is_srgb);
-            blit_screen.Recreate();
-        }
-
-        scheduler.WaitWorker();
-
-        while (!swapchain.AcquireNextImage()) {
-            swapchain.Create(layout.width, layout.height, is_srgb);
-            blit_screen.Recreate();
+    SCOPE_EXIT({ render_window.OnFrameDisplayed(); });
+    if (!render_window.IsShown()) {
+        return;
+    }
+    const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
+    const bool use_accelerated =
+        rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
+    const bool is_srgb = use_accelerated && screen_info.is_srgb;
+
+    bool has_been_recreated = false;
+    const auto recreate_swapchain = [&] {
+        if (!has_been_recreated) {
+            has_been_recreated = true;
+            scheduler.WaitWorker();
         }
-        const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
-
-        scheduler.Flush(render_semaphore);
-
-        if (swapchain.Present(render_semaphore)) {
-            blit_screen.Recreate();
+        const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
+        swapchain.Create(layout.width, layout.height, is_srgb);
+    };
+    if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) {
+        recreate_swapchain();
+    }
+    bool is_outdated;
+    do {
+        swapchain.AcquireNextImage();
+        is_outdated = swapchain.IsOutDated();
+        if (is_outdated) {
+            recreate_swapchain();
         }
-        gpu.RendererFrameEndNotify();
-        rasterizer.TickFrame();
+    } while (is_outdated);
+    if (has_been_recreated) {
+        blit_screen.Recreate();
     }
+    const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
+    scheduler.Flush(render_semaphore);
+    scheduler.WaitWorker();
+    swapchain.Present(render_semaphore);
 
-    render_window.OnFrameDisplayed();
+    gpu.RendererFrameEndNotify();
+    rasterizer.TickFrame();
 }
 
 void RendererVulkan::Report() const {
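
The SwapBuffers hunk above replaces the old blind retry loop with a recreate-once-then-retry shape: heavy work (WaitWorker) runs only the first time the swapchain is recreated, acquisition is retried while the swapchain reports out-of-date, and dependent resources are rebuilt once at the end. A control-flow-only sketch, with the swapchain calls stubbed by a counter; nothing below is the renderer's real API.

// Sketch of the recreate-once-then-retry control flow; stubbed swapchain.
#include <cstdio>

int main() {
    int failures_left = 2; // pretend the first two acquires report "out of date"
    bool has_been_recreated = false;

    const auto recreate_swapchain = [&] {
        if (!has_been_recreated) {
            has_been_recreated = true; // one-time heavy work happens here
        }
        std::puts("recreate swapchain");
    };

    bool is_outdated;
    do {
        is_outdated = failures_left-- > 0; // stands in for swapchain.IsOutDated()
        if (is_outdated) {
            recreate_swapchain();
        }
    } while (is_outdated);
    if (has_been_recreated) {
        std::puts("recreate dependent resources (blit screen)");
    }
    return 0;
}
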
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 363134129..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
             .depth = 1,
         },
     };
-    scheduler.Record(
-        [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
-            const VkImageMemoryBarrier base_barrier{
-                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                .pNext = nullptr,
-                .srcAccessMask = 0,
-                .dstAccessMask = 0,
-                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
-                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = image,
-                .subresourceRange =
-                    {
-                        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
-                        .baseMipLevel = 0,
-                        .levelCount = 1,
-                        .baseArrayLayer = 0,
-                        .layerCount = 1,
-                    },
-            };
-            VkImageMemoryBarrier read_barrier = base_barrier;
-            read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
-            read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-            read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
-
-            VkImageMemoryBarrier write_barrier = base_barrier;
-            write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-            write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
-
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   0, read_barrier);
-            cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
-        });
+    scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) {
+        const VkImage image = *raw_images[image_index];
+        const VkImageMemoryBarrier base_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = 0,
+            .dstAccessMask = 0,
+            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = image,
+            .subresourceRange{
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .baseMipLevel = 0,
+                .levelCount = 1,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+        };
+        VkImageMemoryBarrier read_barrier = base_barrier;
+        read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+        read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+        VkImageMemoryBarrier write_barrier = base_barrier;
+        write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+                               read_barrier);
+        cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
+    });
     }
-    scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
-                      descriptor_set = descriptor_sets[image_index], buffer = *buffer,
-                      size = swapchain.GetSize(), pipeline = *pipeline,
-                      layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
         const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
         const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
         const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
@@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
         const VkRenderPassBeginInfo renderpass_bi{
             .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
             .pNext = nullptr,
-            .renderPass = renderpass,
-            .framebuffer = framebuffer,
+            .renderPass = *renderpass,
+            .framebuffer = *framebuffers[image_index],
             .renderArea =
                 {
                     .offset = {0, 0},
@@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
             .extent = size,
         };
         cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
         cmdbuf.SetViewport(0, viewport);
         cmdbuf.SetScissor(0, scissor);
 
-        cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices));
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
+        cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+                                  descriptor_sets[image_index], {});
         cmdbuf.Draw(4, 1, 0, 0);
         cmdbuf.EndRenderPass();
     });
@@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() {
 
 void VKBlitScreen::CreateSemaphores() {
     semaphores.resize(image_count);
-    std::generate(semaphores.begin(), semaphores.end(),
-                  [this] { return device.GetLogical().CreateSemaphore(); });
+    std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
 }
 
 void VKBlitScreen::CreateDescriptorPool() {
@@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() {
 }
 
 void VKBlitScreen::ReleaseRawImages() {
-    for (std::size_t i = 0; i < raw_images.size(); ++i) {
-        scheduler.Wait(resource_ticks.at(i));
+    for (const u64 tick : resource_ticks) {
+        scheduler.Wait(tick);
     }
     raw_images.clear();
     raw_buffer_commits.clear();
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 0def1e769..f4b3ee95c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
     }
     return indices;
 }
-} // Anonymous namespace
-
-Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
 
-Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
-               VAddr cpu_addr_, u64 size_bytes_)
-    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) {
-    buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+vk::Buffer CreateBuffer(const Device& device, u64 size) {
+    VkBufferUsageFlags flags =
+        VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
+        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+    if (device.IsExtTransformFeedbackSupported()) {
+        flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
+    }
+    return device.GetLogical().CreateBuffer({
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
-        .size = SizeBytes(),
-        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
-                 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
-                 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
-                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
-                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+        .size = size,
+        .usage = flags,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     });
+}
+} // Anonymous namespace
+
+Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {}
+
+Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
+               VAddr cpu_addr_, u64 size_bytes_)
+    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
+      device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
+      commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
     if (runtime.device.HasDebuggingToolAttached()) {
         buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
     }
-    commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
+}
+
+VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
+    if (!device) {
+        // Null buffer, return a null descriptor
+        return VK_NULL_HANDLE;
+    }
+    const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
+        return offset == view.offset && size == view.size && format == view.format;
+    })};
+    if (it != views.end()) {
+        return *it->handle;
+    }
+    views.push_back({
+        .offset = offset,
+        .size = size,
+        .format = format,
+        .handle = device->GetLogical().CreateBufferView({
+            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+            .pNext = nullptr,
+            .flags = 0,
+            .buffer = *buffer,
+            .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format,
+            .offset = offset,
+            .range = size,
+        }),
+    });
+    return *views.back().handle;
 }
 
 BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_,
                                        VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
                                        VKUpdateDescriptorQueue& update_descriptor_queue_,
-                                       VKDescriptorPool& descriptor_pool)
+                                       DescriptorPool& descriptor_pool)
     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_},
       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
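
The new Buffer::View above lazily creates one VkBufferView per (offset, size, format) triple and caches it in a small vector, so repeated binds of the same texel-buffer range reuse the existing handle. A generic find-or-create sketch of that caching shape, independent of Vulkan; all types and names below are placeholders.

// Generic sketch of the find-or-create view cache; not the renderer's types.
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct CachedView {
    unsigned offset;
    unsigned size;
    int format;
    std::string handle; // stands in for vk::BufferView
};

static std::vector<CachedView> views;

const std::string& GetView(unsigned offset, unsigned size, int format) {
    // Linear search is fine: a buffer rarely accumulates more than a few views.
    const auto it = std::find_if(views.begin(), views.end(), [&](const CachedView& v) {
        return v.offset == offset && v.size == size && v.format == format;
    });
    if (it != views.end()) {
        return it->handle; // cache hit, reuse the existing handle
    }
    views.push_back({offset, size, format,
                     "view@" + std::to_string(offset) + "+" + std::to_string(size)});
    return views.back().handle; // cache miss, create and remember a new handle
}

int main() {
    std::printf("%s\n", GetView(0, 256, 7).c_str());
    std::printf("%s\n", GetView(0, 256, 7).c_str()); // second call hits the cache
    return 0;
}
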
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3bb81d5b3..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -9,13 +9,14 @@
9#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
11#include "video_core/renderer_vulkan/vk_update_descriptor.h" 11#include "video_core/renderer_vulkan/vk_update_descriptor.h"
12#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h" 13#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h" 14#include "video_core/vulkan_common/vulkan_wrapper.h"
14 15
15namespace Vulkan { 16namespace Vulkan {
16 17
17class Device; 18class Device;
18class VKDescriptorPool; 19class DescriptorPool;
19class VKScheduler; 20class VKScheduler;
20 21
21class BufferCacheRuntime; 22class BufferCacheRuntime;
@@ -26,6 +27,8 @@ public:
26 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 27 explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
27 VAddr cpu_addr_, u64 size_bytes_); 28 VAddr cpu_addr_, u64 size_bytes_);
28 29
30 [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
31
29 [[nodiscard]] VkBuffer Handle() const noexcept { 32 [[nodiscard]] VkBuffer Handle() const noexcept {
30 return *buffer; 33 return *buffer;
31 } 34 }
@@ -35,8 +38,17 @@ public:
35 } 38 }
36 39
37private: 40private:
41 struct BufferView {
42 u32 offset;
43 u32 size;
44 VideoCore::Surface::PixelFormat format;
45 vk::BufferView handle;
46 };
47
48 const Device* device{};
38 vk::Buffer buffer; 49 vk::Buffer buffer;
39 MemoryCommit commit; 50 MemoryCommit commit;
51 std::vector<BufferView> views;
40}; 52};
41 53
42class BufferCacheRuntime { 54class BufferCacheRuntime {
@@ -49,7 +61,7 @@ public:
49 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, 61 explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_,
50 VKScheduler& scheduler_, StagingBufferPool& staging_pool_, 62 VKScheduler& scheduler_, StagingBufferPool& staging_pool_,
51 VKUpdateDescriptorQueue& update_descriptor_queue_, 63 VKUpdateDescriptorQueue& update_descriptor_queue_,
52 VKDescriptorPool& descriptor_pool); 64 DescriptorPool& descriptor_pool);
53 65
54 void Finish(); 66 void Finish();
55 67
@@ -87,6 +99,11 @@ public:
87 BindBuffer(buffer, offset, size); 99 BindBuffer(buffer, offset, size);
88 } 100 }
89 101
102 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
103 VideoCore::Surface::PixelFormat format) {
104 update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
105 }
106
90private: 107private:
91 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { 108 void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
92 update_descriptor_queue.AddBuffer(buffer, offset, size); 109 update_descriptor_queue.AddBuffer(buffer, offset, size);
@@ -124,6 +141,7 @@ struct BufferCacheParams {
124 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; 141 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
125 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 142 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
126 static constexpr bool USE_MEMORY_MAPS = true; 143 static constexpr bool USE_MEMORY_MAPS = true;
144 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
127}; 145};
128 146
129using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 147using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
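
BufferCacheParams acts as a compile-time policy for the shared VideoCommon::BufferCache, and the new SEPARATE_IMAGE_BUFFER_BINDINGS flag lets a backend opt out of distinct image-buffer bindings without runtime branching. A hedged sketch of how such a flag is typically consumed; the cache body here is illustrative, not the actual VideoCommon implementation:

    #include <cstdint>

    struct ExampleParams {
        static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
    };

    template <class Params>
    class PolicyBufferCache {
    public:
        void BindImageBuffer(std::uint32_t index) {
            if constexpr (Params::SEPARATE_IMAGE_BUFFER_BINDINGS) {
                // Backends with dedicated image-buffer slots track them separately.
                bound_image_buffers |= 1u << index;
            } else {
                // Vulkan routes image buffers through the same texel-view path as
                // texture buffers, so one bitmask covers both.
                bound_texture_buffers |= 1u << index;
            }
        }

    private:
        std::uint32_t bound_texture_buffers = 0;
        std::uint32_t bound_image_buffers = 0;
    };

    // PolicyBufferCache<ExampleParams> cache; cache.BindImageBuffer(2);
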
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 4181d83ee..8e426ce2c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2;
41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; 41constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3;
42constexpr size_t ASTC_NUM_BINDINGS = 4; 42constexpr size_t ASTC_NUM_BINDINGS = 4;
43 43
44VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 44template <size_t size>
45 return { 45inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 46 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
47 .offset = 0, 47 .offset = 0,
48 .size = static_cast<u32>(size), 48 .size = static_cast<u32>(size),
49 }; 49};
50}
51
52std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
53 return {{
54 {
55 .binding = 0,
56 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
57 .descriptorCount = 1,
58 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
59 .pImmutableSamplers = nullptr,
60 },
61 {
62 .binding = 1,
63 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
64 .descriptorCount = 1,
65 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
66 .pImmutableSamplers = nullptr,
67 },
68 }};
69}
70 50
71std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { 51constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{
72 return {{ 52 {
73 { 53 .binding = 0,
74 .binding = ASTC_BINDING_INPUT_BUFFER, 54 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
75 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 55 .descriptorCount = 1,
76 .descriptorCount = 1, 56 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
77 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 57 .pImmutableSamplers = nullptr,
78 .pImmutableSamplers = nullptr, 58 },
79 }, 59 {
80 { 60 .binding = 1,
81 .binding = ASTC_BINDING_ENC_BUFFER, 61 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
82 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 62 .descriptorCount = 1,
83 .descriptorCount = 1, 63 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
84 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 64 .pImmutableSamplers = nullptr,
85 .pImmutableSamplers = nullptr, 65 },
86 }, 66}};
87 { 67
88 .binding = ASTC_BINDING_SWIZZLE_BUFFER, 68constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{
89 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 69 .uniform_buffers = 0,
90 .descriptorCount = 1, 70 .storage_buffers = 2,
91 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 71 .texture_buffers = 0,
92 .pImmutableSamplers = nullptr, 72 .image_buffers = 0,
93 }, 73 .textures = 0,
94 { 74 .images = 0,
95 .binding = ASTC_BINDING_OUTPUT_IMAGE, 75 .score = 2,
96 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 76};
97 .descriptorCount = 1,
98 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
99 .pImmutableSamplers = nullptr,
100 },
101 }};
102}
103 77
104VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { 78constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{
105 return { 79 {
106 .dstBinding = 0, 80 .binding = ASTC_BINDING_INPUT_BUFFER,
107 .dstArrayElement = 0,
108 .descriptorCount = 2,
109 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 81 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
110 .offset = 0, 82 .descriptorCount = 1,
111 .stride = sizeof(DescriptorUpdateEntry), 83 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
112 }; 84 .pImmutableSamplers = nullptr,
113} 85 },
86 {
87 .binding = ASTC_BINDING_ENC_BUFFER,
88 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
89 .descriptorCount = 1,
90 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
91 .pImmutableSamplers = nullptr,
92 },
93 {
94 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
95 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
96 .descriptorCount = 1,
97 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98 .pImmutableSamplers = nullptr,
99 },
100 {
101 .binding = ASTC_BINDING_OUTPUT_IMAGE,
102 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
103 .descriptorCount = 1,
104 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
105 .pImmutableSamplers = nullptr,
106 },
107}};
108
109constexpr DescriptorBankInfo ASTC_BANK_INFO{
110 .uniform_buffers = 0,
111 .storage_buffers = 3,
112 .texture_buffers = 0,
113 .image_buffers = 0,
114 .textures = 0,
115 .images = 1,
116 .score = 4,
117};
114 118
115std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> 119constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
116BuildASTCPassDescriptorUpdateTemplateEntry() { 120 .dstBinding = 0,
117 return {{ 121 .dstArrayElement = 0,
122 .descriptorCount = 2,
123 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
124 .offset = 0,
125 .stride = sizeof(DescriptorUpdateEntry),
126};
127
128constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
129 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{
118 { 130 {
119 .dstBinding = ASTC_BINDING_INPUT_BUFFER, 131 .dstBinding = ASTC_BINDING_INPUT_BUFFER,
120 .dstArrayElement = 0, 132 .dstArrayElement = 0,
@@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() {
148 .stride = sizeof(DescriptorUpdateEntry), 160 .stride = sizeof(DescriptorUpdateEntry),
149 }, 161 },
150 }}; 162 }};
151}
152 163
153struct AstcPushConstants { 164struct AstcPushConstants {
154 std::array<u32, 2> blocks_dims; 165 std::array<u32, 2> blocks_dims;
@@ -159,14 +170,14 @@ struct AstcPushConstants {
159 u32 block_height; 170 u32 block_height;
160 u32 block_height_mask; 171 u32 block_height_mask;
161}; 172};
162
163} // Anonymous namespace 173} // Anonymous namespace
164 174
165VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 175ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool,
166 vk::Span<VkDescriptorSetLayoutBinding> bindings, 176 vk::Span<VkDescriptorSetLayoutBinding> bindings,
167 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 177 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
168 vk::Span<VkPushConstantRange> push_constants, 178 const DescriptorBankInfo& bank_info,
169 std::span<const u32> code) { 179 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code)
180 : device{device_} {
170 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ 181 descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
171 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 182 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
172 .pNext = nullptr, 183 .pNext = nullptr,
@@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
196 .pipelineLayout = *layout, 207 .pipelineLayout = *layout,
197 .set = 0, 208 .set = 0,
198 }); 209 });
199 210 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info);
200 descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
201 } 211 }
202 module = device.GetLogical().CreateShaderModule({ 212 module = device.GetLogical().CreateShaderModule({
203 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 213 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_
206 .codeSize = static_cast<u32>(code.size_bytes()), 216 .codeSize = static_cast<u32>(code.size_bytes()),
207 .pCode = code.data(), 217 .pCode = code.data(),
208 }); 218 });
219 device.SaveShader(code);
209 pipeline = device.GetLogical().CreateComputePipeline({ 220 pipeline = device.GetLogical().CreateComputePipeline({
210 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 221 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
211 .pNext = nullptr, 222 .pNext = nullptr,
212 .flags = 0, 223 .flags = 0,
213 .stage = 224 .stage{
214 { 225 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
215 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 226 .pNext = nullptr,
216 .pNext = nullptr, 227 .flags = 0,
217 .flags = 0, 228 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 229 .module = *module,
219 .module = *module, 230 .pName = "main",
220 .pName = "main", 231 .pSpecializationInfo = nullptr,
221 .pSpecializationInfo = nullptr, 232 },
222 },
223 .layout = *layout, 233 .layout = *layout,
224 .basePipelineHandle = nullptr, 234 .basePipelineHandle = nullptr,
225 .basePipelineIndex = 0, 235 .basePipelineIndex = 0,
226 }); 236 });
227} 237}
228 238
229VKComputePass::~VKComputePass() = default; 239ComputePass::~ComputePass() = default;
230 240
231VkDescriptorSet VKComputePass::CommitDescriptorSet( 241Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_,
232 VKUpdateDescriptorQueue& update_descriptor_queue) { 242 DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
233 if (!descriptor_template) {
234 return nullptr;
235 }
236 const VkDescriptorSet set = descriptor_allocator->Commit();
237 update_descriptor_queue.Send(*descriptor_template, set);
238 return set;
239}
240
241Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_,
242 VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_,
243 VKUpdateDescriptorQueue& update_descriptor_queue_) 243 VKUpdateDescriptorQueue& update_descriptor_queue_)
244 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), 244 : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
245 BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), 245 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {},
246 VULKAN_UINT8_COMP_SPV),
246 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 247 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
247 update_descriptor_queue{update_descriptor_queue_} {} 248 update_descriptor_queue{update_descriptor_queue_} {}
248 249
@@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
256 update_descriptor_queue.Acquire(); 257 update_descriptor_queue.Acquire();
257 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); 258 update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
258 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 259 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
259 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 260 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
261 const VkBuffer buffer{staging.buffer};
260 262
261 scheduler.RequestOutsideRenderPassOperationContext(); 263 scheduler.RequestOutsideRenderPassOperationContext();
262 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 264 scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) {
263 num_vertices](vk::CommandBuffer cmdbuf) {
264 static constexpr u32 DISPATCH_SIZE = 1024; 265 static constexpr u32 DISPATCH_SIZE = 1024;
265 static constexpr VkMemoryBarrier WRITE_BARRIER{ 266 static constexpr VkMemoryBarrier WRITE_BARRIER{
266 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 267 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
268 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 269 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
269 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 270 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
270 }; 271 };
271 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 272 const VkDescriptorSet set = descriptor_allocator.Commit();
272 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 273 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
274 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
275 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
273 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); 276 cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1);
274 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 277 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
275 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); 278 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER);
@@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
278} 281}
279 282
280QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 283QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
281 VKDescriptorPool& descriptor_pool_, 284 DescriptorPool& descriptor_pool_,
282 StagingBufferPool& staging_buffer_pool_, 285 StagingBufferPool& staging_buffer_pool_,
283 VKUpdateDescriptorQueue& update_descriptor_queue_) 286 VKUpdateDescriptorQueue& update_descriptor_queue_)
284 : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), 287 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
285 BuildInputOutputDescriptorUpdateTemplate(), 288 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO,
286 BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), 289 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV),
287 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, 290 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
288 update_descriptor_queue{update_descriptor_queue_} {} 291 update_descriptor_queue{update_descriptor_queue_} {}
289 292
@@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
313 update_descriptor_queue.Acquire(); 316 update_descriptor_queue.Acquire();
314 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); 317 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
315 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); 318 update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
316 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); 319 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
317 320
318 scheduler.RequestOutsideRenderPassOperationContext(); 321 scheduler.RequestOutsideRenderPassOperationContext();
319 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, 322 scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex,
320 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { 323 index_shift](vk::CommandBuffer cmdbuf) {
321 static constexpr u32 DISPATCH_SIZE = 1024; 324 static constexpr u32 DISPATCH_SIZE = 1024;
322 static constexpr VkMemoryBarrier WRITE_BARRIER{ 325 static constexpr VkMemoryBarrier WRITE_BARRIER{
323 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 326 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
@@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
325 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, 328 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
326 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, 329 .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
327 }; 330 };
328 const std::array push_constants = {base_vertex, index_shift}; 331 const std::array push_constants{base_vertex, index_shift};
329 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); 332 const VkDescriptorSet set = descriptor_allocator.Commit();
330 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); 333 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
331 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 334 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
335 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
336 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
332 &push_constants); 337 &push_constants);
333 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); 338 cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1);
334 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 339 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
@@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
338} 343}
339 344
340ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 345ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
341 VKDescriptorPool& descriptor_pool_, 346 DescriptorPool& descriptor_pool_,
342 StagingBufferPool& staging_buffer_pool_, 347 StagingBufferPool& staging_buffer_pool_,
343 VKUpdateDescriptorQueue& update_descriptor_queue_, 348 VKUpdateDescriptorQueue& update_descriptor_queue_,
344 MemoryAllocator& memory_allocator_) 349 MemoryAllocator& memory_allocator_)
345 : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), 350 : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS,
346 BuildASTCPassDescriptorUpdateTemplateEntry(), 351 ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO,
347 BuildComputePushConstantRange(sizeof(AstcPushConstants)), 352 COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV),
348 ASTC_DECODER_COMP_SPV), 353 scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
349 device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
350 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} 354 update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
351 355
352ASTCDecoderPass::~ASTCDecoderPass() = default; 356ASTCDecoderPass::~ASTCDecoderPass() = default;
@@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
444 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), 448 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES),
445 sizeof(SWIZZLE_TABLE)); 449 sizeof(SWIZZLE_TABLE));
446 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 450 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
447 451 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
448 const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
449 const VkPipelineLayout vk_layout = *layout;
450 452
451 // To unswizzle the ASTC data 453 // To unswizzle the ASTC data
452 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 454 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
453 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 455 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
454 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 456 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
455 scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, 457 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
456 block_dims, params, set](vk::CommandBuffer cmdbuf) { 458 params, descriptor_data](vk::CommandBuffer cmdbuf) {
457 const AstcPushConstants uniforms{ 459 const AstcPushConstants uniforms{
458 .blocks_dims = block_dims, 460 .blocks_dims = block_dims,
459 .bytes_per_block_log2 = params.bytes_per_block_log2, 461 .bytes_per_block_log2 = params.bytes_per_block_log2,
@@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
463 .block_height = params.block_height, 465 .block_height = params.block_height,
464 .block_height_mask = params.block_height_mask, 466 .block_height_mask = params.block_height_mask,
465 }; 467 };
466 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); 468 const VkDescriptorSet set = descriptor_allocator.Commit();
467 cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); 469 device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
470 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
471 cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
468 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); 472 cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
469 }); 473 });
470 } 474 }
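
A recurring change across these passes: the descriptor set is no longer committed on the CPU path. Instead, the queued writes are snapshotted with UpdateData(), and the set is allocated and written inside the scheduler lambda, on the same tick that records the dispatch. A stand-alone sketch of that ordering with stand-in types; none of these names are the real yuzu classes:

    #include <cstdint>
    #include <functional>
    #include <queue>
    #include <vector>

    struct DescriptorSet { int id = -1; };

    class DeferredScheduler {
    public:
        void Record(std::function<void()> work) { pending.push(std::move(work)); }
        void Flush() {
            while (!pending.empty()) {
                pending.front()();
                pending.pop();
            }
        }
    private:
        std::queue<std::function<void()>> pending;
    };

    class SetAllocator {
    public:
        DescriptorSet Commit() { return DescriptorSet{next_id++}; } // tied to the current tick
    private:
        int next_id = 0;
    };

    void RecordDispatch(DeferredScheduler& scheduler, SetAllocator& allocator,
                        std::vector<std::uint32_t> descriptor_payload) {
        // Snapshot the payload by value now; the producer may reuse its staging
        // storage long before the recording lambda actually runs.
        scheduler.Record([payload = std::move(descriptor_payload), &allocator] {
            const DescriptorSet set = allocator.Commit(); // allocated at record time
            // ...write `payload` into `set`, then bind pipeline + set and dispatch...
            (void)set;
            (void)payload;
        });
    }
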
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 5ea187c30..114aef2bd 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -4,7 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <optional>
8#include <span> 7#include <span>
9#include <utility> 8#include <utility>
10 9
@@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue;
27class Image; 26class Image;
28struct StagingBufferRef; 27struct StagingBufferRef;
29 28
30class VKComputePass { 29class ComputePass {
31public: 30public:
32 explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 31 explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool,
33 vk::Span<VkDescriptorSetLayoutBinding> bindings, 32 vk::Span<VkDescriptorSetLayoutBinding> bindings,
34 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, 33 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); 34 const DescriptorBankInfo& bank_info,
36 ~VKComputePass(); 35 vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
36 ~ComputePass();
37 37
38protected: 38protected:
39 VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); 39 const Device& device;
40
41 vk::DescriptorUpdateTemplateKHR descriptor_template; 40 vk::DescriptorUpdateTemplateKHR descriptor_template;
42 vk::PipelineLayout layout; 41 vk::PipelineLayout layout;
43 vk::Pipeline pipeline; 42 vk::Pipeline pipeline;
43 vk::DescriptorSetLayout descriptor_set_layout;
44 DescriptorAllocator descriptor_allocator;
44 45
45private: 46private:
46 vk::DescriptorSetLayout descriptor_set_layout;
47 std::optional<DescriptorAllocator> descriptor_allocator;
48 vk::ShaderModule module; 47 vk::ShaderModule module;
49}; 48};
50 49
51class Uint8Pass final : public VKComputePass { 50class Uint8Pass final : public ComputePass {
52public: 51public:
53 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, 52 explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_,
54 VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, 53 DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_,
55 VKUpdateDescriptorQueue& update_descriptor_queue_); 54 VKUpdateDescriptorQueue& update_descriptor_queue_);
56 ~Uint8Pass(); 55 ~Uint8Pass();
57 56
@@ -66,10 +65,10 @@ private:
66 VKUpdateDescriptorQueue& update_descriptor_queue; 65 VKUpdateDescriptorQueue& update_descriptor_queue;
67}; 66};
68 67
69class QuadIndexedPass final : public VKComputePass { 68class QuadIndexedPass final : public ComputePass {
70public: 69public:
71 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, 70 explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_,
72 VKDescriptorPool& descriptor_pool_, 71 DescriptorPool& descriptor_pool_,
73 StagingBufferPool& staging_buffer_pool_, 72 StagingBufferPool& staging_buffer_pool_,
74 VKUpdateDescriptorQueue& update_descriptor_queue_); 73 VKUpdateDescriptorQueue& update_descriptor_queue_);
75 ~QuadIndexedPass(); 74 ~QuadIndexedPass();
@@ -84,10 +83,10 @@ private:
84 VKUpdateDescriptorQueue& update_descriptor_queue; 83 VKUpdateDescriptorQueue& update_descriptor_queue;
85}; 84};
86 85
87class ASTCDecoderPass final : public VKComputePass { 86class ASTCDecoderPass final : public ComputePass {
88public: 87public:
89 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, 88 explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
90 VKDescriptorPool& descriptor_pool_, 89 DescriptorPool& descriptor_pool_,
91 StagingBufferPool& staging_buffer_pool_, 90 StagingBufferPool& staging_buffer_pool_,
92 VKUpdateDescriptorQueue& update_descriptor_queue_, 91 VKUpdateDescriptorQueue& update_descriptor_queue_,
93 MemoryAllocator& memory_allocator_); 92 MemoryAllocator& memory_allocator_);
@@ -99,7 +98,6 @@ public:
99private: 98private:
100 void MakeDataBuffer(); 99 void MakeDataBuffer();
101 100
102 const Device& device;
103 VKScheduler& scheduler; 101 VKScheduler& scheduler;
104 StagingBufferPool& staging_buffer_pool; 102 StagingBufferPool& staging_buffer_pool;
105 VKUpdateDescriptorQueue& update_descriptor_queue; 103 VKUpdateDescriptorQueue& update_descriptor_queue;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 3a48219b7..70b84c7a6 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -2,152 +2,198 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <vector> 6#include <vector>
6 7
8#include <boost/container/small_vector.hpp>
9
10#include "video_core/renderer_vulkan/pipeline_helper.h"
11#include "video_core/renderer_vulkan/vk_buffer_cache.h"
7#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 12#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 14#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 15#include "video_core/renderer_vulkan/vk_scheduler.h"
11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
12#include "video_core/renderer_vulkan/vk_update_descriptor.h" 16#include "video_core/renderer_vulkan/vk_update_descriptor.h"
17#include "video_core/shader_notify.h"
13#include "video_core/vulkan_common/vulkan_device.h" 18#include "video_core/vulkan_common/vulkan_device.h"
14#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
15 20
16namespace Vulkan { 21namespace Vulkan {
17 22
18VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 23using Shader::ImageBufferDescriptor;
19 VKDescriptorPool& descriptor_pool_, 24using Tegra::Texture::TexturePair;
20 VKUpdateDescriptorQueue& update_descriptor_queue_, 25
21 const SPIRVShader& shader_) 26ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
22 : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, 27 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 descriptor_set_layout{CreateDescriptorSetLayout()}, 28 Common::ThreadWorker* thread_worker,
24 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 29 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
25 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, 30 vk::ShaderModule spv_module_)
26 descriptor_template{CreateDescriptorUpdateTemplate()}, 31 : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_},
27 shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} 32 spv_module(std::move(spv_module_)) {
28 33 if (shader_notify) {
29VKComputePipeline::~VKComputePipeline() = default; 34 shader_notify->MarkShaderBuilding();
30
31VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
32 if (!descriptor_template) {
33 return {};
34 }
35 const VkDescriptorSet set = descriptor_allocator.Commit();
36 update_descriptor_queue.Send(*descriptor_template, set);
37 return set;
38}
39
40vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
41 std::vector<VkDescriptorSetLayoutBinding> bindings;
42 u32 binding = 0;
43 const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
44 // TODO(Rodrigo): Maybe make individual bindings here?
45 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
46 bindings.push_back({
47 .binding = binding++,
48 .descriptorType = descriptor_type,
49 .descriptorCount = 1,
50 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
51 .pImmutableSamplers = nullptr,
52 });
53 }
54 };
55 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
56 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
57 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
59 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
60 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
61
62 return device.GetLogical().CreateDescriptorSetLayout({
63 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
64 .pNext = nullptr,
65 .flags = 0,
66 .bindingCount = static_cast<u32>(bindings.size()),
67 .pBindings = bindings.data(),
68 });
69}
70
71vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
72 return device.GetLogical().CreatePipelineLayout({
73 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
74 .pNext = nullptr,
75 .flags = 0,
76 .setLayoutCount = 1,
77 .pSetLayouts = descriptor_set_layout.address(),
78 .pushConstantRangeCount = 0,
79 .pPushConstantRanges = nullptr,
80 });
81}
82
83vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
84 std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
85 u32 binding = 0;
86 u32 offset = 0;
87 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
88 if (template_entries.empty()) {
89 // If the shader doesn't use descriptor sets, skip template creation.
90 return {};
91 } 35 }
92 36 std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
93 return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ 37 uniform_buffer_sizes.begin());
94 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 38
95 .pNext = nullptr, 39 auto func{[this, &descriptor_pool, shader_notify] {
96 .flags = 0, 40 DescriptorLayoutBuilder builder{device};
97 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), 41 builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
98 .pDescriptorUpdateEntries = template_entries.data(), 42
99 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, 43 descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
100 .descriptorSetLayout = *descriptor_set_layout, 44 pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
101 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 45 descriptor_update_template =
102 .pipelineLayout = *layout, 46 builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
103 .set = DESCRIPTOR_SET, 47 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
104 }); 48 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
105} 49 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
106 50 .pNext = nullptr,
107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { 51 .requiredSubgroupSize = GuestWarpSize,
108 device.SaveShader(code); 52 };
109 53 pipeline = device.GetLogical().CreateComputePipeline({
110 return device.GetLogical().CreateShaderModule({ 54 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
111 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 55 .pNext = nullptr,
112 .pNext = nullptr, 56 .flags = 0,
113 .flags = 0, 57 .stage{
114 .codeSize = code.size() * sizeof(u32),
115 .pCode = code.data(),
116 });
117}
118
119vk::Pipeline VKComputePipeline::CreatePipeline() const {
120
121 VkComputePipelineCreateInfo ci{
122 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
123 .pNext = nullptr,
124 .flags = 0,
125 .stage =
126 {
127 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 58 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128 .pNext = nullptr, 59 .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
129 .flags = 0, 60 .flags = 0,
130 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 61 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
131 .module = *shader_module, 62 .module = *spv_module,
132 .pName = "main", 63 .pName = "main",
133 .pSpecializationInfo = nullptr, 64 .pSpecializationInfo = nullptr,
134 }, 65 },
135 .layout = *layout, 66 .layout = *pipeline_layout,
136 .basePipelineHandle = nullptr, 67 .basePipelineHandle = 0,
137 .basePipelineIndex = 0, 68 .basePipelineIndex = 0,
138 }; 69 });
139 70 std::lock_guard lock{build_mutex};
140 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ 71 is_built = true;
141 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 72 build_condvar.notify_one();
142 .pNext = nullptr, 73 if (shader_notify) {
143 .requiredSubgroupSize = GuestWarpSize, 74 shader_notify->MarkShaderComplete();
144 }; 75 }
145 76 }};
146 if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { 77 if (thread_worker) {
147 ci.stage.pNext = &subgroup_size_ci; 78 thread_worker->QueueWork(std::move(func));
79 } else {
80 func();
81 }
82}
83
84void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
85 Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler,
86 BufferCache& buffer_cache, TextureCache& texture_cache) {
87 update_descriptor_queue.Acquire();
88
89 buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
90 buffer_cache.UnbindComputeStorageBuffers();
91 size_t ssbo_index{};
92 for (const auto& desc : info.storage_buffers_descriptors) {
93 ASSERT(desc.count == 1);
94 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
95 desc.is_written);
96 ++ssbo_index;
148 } 97 }
149 98
150 return device.GetLogical().CreateComputePipeline(ci); 99 texture_cache.SynchronizeComputeDescriptors();
100
101 static constexpr size_t max_elements = 64;
102 std::array<ImageId, max_elements> image_view_ids;
103 boost::container::static_vector<u32, max_elements> image_view_indices;
104 boost::container::static_vector<VkSampler, max_elements> samplers;
105
106 const auto& qmd{kepler_compute.launch_description};
107 const auto& cbufs{qmd.const_buffer_config};
108 const bool via_header_index{qmd.linked_tsc != 0};
109 const auto read_handle{[&](const auto& desc, u32 index) {
110 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
111 const u32 index_offset{index << desc.size_shift};
112 const u32 offset{desc.cbuf_offset + index_offset};
113 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
114 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
115 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
116 if (desc.has_secondary) {
117 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
118 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
119 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
120 secondary_offset};
121 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
122 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
123 return TexturePair(lhs_raw | rhs_raw, via_header_index);
124 }
125 }
126 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
127 }};
128 const auto add_image{[&](const auto& desc) {
129 for (u32 index = 0; index < desc.count; ++index) {
130 const auto handle{read_handle(desc, index)};
131 image_view_indices.push_back(handle.first);
132 }
133 }};
134 std::ranges::for_each(info.texture_buffer_descriptors, add_image);
135 std::ranges::for_each(info.image_buffer_descriptors, add_image);
136 for (const auto& desc : info.texture_descriptors) {
137 for (u32 index = 0; index < desc.count; ++index) {
138 const auto handle{read_handle(desc, index)};
139 image_view_indices.push_back(handle.first);
140
141 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
142 samplers.push_back(sampler->Handle());
143 }
144 }
145 std::ranges::for_each(info.image_descriptors, add_image);
146
147 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
148 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
149
150 buffer_cache.UnbindComputeTextureBuffers();
151 ImageId* texture_buffer_ids{image_view_ids.data()};
152 size_t index{};
153 const auto add_buffer{[&](const auto& desc) {
154 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
155 for (u32 i = 0; i < desc.count; ++i) {
156 bool is_written{false};
157 if constexpr (is_image) {
158 is_written = desc.is_written;
159 }
160 ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
161 buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
162 image_view.BufferSize(), image_view.format,
163 is_written, is_image);
164 ++texture_buffer_ids;
165 ++index;
166 }
167 }};
168 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
169 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
170
171 buffer_cache.UpdateComputeBuffers();
172 buffer_cache.BindHostComputeBuffers();
173
174 const VkSampler* samplers_it{samplers.data()};
175 const ImageId* views_it{image_view_ids.data()};
176 PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue);
177
178 if (!is_built.load(std::memory_order::relaxed)) {
179 // Wait for the pipeline to be built
180 scheduler.Record([this](vk::CommandBuffer) {
181 std::unique_lock lock{build_mutex};
182 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
183 });
184 }
185 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
186 scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
187 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
188 if (!descriptor_set_layout) {
189 return;
190 }
191 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
192 const vk::Device& dev{device.GetLogical()};
193 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
194 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
195 descriptor_set, nullptr);
196 });
151} 197}
152 198
153} // namespace Vulkan 199} // namespace Vulkan
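
ComputePipeline now builds its VkPipeline on a worker thread and only blocks the render thread if the pipeline is still missing when it is first needed; the handshake is a mutex-protected atomic flag plus a condition variable. A self-contained sketch of the same pattern, with std::thread standing in for Common::ThreadWorker:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    class AsyncPipeline {
    public:
        void BuildAsync() {
            worker = std::thread([this] {
                // ...the expensive pipeline creation would happen here...
                std::lock_guard lock{build_mutex};
                is_built = true;
                build_condvar.notify_one();
            });
        }

        void WaitUntilBuilt() {
            if (is_built.load(std::memory_order::relaxed)) {
                return; // fast path: no locking once the pipeline is ready
            }
            std::unique_lock lock{build_mutex};
            build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
        }

        ~AsyncPipeline() {
            if (worker.joinable()) {
                worker.join();
            }
        }

    private:
        std::thread worker;
        std::condition_variable build_condvar;
        std::mutex build_mutex;
        std::atomic_bool is_built{false};
    };
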
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 7e16575ac..52fec04d3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -4,61 +4,63 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
8#include <condition_variable>
9#include <mutex>
10
7#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/thread_worker.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/memory_manager.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h"
8#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 16#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
9#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 17#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
11 20
21namespace VideoCore {
22class ShaderNotify;
23}
24
12namespace Vulkan { 25namespace Vulkan {
13 26
14class Device; 27class Device;
15class VKScheduler; 28class VKScheduler;
16class VKUpdateDescriptorQueue;
17 29
18class VKComputePipeline final { 30class ComputePipeline {
19public: 31public:
20 explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, 32 explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool,
21 VKDescriptorPool& descriptor_pool_, 33 VKUpdateDescriptorQueue& update_descriptor_queue,
22 VKUpdateDescriptorQueue& update_descriptor_queue_, 34 Common::ThreadWorker* thread_worker,
23 const SPIRVShader& shader_); 35 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info,
24 ~VKComputePipeline(); 36 vk::ShaderModule spv_module);
25
26 VkDescriptorSet CommitDescriptorSet();
27 37
28 VkPipeline GetHandle() const { 38 ComputePipeline& operator=(ComputePipeline&&) noexcept = delete;
29 return *pipeline; 39 ComputePipeline(ComputePipeline&&) noexcept = delete;
30 }
31 40
32 VkPipelineLayout GetLayout() const { 41 ComputePipeline& operator=(const ComputePipeline&) = delete;
33 return *layout; 42 ComputePipeline(const ComputePipeline&) = delete;
34 }
35 43
36 const ShaderEntries& GetEntries() const { 44 void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory,
37 return entries; 45 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache);
38 }
39 46
40private: 47private:
41 vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
42
43 vk::PipelineLayout CreatePipelineLayout() const;
44
45 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
46
47 vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
48
49 vk::Pipeline CreatePipeline() const;
50
51 const Device& device; 48 const Device& device;
52 VKScheduler& scheduler; 49 VKUpdateDescriptorQueue& update_descriptor_queue;
53 ShaderEntries entries; 50 Shader::Info info;
54 51
52 VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{};
53
54 vk::ShaderModule spv_module;
55 vk::DescriptorSetLayout descriptor_set_layout; 55 vk::DescriptorSetLayout descriptor_set_layout;
56 DescriptorAllocator descriptor_allocator; 56 DescriptorAllocator descriptor_allocator;
57 VKUpdateDescriptorQueue& update_descriptor_queue; 57 vk::PipelineLayout pipeline_layout;
58 vk::PipelineLayout layout; 58 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
59 vk::DescriptorUpdateTemplateKHR descriptor_template;
60 vk::ShaderModule shader_module;
61 vk::Pipeline pipeline; 59 vk::Pipeline pipeline;
60
61 std::condition_variable build_condvar;
62 std::mutex build_mutex;
63 std::atomic_bool is_built{false};
62}; 64};
63 65
64} // namespace Vulkan 66} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ef9fb5910..8e77e4796 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex>
6#include <span>
5#include <vector> 7#include <vector>
6 8
7#include "common/common_types.h" 9#include "common/common_types.h"
@@ -13,79 +15,149 @@
13 15
14namespace Vulkan { 16namespace Vulkan {
15 17
16// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. 18// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
17constexpr std::size_t SETS_GROW_RATE = 0x20; 19constexpr size_t SETS_GROW_RATE = 16;
20constexpr s32 SCORE_THRESHOLD = 3;
21constexpr u32 SETS_PER_POOL = 64;
18 22
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, 23struct DescriptorBank {
20 VkDescriptorSetLayout layout_) 24 DescriptorBankInfo info;
21 : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), 25 std::vector<vk::DescriptorPool> pools;
22 descriptor_pool{descriptor_pool_}, layout{layout_} {} 26};
23 27
24DescriptorAllocator::~DescriptorAllocator() = default; 28bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
29 return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
30 texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
31 textures >= subset.textures && images >= subset.image_buffers;
32}
25 33
26VkDescriptorSet DescriptorAllocator::Commit() { 34template <typename Descriptors>
27 const std::size_t index = CommitResource(); 35static u32 Accumulate(const Descriptors& descriptors) {
28 return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; 36 u32 count = 0;
37 for (const auto& descriptor : descriptors) {
38 count += descriptor.count;
39 }
40 return count;
29} 41}
30 42
31void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { 43static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
32 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); 44 DescriptorBankInfo bank;
45 for (const Shader::Info& info : infos) {
46 bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
47 bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
48 bank.texture_buffers += Accumulate(info.texture_buffer_descriptors);
49 bank.image_buffers += Accumulate(info.image_buffer_descriptors);
50 bank.textures += Accumulate(info.texture_descriptors);
51 bank.images += Accumulate(info.image_descriptors);
52 }
53 bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
54 bank.image_buffers + bank.textures + bank.images;
55 return bank;
33} 56}
34 57
35VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) 58static void AllocatePool(const Device& device, DescriptorBank& bank) {
36 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ 59 std::array<VkDescriptorPoolSize, 6> pool_sizes;
37 AllocateNewPool()} {} 60 size_t pool_cursor{};
38 61 const auto add = [&](VkDescriptorType type, u32 count) {
39VKDescriptorPool::~VKDescriptorPool() = default; 62 if (count > 0) {
40 63 pool_sizes[pool_cursor++] = {
41vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { 64 .type = type,
42 static constexpr u32 num_sets = 0x20000; 65 .descriptorCount = count * SETS_PER_POOL,
43 static constexpr VkDescriptorPoolSize pool_sizes[] = { 66 };
44 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, 67 }
45 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
46 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
47 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
48 {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
49 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40},
50 }; 68 };
51 69 const auto& info{bank.info};
52 const VkDescriptorPoolCreateInfo ci{ 70 add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers);
71 add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers);
72 add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers);
73 add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers);
74 add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures);
75 add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images);
76 bank.pools.push_back(device.GetLogical().CreateDescriptorPool({
53 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 77 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
54 .pNext = nullptr, 78 .pNext = nullptr,
55 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 79 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
56 .maxSets = num_sets, 80 .maxSets = SETS_PER_POOL,
57 .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), 81 .poolSizeCount = static_cast<u32>(pool_cursor),
58 .pPoolSizes = std::data(pool_sizes), 82 .pPoolSizes = std::data(pool_sizes),
59 }; 83 }));
60 return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); 84}
85
86DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
87 DescriptorBank& bank_, VkDescriptorSetLayout layout_)
88 : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_},
89 layout{layout_} {}
90
91VkDescriptorSet DescriptorAllocator::Commit() {
92 const size_t index = CommitResource();
93 return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
61} 94}
62 95
63vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, 96void DescriptorAllocator::Allocate(size_t begin, size_t end) {
64 std::size_t count) { 97 sets.push_back(AllocateDescriptors(end - begin));
65 const std::vector layout_copies(count, layout); 98}
66 VkDescriptorSetAllocateInfo ai{ 99
100vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
101 const std::vector<VkDescriptorSetLayout> layouts(count, layout);
102 VkDescriptorSetAllocateInfo allocate_info{
67 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 103 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
68 .pNext = nullptr, 104 .pNext = nullptr,
69 .descriptorPool = **active_pool, 105 .descriptorPool = *bank->pools.back(),
70 .descriptorSetCount = static_cast<u32>(count), 106 .descriptorSetCount = static_cast<u32>(count),
71 .pSetLayouts = layout_copies.data(), 107 .pSetLayouts = layouts.data(),
72 }; 108 };
73 109 vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
74 vk::DescriptorSets sets = active_pool->Allocate(ai); 110 if (!new_sets.IsOutOfPoolMemory()) {
75 if (!sets.IsOutOfPoolMemory()) { 111 return new_sets;
76 return sets;
77 } 112 }
78
79 // Our current pool is out of memory. Allocate a new one and retry 113 // Our current pool is out of memory. Allocate a new one and retry
80 active_pool = AllocateNewPool(); 114 AllocatePool(*device, *bank);
81 ai.descriptorPool = **active_pool; 115 allocate_info.descriptorPool = *bank->pools.back();
82 sets = active_pool->Allocate(ai); 116 new_sets = bank->pools.back().Allocate(allocate_info);
83 if (!sets.IsOutOfPoolMemory()) { 117 if (!new_sets.IsOutOfPoolMemory()) {
84 return sets; 118 return new_sets;
85 } 119 }
86
87 // After allocating a new pool, we are out of memory again. We can't handle this from here. 120 // After allocating a new pool, we are out of memory again. We can't handle this from here.
88 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); 121 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
89} 122}
90 123
124DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler)
125 : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {}
126
127DescriptorPool::~DescriptorPool() = default;
128
129DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
130 std::span<const Shader::Info> infos) {
131 return Allocator(layout, MakeBankInfo(infos));
132}
133
134DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
135 const Shader::Info& info) {
136 return Allocator(layout, MakeBankInfo(std::array{info}));
137}
138
139DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
140 const DescriptorBankInfo& info) {
141 return DescriptorAllocator(device, master_semaphore, Bank(info), layout);
142}
143
144DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
145 std::shared_lock read_lock{banks_mutex};
146 const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
147 return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
148 });
149 if (it != bank_infos.end()) {
150 return *banks[std::distance(bank_infos.begin(), it)].get();
151 }
152 read_lock.unlock();
153
154 std::unique_lock write_lock{banks_mutex};
155 bank_infos.push_back(reqs);
156
157 auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
158 bank.info = reqs;
159 AllocatePool(device, bank);
160 return bank;
161}
162
91} // namespace Vulkan 163} // namespace Vulkan
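
DescriptorPool::Bank takes a shared lock for the common "find an existing bank" case and only upgrades to an exclusive lock when a new bank must be created; a bank is reused when its score is within SCORE_THRESHOLD of the request and it is a superset of it. A simplified, self-contained sketch keyed on a single score value (the real matching also calls IsSuperset):

    #include <cstddef>
    #include <cstdlib>
    #include <memory>
    #include <mutex>
    #include <shared_mutex>
    #include <vector>

    struct Bank { int score; };

    class BankRegistry {
    public:
        Bank& Get(int score, int threshold = 3) {
            {
                std::shared_lock read_lock{mutex}; // cheap, concurrent lookups
                for (std::size_t i = 0; i < infos.size(); ++i) {
                    if (std::abs(infos[i] - score) < threshold) {
                        return *banks[i];
                    }
                }
            }
            // Between releasing the read lock and taking the write lock another
            // thread may register a matching bank; the duplicate is tolerated
            // because banks are only a pooling optimization.
            std::unique_lock write_lock{mutex};
            infos.push_back(score);
            banks.push_back(std::make_unique<Bank>(Bank{score}));
            return *banks.back();
        }

    private:
        std::shared_mutex mutex;
        std::vector<int> infos;                     // matched without touching the banks
        std::vector<std::unique_ptr<Bank>> banks;   // stable addresses for returned references
    };
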
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index f892be7be..59466aac5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -4,57 +4,85 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <shared_mutex>
8#include <span>
7#include <vector> 9#include <vector>
8 10
11#include "shader_recompiler/shader_info.h"
9#include "video_core/renderer_vulkan/vk_resource_pool.h" 12#include "video_core/renderer_vulkan/vk_resource_pool.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 13#include "video_core/vulkan_common/vulkan_wrapper.h"
11 14
12namespace Vulkan { 15namespace Vulkan {
13 16
14class Device; 17class Device;
15class VKDescriptorPool;
16class VKScheduler; 18class VKScheduler;
17 19
20struct DescriptorBank;
21
22struct DescriptorBankInfo {
23 [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept;
24
25 u32 uniform_buffers{}; ///< Number of uniform buffer descriptors
26 u32 storage_buffers{}; ///< Number of storage buffer descriptors
27 u32 texture_buffers{}; ///< Number of texture buffer descriptors
28 u32 image_buffers{}; ///< Number of image buffer descriptors
29 u32 textures{}; ///< Number of texture descriptors
30 u32 images{}; ///< Number of image descriptors
31 s32 score{}; ///< Number of descriptors in total
32};
33
18class DescriptorAllocator final : public ResourcePool { 34class DescriptorAllocator final : public ResourcePool {
35 friend class DescriptorPool;
36
19public: 37public:
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); 38 explicit DescriptorAllocator() = default;
21 ~DescriptorAllocator() override; 39 ~DescriptorAllocator() override = default;
40
41 DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default;
42 DescriptorAllocator(DescriptorAllocator&&) noexcept = default;
22 43
23 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; 44 DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
24 DescriptorAllocator(const DescriptorAllocator&) = delete; 45 DescriptorAllocator(const DescriptorAllocator&) = delete;
25 46
26 VkDescriptorSet Commit(); 47 VkDescriptorSet Commit();
27 48
28protected:
29 void Allocate(std::size_t begin, std::size_t end) override;
30
31private: 49private:
32 VKDescriptorPool& descriptor_pool; 50 explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_,
33 const VkDescriptorSetLayout layout; 51 DescriptorBank& bank_, VkDescriptorSetLayout layout_);
34 52
35 std::vector<vk::DescriptorSets> descriptors_allocations; 53 void Allocate(size_t begin, size_t end) override;
36}; 54
55 vk::DescriptorSets AllocateDescriptors(size_t count);
56
57 const Device* device{};
58 DescriptorBank* bank{};
59 VkDescriptorSetLayout layout{};
37 60
38class VKDescriptorPool final { 61 std::vector<vk::DescriptorSets> sets;
39 friend DescriptorAllocator; 62};
40 63
64class DescriptorPool {
41public: 65public:
42 explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); 66 explicit DescriptorPool(const Device& device, VKScheduler& scheduler);
43 ~VKDescriptorPool(); 67 ~DescriptorPool();
44 68
45 VKDescriptorPool(const VKDescriptorPool&) = delete; 69 DescriptorPool& operator=(const DescriptorPool&) = delete;
46 VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; 70 DescriptorPool(const DescriptorPool&) = delete;
47 71
48private: 72 DescriptorAllocator Allocator(VkDescriptorSetLayout layout,
49 vk::DescriptorPool* AllocateNewPool(); 73 std::span<const Shader::Info> infos);
74 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info);
75 DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info);
50 76
51 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); 77private:
78 DescriptorBank& Bank(const DescriptorBankInfo& reqs);
52 79
53 const Device& device; 80 const Device& device;
54 MasterSemaphore& master_semaphore; 81 MasterSemaphore& master_semaphore;
55 82
56 std::vector<vk::DescriptorPool> pools; 83 std::shared_mutex banks_mutex;
57 vk::DescriptorPool* active_pool; 84 std::vector<DescriptorBankInfo> bank_infos;
85 std::vector<std::unique_ptr<DescriptorBank>> banks;
58}; 86};
59 87
60} // namespace Vulkan \ No newline at end of file 88} // namespace Vulkan \ No newline at end of file
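The DescriptorBankInfo fields above drive bank reuse: allocator requests with a similar total (score) land in the same bank as long as the bank covers every per-type count. One plausible reading of IsSuperset(), shown only as a sketch and not as the yuzu implementation:

struct BankInfo {
    unsigned uniform_buffers{};
    unsigned storage_buffers{};
    unsigned texture_buffers{};
    unsigned image_buffers{};
    unsigned textures{};
    unsigned images{};
    int score{}; // total number of descriptors, used for the fuzzy match

    // True when this bank can serve every descriptor type the subset asks for.
    bool IsSuperset(const BankInfo& subset) const noexcept {
        return uniform_buffers >= subset.uniform_buffers &&
               storage_buffers >= subset.storage_buffers &&
               texture_buffers >= subset.texture_buffers &&
               image_buffers >= subset.image_buffers &&
               textures >= subset.textures && images >= subset.images;
    }
};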
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index fc6dd83eb..18482e1d0 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -1,29 +1,58 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <span>
7#include <cstring>
8#include <vector>
9 7
10#include "common/common_types.h" 8#include <boost/container/small_vector.hpp>
11#include "common/microprofile.h" 9#include <boost/container/static_vector.hpp>
12#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 10
11#include "common/bit_field.h"
13#include "video_core/renderer_vulkan/maxwell_to_vk.h" 12#include "video_core/renderer_vulkan/maxwell_to_vk.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 13#include "video_core/renderer_vulkan/pipeline_helper.h"
14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 15#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
16#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 16#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 17#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_texture_cache.h"
18#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/shader_notify.h"
19#include "video_core/vulkan_common/vulkan_device.h" 21#include "video_core/vulkan_common/vulkan_device.h"
20#include "video_core/vulkan_common/vulkan_wrapper.h"
21
22namespace Vulkan {
23 22
24MICROPROFILE_DECLARE(Vulkan_PipelineCache); 23#if defined(_MSC_VER) && defined(NDEBUG)
24#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
25#else
26#define LAMBDA_FORCEINLINE
27#endif
25 28
29namespace Vulkan {
26namespace { 30namespace {
31using boost::container::small_vector;
32using boost::container::static_vector;
33using Shader::ImageBufferDescriptor;
34using Tegra::Texture::TexturePair;
35using VideoCore::Surface::PixelFormat;
36using VideoCore::Surface::PixelFormatFromDepthFormat;
37using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
38
39constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
40constexpr size_t MAX_IMAGE_ELEMENTS = 64;
41
42DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
43 DescriptorLayoutBuilder builder{device};
44 for (size_t index = 0; index < infos.size(); ++index) {
45 static constexpr std::array stages{
46 VK_SHADER_STAGE_VERTEX_BIT,
47 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
48 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
49 VK_SHADER_STAGE_GEOMETRY_BIT,
50 VK_SHADER_STAGE_FRAGMENT_BIT,
51 };
52 builder.Add(infos[index], stages.at(index));
53 }
54 return builder;
55}
27 56
28template <class StencilFace> 57template <class StencilFace>
29VkStencilOpState GetStencilFaceState(const StencilFace& face) { 58VkStencilOpState GetStencilFaceState(const StencilFace& face) {
@@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) {
39} 68}
40 69
41bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { 70bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
42 static constexpr std::array unsupported_topologies = { 71 static constexpr std::array unsupported_topologies{
43 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 72 VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
44 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, 73 VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
45 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 74 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
46 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, 75 VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
47 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, 76 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
48 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; 77 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
49 return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), 78 // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT,
50 topology) == std::end(unsupported_topologies); 79 };
80 return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end();
81}
82
83bool IsLine(VkPrimitiveTopology topology) {
84 static constexpr std::array line_topologies{
85 VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
86 // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT,
87 };
 88        return std::ranges::find(line_topologies, topology) != line_topologies.end();
51} 89}
52 90
53VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { 91VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
@@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
59 BitField<12, 3, Maxwell::ViewportSwizzle> w; 97 BitField<12, 3, Maxwell::ViewportSwizzle> w;
60 }; 98 };
61 const Swizzle unpacked{swizzle}; 99 const Swizzle unpacked{swizzle};
62 100 return VkViewportSwizzleNV{
63 return {
64 .x = MaxwellToVK::ViewportSwizzle(unpacked.x), 101 .x = MaxwellToVK::ViewportSwizzle(unpacked.x),
65 .y = MaxwellToVK::ViewportSwizzle(unpacked.y), 102 .y = MaxwellToVK::ViewportSwizzle(unpacked.y),
66 .z = MaxwellToVK::ViewportSwizzle(unpacked.z), 103 .z = MaxwellToVK::ViewportSwizzle(unpacked.z),
@@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
68 }; 105 };
69} 106}
70 107
71VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { 108PixelFormat DecodeFormat(u8 encoded_format) {
72 switch (msaa_mode) { 109 const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)};
73 case Tegra::Texture::MsaaMode::Msaa1x1: 110 if (format == Tegra::RenderTargetFormat::NONE) {
74 return VK_SAMPLE_COUNT_1_BIT; 111 return PixelFormat::Invalid;
75 case Tegra::Texture::MsaaMode::Msaa2x1:
76 case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
77 return VK_SAMPLE_COUNT_2_BIT;
78 case Tegra::Texture::MsaaMode::Msaa2x2:
79 case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
80 case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
81 return VK_SAMPLE_COUNT_4_BIT;
82 case Tegra::Texture::MsaaMode::Msaa4x2:
83 case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
84 case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
85 case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
86 return VK_SAMPLE_COUNT_8_BIT;
87 case Tegra::Texture::MsaaMode::Msaa4x4:
88 return VK_SAMPLE_COUNT_16_BIT;
89 default:
90 UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
91 return VK_SAMPLE_COUNT_1_BIT;
92 } 112 }
113 return PixelFormatFromRenderTargetFormat(format);
93} 114}
94 115
95} // Anonymous namespace 116RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
117 RenderPassKey key;
118 std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat);
119 if (state.depth_enabled != 0) {
120 const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())};
121 key.depth_format = PixelFormatFromDepthFormat(depth_format);
122 } else {
123 key.depth_format = PixelFormat::Invalid;
124 }
125 key.samples = MaxwellToVK::MsaaMode(state.msaa_mode);
126 return key;
127}
96 128
97VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 129size_t NumAttachments(const FixedPipelineState& state) {
98 VKDescriptorPool& descriptor_pool_, 130 size_t num{};
99 VKUpdateDescriptorQueue& update_descriptor_queue_, 131 for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
100 const GraphicsPipelineCacheKey& key, 132 const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
101 vk::Span<VkDescriptorSetLayoutBinding> bindings, 133 if (format != Tegra::RenderTargetFormat::NONE) {
102 const SPIRVProgram& program, u32 num_color_buffers) 134 num = index + 1;
103 : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, 135 }
104 descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, 136 }
105 descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, 137 return num;
106 update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
107 descriptor_template{CreateDescriptorUpdateTemplate(program)},
108 modules(CreateShaderModules(program)),
109 pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
110
111VKGraphicsPipeline::~VKGraphicsPipeline() = default;
112
113VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
114 if (!descriptor_template) {
115 return {};
116 }
117 const VkDescriptorSet set = descriptor_allocator.Commit();
118 update_descriptor_queue.Send(*descriptor_template, set);
119 return set;
120} 138}
121 139
122vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( 140template <typename Spec>
123 vk::Span<VkDescriptorSetLayoutBinding> bindings) const { 141bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
124 const VkDescriptorSetLayoutCreateInfo ci{ 142 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
125 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 143 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
126 .pNext = nullptr, 144 if (!Spec::enabled_stages[stage] && modules[stage]) {
127 .flags = 0, 145 return false;
128 .bindingCount = bindings.size(), 146 }
129 .pBindings = bindings.data(), 147 const auto& info{stage_infos[stage]};
130 }; 148 if constexpr (!Spec::has_storage_buffers) {
131 return device.GetLogical().CreateDescriptorSetLayout(ci); 149 if (!info.storage_buffers_descriptors.empty()) {
150 return false;
151 }
152 }
153 if constexpr (!Spec::has_texture_buffers) {
154 if (!info.texture_buffer_descriptors.empty()) {
155 return false;
156 }
157 }
158 if constexpr (!Spec::has_image_buffers) {
159 if (!info.image_buffer_descriptors.empty()) {
160 return false;
161 }
162 }
163 if constexpr (!Spec::has_images) {
164 if (!info.image_descriptors.empty()) {
165 return false;
166 }
167 }
168 }
169 return true;
132} 170}
133 171
134vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { 172using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool);
135 const VkPipelineLayoutCreateInfo ci{ 173
136 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 174template <typename Spec, typename... Specs>
137 .pNext = nullptr, 175ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
138 .flags = 0, 176 const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
139 .setLayoutCount = 1, 177 if constexpr (sizeof...(Specs) > 0) {
140 .pSetLayouts = descriptor_set_layout.address(), 178 if (!Passes<Spec>(modules, stage_infos)) {
141 .pushConstantRangeCount = 0, 179 return FindSpec<Specs...>(modules, stage_infos);
142 .pPushConstantRanges = nullptr, 180 }
143 }; 181 }
144 return device.GetLogical().CreatePipelineLayout(ci); 182 return GraphicsPipeline::MakeConfigureSpecFunc<Spec>();
145} 183}
146 184
147vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( 185struct SimpleVertexFragmentSpec {
148 const SPIRVProgram& program) const { 186 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true};
149 std::vector<VkDescriptorUpdateTemplateEntry> template_entries; 187 static constexpr bool has_storage_buffers = false;
150 u32 binding = 0; 188 static constexpr bool has_texture_buffers = false;
151 u32 offset = 0; 189 static constexpr bool has_image_buffers = false;
152 for (const auto& stage : program) { 190 static constexpr bool has_images = false;
153 if (stage) { 191};
154 FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); 192
193struct SimpleVertexSpec {
194 static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false};
195 static constexpr bool has_storage_buffers = false;
196 static constexpr bool has_texture_buffers = false;
197 static constexpr bool has_image_buffers = false;
198 static constexpr bool has_images = false;
199};
200
201struct DefaultSpec {
202 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
203 static constexpr bool has_storage_buffers = true;
204 static constexpr bool has_texture_buffers = true;
205 static constexpr bool has_image_buffers = true;
206 static constexpr bool has_images = true;
207};
208
209ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
210 const std::array<Shader::Info, NUM_STAGES>& infos) {
211 return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos);
212}
213} // Anonymous namespace
214
215GraphicsPipeline::GraphicsPipeline(
216 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
217 VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
218 VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool,
219 VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
220 RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_,
221 std::array<vk::ShaderModule, NUM_STAGES> stages,
222 const std::array<const Shader::Info*, NUM_STAGES>& infos)
223 : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_},
224 texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_},
225 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
226 if (shader_notify) {
227 shader_notify->MarkShaderBuilding();
228 }
229 for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
230 const Shader::Info* const info{infos[stage]};
231 if (!info) {
232 continue;
155 } 233 }
234 stage_infos[stage] = *info;
235 enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
236 std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
156 } 237 }
157 if (template_entries.empty()) { 238 auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
158 // If the shader doesn't use descriptor sets, skip template creation. 239 DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
159 return {}; 240 uses_push_descriptor = builder.CanUsePushDescriptor();
241 descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
242 if (!uses_push_descriptor) {
243 descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
244 }
245 const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
246 pipeline_layout = builder.CreatePipelineLayout(set_layout);
247 descriptor_update_template =
248 builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
249
250 const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
251 Validate();
252 MakePipeline(render_pass);
253
254 std::lock_guard lock{build_mutex};
255 is_built = true;
256 build_condvar.notify_one();
257 if (shader_notify) {
258 shader_notify->MarkShaderComplete();
259 }
260 }};
261 if (worker_thread) {
262 worker_thread->QueueWork(std::move(func));
263 } else {
264 func();
160 } 265 }
266 configure_func = ConfigureFunc(spv_modules, stage_infos);
267}
161 268
162 const VkDescriptorUpdateTemplateCreateInfoKHR ci{ 269void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
163 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, 270 transition_keys.push_back(transition->key);
164 .pNext = nullptr, 271 transitions.push_back(transition);
165 .flags = 0,
166 .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
167 .pDescriptorUpdateEntries = template_entries.data(),
168 .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
169 .descriptorSetLayout = *descriptor_set_layout,
170 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
171 .pipelineLayout = *layout,
172 .set = DESCRIPTOR_SET,
173 };
174 return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
175} 272}
176 273
177std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( 274template <typename Spec>
178 const SPIRVProgram& program) const { 275void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
179 VkShaderModuleCreateInfo ci{ 276 std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids;
180 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 277 std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices;
181 .pNext = nullptr, 278 std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers;
182 .flags = 0, 279 size_t sampler_index{};
183 .codeSize = 0, 280 size_t image_index{};
184 .pCode = nullptr, 281
185 }; 282 texture_cache.SynchronizeGraphicsDescriptors();
283
284 buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
285
286 const auto& regs{maxwell3d.regs};
287 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
288 const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
289 const Shader::Info& info{stage_infos[stage]};
290 buffer_cache.UnbindGraphicsStorageBuffers(stage);
291 if constexpr (Spec::has_storage_buffers) {
292 size_t ssbo_index{};
293 for (const auto& desc : info.storage_buffers_descriptors) {
294 ASSERT(desc.count == 1);
295 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
296 desc.cbuf_offset, desc.is_written);
297 ++ssbo_index;
298 }
299 }
300 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
301 const auto read_handle{[&](const auto& desc, u32 index) {
302 ASSERT(cbufs[desc.cbuf_index].enabled);
303 const u32 index_offset{index << desc.size_shift};
304 const u32 offset{desc.cbuf_offset + index_offset};
305 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
306 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
307 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
308 if (desc.has_secondary) {
309 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
310 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
311 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
312 second_offset};
313 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
314 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
315 const u32 raw{lhs_raw | rhs_raw};
316 return TexturePair(raw, via_header_index);
317 }
318 }
319 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
320 }};
321 const auto add_image{[&](const auto& desc) {
322 for (u32 index = 0; index < desc.count; ++index) {
323 const auto handle{read_handle(desc, index)};
324 image_view_indices[image_index++] = handle.first;
325 }
326 }};
327 if constexpr (Spec::has_texture_buffers) {
328 for (const auto& desc : info.texture_buffer_descriptors) {
329 add_image(desc);
330 }
331 }
332 if constexpr (Spec::has_image_buffers) {
333 for (const auto& desc : info.image_buffer_descriptors) {
334 add_image(desc);
335 }
336 }
337 for (const auto& desc : info.texture_descriptors) {
338 for (u32 index = 0; index < desc.count; ++index) {
339 const auto handle{read_handle(desc, index)};
340 image_view_indices[image_index++] = handle.first;
186 341
187 std::vector<vk::ShaderModule> shader_modules; 342 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
188 shader_modules.reserve(Maxwell::MaxShaderStage); 343 samplers[sampler_index++] = sampler->Handle();
189 for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { 344 }
190 const auto& stage = program[i]; 345 }
191 if (!stage) { 346 if constexpr (Spec::has_images) {
192 continue; 347 for (const auto& desc : info.image_descriptors) {
348 add_image(desc);
349 }
193 } 350 }
351 }};
352 if constexpr (Spec::enabled_stages[0]) {
353 config_stage(0);
354 }
355 if constexpr (Spec::enabled_stages[1]) {
356 config_stage(1);
357 }
358 if constexpr (Spec::enabled_stages[2]) {
359 config_stage(2);
360 }
361 if constexpr (Spec::enabled_stages[3]) {
362 config_stage(3);
363 }
364 if constexpr (Spec::enabled_stages[4]) {
365 config_stage(4);
366 }
367 const std::span indices_span(image_view_indices.data(), image_index);
368 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
369
370 ImageId* texture_buffer_index{image_view_ids.data()};
371 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
372 size_t index{};
373 const auto add_buffer{[&](const auto& desc) {
374 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
375 for (u32 i = 0; i < desc.count; ++i) {
376 bool is_written{false};
377 if constexpr (is_image) {
378 is_written = desc.is_written;
379 }
380 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
381 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
382 image_view.BufferSize(), image_view.format,
383 is_written, is_image);
384 ++index;
385 ++texture_buffer_index;
386 }
387 }};
388 buffer_cache.UnbindGraphicsTextureBuffers(stage);
194 389
195 device.SaveShader(stage->code); 390 const Shader::Info& info{stage_infos[stage]};
391 if constexpr (Spec::has_texture_buffers) {
392 for (const auto& desc : info.texture_buffer_descriptors) {
393 add_buffer(desc);
394 }
395 }
396 if constexpr (Spec::has_image_buffers) {
397 for (const auto& desc : info.image_buffer_descriptors) {
398 add_buffer(desc);
399 }
400 }
401 for (const auto& desc : info.texture_descriptors) {
402 texture_buffer_index += desc.count;
403 }
404 if constexpr (Spec::has_images) {
405 for (const auto& desc : info.image_descriptors) {
406 texture_buffer_index += desc.count;
407 }
408 }
409 }};
410 if constexpr (Spec::enabled_stages[0]) {
411 bind_stage_info(0);
412 }
413 if constexpr (Spec::enabled_stages[1]) {
414 bind_stage_info(1);
415 }
416 if constexpr (Spec::enabled_stages[2]) {
417 bind_stage_info(2);
418 }
419 if constexpr (Spec::enabled_stages[3]) {
420 bind_stage_info(3);
421 }
422 if constexpr (Spec::enabled_stages[4]) {
423 bind_stage_info(4);
424 }
425
426 buffer_cache.UpdateGraphicsBuffers(is_indexed);
427 buffer_cache.BindHostGeometryBuffers(is_indexed);
196 428
197 ci.codeSize = stage->code.size() * sizeof(u32); 429 update_descriptor_queue.Acquire();
198 ci.pCode = stage->code.data(); 430
199 shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); 431 const VkSampler* samplers_it{samplers.data()};
432 const ImageId* views_it{image_view_ids.data()};
433 const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE {
434 buffer_cache.BindHostStageBuffers(stage);
435 PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache,
436 update_descriptor_queue);
437 }};
438 if constexpr (Spec::enabled_stages[0]) {
439 prepare_stage(0);
440 }
441 if constexpr (Spec::enabled_stages[1]) {
442 prepare_stage(1);
200 } 443 }
201 return shader_modules; 444 if constexpr (Spec::enabled_stages[2]) {
445 prepare_stage(2);
446 }
447 if constexpr (Spec::enabled_stages[3]) {
448 prepare_stage(3);
449 }
450 if constexpr (Spec::enabled_stages[4]) {
451 prepare_stage(4);
452 }
453 ConfigureDraw();
202} 454}
203 455
204vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, 456void GraphicsPipeline::ConfigureDraw() {
205 VkRenderPass renderpass, 457 texture_cache.UpdateRenderTargets(false);
206 u32 num_color_buffers) const { 458 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
207 const auto& state = cache_key.fixed_state; 459
208 const auto& viewport_swizzles = state.viewport_swizzles; 460 if (!is_built.load(std::memory_order::relaxed)) {
209 461 // Wait for the pipeline to be built
210 FixedPipelineState::DynamicState dynamic; 462 scheduler.Record([this](vk::CommandBuffer) {
211 if (device.IsExtExtendedDynamicStateSupported()) { 463 std::unique_lock lock{build_mutex};
212 // Insert dummy values, as long as they are valid they don't matter as extended dynamic 464 build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
213 // state is ignored
214 dynamic.raw1 = 0;
215 dynamic.raw2 = 0;
216 dynamic.vertex_strides.fill(0);
217 } else {
218 dynamic = state.dynamic_state;
219 }
220
221 std::vector<VkVertexInputBindingDescription> vertex_bindings;
222 std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
223 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
224 const bool instanced = state.binding_divisors[index] != 0;
225 const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
226 vertex_bindings.push_back({
227 .binding = static_cast<u32>(index),
228 .stride = dynamic.vertex_strides[index],
229 .inputRate = rate,
230 }); 465 });
231 if (instanced) {
232 vertex_binding_divisors.push_back({
233 .binding = static_cast<u32>(index),
234 .divisor = state.binding_divisors[index],
235 });
236 }
237 } 466 }
467 const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
468 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
469 scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) {
470 if (bind_pipeline) {
471 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
472 }
473 if (!descriptor_set_layout) {
474 return;
475 }
476 if (uses_push_descriptor) {
477 cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
478 0, descriptor_data);
479 } else {
480 const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
481 const vk::Device& dev{device.GetLogical()};
482 dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
483 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
484 descriptor_set, nullptr);
485 }
486 });
487}
238 488
239 std::vector<VkVertexInputAttributeDescription> vertex_attributes; 489void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
240 const auto& input_attributes = program[0]->entries.attributes; 490 FixedPipelineState::DynamicState dynamic{};
241 for (std::size_t index = 0; index < state.attributes.size(); ++index) { 491 if (!key.state.extended_dynamic_state) {
242 const auto& attribute = state.attributes[index]; 492 dynamic = key.state.dynamic_state;
243 if (!attribute.enabled) { 493 }
244 continue; 494 static_vector<VkVertexInputBindingDescription, 32> vertex_bindings;
495 static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors;
496 static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes;
497 if (key.state.dynamic_vertex_input) {
498 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
499 const u32 type = key.state.DynamicAttributeType(index);
500 if (!stage_infos[0].loads.Generic(index) || type == 0) {
501 continue;
502 }
503 vertex_attributes.push_back({
504 .location = static_cast<u32>(index),
505 .binding = 0,
506 .format = type == 1 ? VK_FORMAT_R32_SFLOAT
507 : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT,
508 .offset = 0,
509 });
245 } 510 }
246 if (!input_attributes.contains(static_cast<u32>(index))) { 511 if (!vertex_attributes.empty()) {
247 // Skip attributes not used by the vertex shaders. 512 vertex_bindings.push_back({
248 continue; 513 .binding = 0,
514 .stride = 4,
515 .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
516 });
517 }
518 } else {
519 for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
520 const bool instanced = key.state.binding_divisors[index] != 0;
521 const auto rate =
522 instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
523 vertex_bindings.push_back({
524 .binding = static_cast<u32>(index),
525 .stride = dynamic.vertex_strides[index],
526 .inputRate = rate,
527 });
528 if (instanced) {
529 vertex_binding_divisors.push_back({
530 .binding = static_cast<u32>(index),
531 .divisor = key.state.binding_divisors[index],
532 });
533 }
534 }
535 for (size_t index = 0; index < key.state.attributes.size(); ++index) {
536 const auto& attribute = key.state.attributes[index];
537 if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) {
538 continue;
539 }
540 vertex_attributes.push_back({
541 .location = static_cast<u32>(index),
542 .binding = attribute.buffer,
543 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
544 .offset = attribute.offset,
545 });
249 } 546 }
250 vertex_attributes.push_back({
251 .location = static_cast<u32>(index),
252 .binding = attribute.buffer,
253 .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
254 .offset = attribute.offset,
255 });
256 } 547 }
257
258 VkPipelineVertexInputStateCreateInfo vertex_input_ci{ 548 VkPipelineVertexInputStateCreateInfo vertex_input_ci{
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, 549 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
260 .pNext = nullptr, 550 .pNext = nullptr,
@@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
264 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), 554 .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
265 .pVertexAttributeDescriptions = vertex_attributes.data(), 555 .pVertexAttributeDescriptions = vertex_attributes.data(),
266 }; 556 };
267
268 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ 557 const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
269 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, 558 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
270 .pNext = nullptr, 559 .pNext = nullptr,
@@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
274 if (!vertex_binding_divisors.empty()) { 563 if (!vertex_binding_divisors.empty()) {
275 vertex_input_ci.pNext = &input_divisor_ci; 564 vertex_input_ci.pNext = &input_divisor_ci;
276 } 565 }
277 566 auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology);
278 const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); 567 if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
568 if (!spv_modules[1] && !spv_modules[2]) {
569 LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points");
570 input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
571 }
572 }
279 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ 573 const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
280 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, 574 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
281 .pNext = nullptr, 575 .pNext = nullptr,
282 .flags = 0, 576 .flags = 0,
283 .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), 577 .topology = input_assembly_topology,
284 .primitiveRestartEnable = state.primitive_restart_enable != 0 && 578 .primitiveRestartEnable = key.state.primitive_restart_enable != 0 &&
285 SupportsPrimitiveRestart(input_assembly_topology), 579 SupportsPrimitiveRestart(input_assembly_topology),
286 }; 580 };
287
288 const VkPipelineTessellationStateCreateInfo tessellation_ci{ 581 const VkPipelineTessellationStateCreateInfo tessellation_ci{
289 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, 582 .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
290 .pNext = nullptr, 583 .pNext = nullptr,
291 .flags = 0, 584 .flags = 0,
292 .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, 585 .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1,
293 };
294
295 VkPipelineViewportStateCreateInfo viewport_ci{
296 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
297 .pNext = nullptr,
298 .flags = 0,
299 .viewportCount = Maxwell::NumViewports,
300 .pViewports = nullptr,
301 .scissorCount = Maxwell::NumViewports,
302 .pScissors = nullptr,
303 }; 586 };
304 587
305 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; 588 std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
306 std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); 589 std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
307 VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ 590 const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
308 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, 591 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
309 .pNext = nullptr, 592 .pNext = nullptr,
310 .flags = 0, 593 .flags = 0,
311 .viewportCount = Maxwell::NumViewports, 594 .viewportCount = Maxwell::NumViewports,
312 .pViewportSwizzles = swizzles.data(), 595 .pViewportSwizzles = swizzles.data(),
313 }; 596 };
314 if (device.IsNvViewportSwizzleSupported()) { 597 const VkPipelineViewportStateCreateInfo viewport_ci{
315 viewport_ci.pNext = &swizzle_ci; 598 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
316 } 599 .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr,
600 .flags = 0,
601 .viewportCount = Maxwell::NumViewports,
602 .pViewports = nullptr,
603 .scissorCount = Maxwell::NumViewports,
604 .pScissors = nullptr,
605 };
317 606
318 const VkPipelineRasterizationStateCreateInfo rasterization_ci{ 607 VkPipelineRasterizationStateCreateInfo rasterization_ci{
319 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, 608 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
320 .pNext = nullptr, 609 .pNext = nullptr,
321 .flags = 0, 610 .flags = 0,
322 .depthClampEnable = 611 .depthClampEnable =
323 static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), 612 static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
324 .rasterizerDiscardEnable = 613 .rasterizerDiscardEnable =
325 static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), 614 static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
326 .polygonMode = VK_POLYGON_MODE_FILL, 615 .polygonMode =
616 MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)),
327 .cullMode = static_cast<VkCullModeFlags>( 617 .cullMode = static_cast<VkCullModeFlags>(
328 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), 618 dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
329 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), 619 .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
330 .depthBiasEnable = state.depth_bias_enable, 620 .depthBiasEnable = key.state.depth_bias_enable,
331 .depthBiasConstantFactor = 0.0f, 621 .depthBiasConstantFactor = 0.0f,
332 .depthBiasClamp = 0.0f, 622 .depthBiasClamp = 0.0f,
333 .depthBiasSlopeFactor = 0.0f, 623 .depthBiasSlopeFactor = 0.0f,
334 .lineWidth = 1.0f, 624 .lineWidth = 1.0f,
335 }; 625 };
626 VkPipelineRasterizationLineStateCreateInfoEXT line_state{
627 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
628 .pNext = nullptr,
629 .lineRasterizationMode = key.state.smooth_lines != 0
630 ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT
631 : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT,
632 .stippledLineEnable = VK_FALSE, // TODO
633 .lineStippleFactor = 0,
634 .lineStipplePattern = 0,
635 };
636 VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{
637 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT,
638 .pNext = nullptr,
639 .flags = 0,
640 .conservativeRasterizationMode = key.state.conservative_raster_enable != 0
641 ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
642 : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
643 .extraPrimitiveOverestimationSize = 0.0f,
644 };
645 VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{
646 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
647 .pNext = nullptr,
648 .provokingVertexMode = key.state.provoking_vertex_last != 0
649 ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT
650 : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT,
651 };
652 if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) {
653 line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
654 }
655 if (device.IsExtConservativeRasterizationSupported()) {
656 conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
657 }
658 if (device.IsExtProvokingVertexSupported()) {
659 provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex);
660 }
336 661
337 const VkPipelineMultisampleStateCreateInfo multisample_ci{ 662 const VkPipelineMultisampleStateCreateInfo multisample_ci{
338 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, 663 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
339 .pNext = nullptr, 664 .pNext = nullptr,
340 .flags = 0, 665 .flags = 0,
341 .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), 666 .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode),
342 .sampleShadingEnable = VK_FALSE, 667 .sampleShadingEnable = VK_FALSE,
343 .minSampleShading = 0.0f, 668 .minSampleShading = 0.0f,
344 .pSampleMask = nullptr, 669 .pSampleMask = nullptr,
345 .alphaToCoverageEnable = VK_FALSE, 670 .alphaToCoverageEnable = VK_FALSE,
346 .alphaToOneEnable = VK_FALSE, 671 .alphaToOneEnable = VK_FALSE,
347 }; 672 };
348
349 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ 673 const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
350 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, 674 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
351 .pNext = nullptr, 675 .pNext = nullptr,
@@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
355 .depthCompareOp = dynamic.depth_test_enable 679 .depthCompareOp = dynamic.depth_test_enable
356 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) 680 ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
357 : VK_COMPARE_OP_ALWAYS, 681 : VK_COMPARE_OP_ALWAYS,
358 .depthBoundsTestEnable = dynamic.depth_bounds_enable, 682 .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(),
359 .stencilTestEnable = dynamic.stencil_enable, 683 .stencilTestEnable = dynamic.stencil_enable,
360 .front = GetStencilFaceState(dynamic.front), 684 .front = GetStencilFaceState(dynamic.front),
361 .back = GetStencilFaceState(dynamic.back), 685 .back = GetStencilFaceState(dynamic.back),
362 .minDepthBounds = 0.0f, 686 .minDepthBounds = 0.0f,
363 .maxDepthBounds = 0.0f, 687 .maxDepthBounds = 0.0f,
364 }; 688 };
365 689 if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) {
366 std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; 690 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
367 for (std::size_t index = 0; index < num_color_buffers; ++index) { 691 }
368 static constexpr std::array COMPONENT_TABLE{ 692 static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
693 const size_t num_attachments{NumAttachments(key.state)};
694 for (size_t index = 0; index < num_attachments; ++index) {
695 static constexpr std::array mask_table{
369 VK_COLOR_COMPONENT_R_BIT, 696 VK_COLOR_COMPONENT_R_BIT,
370 VK_COLOR_COMPONENT_G_BIT, 697 VK_COLOR_COMPONENT_G_BIT,
371 VK_COLOR_COMPONENT_B_BIT, 698 VK_COLOR_COMPONENT_B_BIT,
372 VK_COLOR_COMPONENT_A_BIT, 699 VK_COLOR_COMPONENT_A_BIT,
373 }; 700 };
374 const auto& blend = state.attachments[index]; 701 const auto& blend{key.state.attachments[index]};
375 702 const std::array mask{blend.Mask()};
376 VkColorComponentFlags color_components = 0; 703 VkColorComponentFlags write_mask{};
377 for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { 704 for (size_t i = 0; i < mask_table.size(); ++i) {
378 if (blend.Mask()[i]) { 705 write_mask |= mask[i] ? mask_table[i] : 0;
379 color_components |= COMPONENT_TABLE[i];
380 }
381 } 706 }
382 707 cb_attachments.push_back({
383 cb_attachments[index] = {
384 .blendEnable = blend.enable != 0, 708 .blendEnable = blend.enable != 0,
385 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), 709 .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
386 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), 710 .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
@@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
388 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), 712 .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
389 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), 713 .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
390 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), 714 .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
391 .colorWriteMask = color_components, 715 .colorWriteMask = write_mask,
392 }; 716 });
393 } 717 }
394
395 const VkPipelineColorBlendStateCreateInfo color_blend_ci{ 718 const VkPipelineColorBlendStateCreateInfo color_blend_ci{
396 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, 719 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
397 .pNext = nullptr, 720 .pNext = nullptr,
398 .flags = 0, 721 .flags = 0,
399 .logicOpEnable = VK_FALSE, 722 .logicOpEnable = VK_FALSE,
400 .logicOp = VK_LOGIC_OP_COPY, 723 .logicOp = VK_LOGIC_OP_COPY,
401 .attachmentCount = num_color_buffers, 724 .attachmentCount = static_cast<u32>(cb_attachments.size()),
402 .pAttachments = cb_attachments.data(), 725 .pAttachments = cb_attachments.data(),
403 .blendConstants = {}, 726 .blendConstants = {},
404 }; 727 };
405 728 static_vector<VkDynamicState, 19> dynamic_states{
406 std::vector dynamic_states{
407 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, 729 VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
408 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 730 VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS,
409 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 731 VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
410 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 732 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
733 VK_DYNAMIC_STATE_LINE_WIDTH,
411 }; 734 };
412 if (device.IsExtExtendedDynamicStateSupported()) { 735 if (key.state.extended_dynamic_state) {
413 static constexpr std::array extended{ 736 static constexpr std::array extended{
414 VK_DYNAMIC_STATE_CULL_MODE_EXT, 737 VK_DYNAMIC_STATE_CULL_MODE_EXT,
415 VK_DYNAMIC_STATE_FRONT_FACE_EXT, 738 VK_DYNAMIC_STATE_FRONT_FACE_EXT,
@@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
421 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, 744 VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
422 VK_DYNAMIC_STATE_STENCIL_OP_EXT, 745 VK_DYNAMIC_STATE_STENCIL_OP_EXT,
423 }; 746 };
747 if (key.state.dynamic_vertex_input) {
748 dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT);
749 }
424 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); 750 dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
425 } 751 }
426
427 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ 752 const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
428 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, 753 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
429 .pNext = nullptr, 754 .pNext = nullptr,
@@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
431 .dynamicStateCount = static_cast<u32>(dynamic_states.size()), 756 .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
432 .pDynamicStates = dynamic_states.data(), 757 .pDynamicStates = dynamic_states.data(),
433 }; 758 };
434 759 [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
435 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
436 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, 760 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
437 .pNext = nullptr, 761 .pNext = nullptr,
438 .requiredSubgroupSize = GuestWarpSize, 762 .requiredSubgroupSize = GuestWarpSize,
439 }; 763 };
440 764 static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages;
441 std::vector<VkPipelineShaderStageCreateInfo> shader_stages; 765 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
442 std::size_t module_index = 0; 766 if (!spv_modules[stage]) {
443 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
444 if (!program[stage]) {
445 continue; 767 continue;
446 } 768 }
447 769 [[maybe_unused]] auto& stage_ci =
448 VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); 770 shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{
449 stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 771 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
450 stage_ci.pNext = nullptr; 772 .pNext = nullptr,
451 stage_ci.flags = 0; 773 .flags = 0,
452 stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); 774 .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)),
453 stage_ci.module = *modules[module_index++]; 775 .module = *spv_modules[stage],
454 stage_ci.pName = "main"; 776 .pName = "main",
455 stage_ci.pSpecializationInfo = nullptr; 777 .pSpecializationInfo = nullptr,
456 778 });
779 /*
457 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { 780 if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
458 stage_ci.pNext = &subgroup_size_ci; 781 stage_ci.pNext = &subgroup_size_ci;
459 } 782 }
783 */
460 } 784 }
461 return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ 785 pipeline = device.GetLogical().CreateGraphicsPipeline({
462 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 786 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
463 .pNext = nullptr, 787 .pNext = nullptr,
464 .flags = 0, 788 .flags = 0,
@@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
473 .pDepthStencilState = &depth_stencil_ci, 797 .pDepthStencilState = &depth_stencil_ci,
474 .pColorBlendState = &color_blend_ci, 798 .pColorBlendState = &color_blend_ci,
475 .pDynamicState = &dynamic_state_ci, 799 .pDynamicState = &dynamic_state_ci,
476 .layout = *layout, 800 .layout = *pipeline_layout,
477 .renderPass = renderpass, 801 .renderPass = render_pass,
478 .subpass = 0, 802 .subpass = 0,
479 .basePipelineHandle = nullptr, 803 .basePipelineHandle = nullptr,
480 .basePipelineIndex = 0, 804 .basePipelineIndex = 0,
481 }); 805 });
482} 806}
483 807
808void GraphicsPipeline::Validate() {
809 size_t num_images{};
810 for (const auto& info : stage_infos) {
811 for (const auto& desc : info.texture_buffer_descriptors) {
812 num_images += desc.count;
813 }
814 for (const auto& desc : info.image_buffer_descriptors) {
815 num_images += desc.count;
816 }
817 for (const auto& desc : info.texture_descriptors) {
818 num_images += desc.count;
819 }
820 for (const auto& desc : info.image_descriptors) {
821 num_images += desc.count;
822 }
823 }
824 ASSERT(num_images <= MAX_IMAGE_ELEMENTS);
825}
826
484} // namespace Vulkan 827} // namespace Vulkan
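The Spec structs and FindSpec above implement a small compile-time dispatch: each Spec declares which stages and descriptor kinds it supports, Passes() rejects a Spec that the shader set needs more than, and the first surviving Spec's ConfigureImpl instantiation is returned as a plain function pointer (the last Spec is the catch-all). A minimal, self-contained sketch of the same technique with hypothetical names:

#include <cstdio>

struct State {
    bool has_storage_buffers{};
};

template <typename Spec>
bool Passes(const State& state) {
    // A Spec that compiled out storage-buffer handling cannot serve a state that uses them.
    return Spec::has_storage_buffers || !state.has_storage_buffers;
}

using ConfigureFuncPtr = void (*)(const State&);

template <typename Spec, typename... Specs>
ConfigureFuncPtr FindSpec(const State& state) {
    if constexpr (sizeof...(Specs) > 0) {
        if (!Passes<Spec>(state)) {
            return FindSpec<Specs...>(state); // fall through to a more general Spec
        }
    }
    return [](const State&) { std::puts(Spec::name); }; // capture-less lambda -> fn pointer
}

struct SimpleSpec {
    static constexpr const char* name = "simple";
    static constexpr bool has_storage_buffers = false;
};

struct DefaultSpec {
    static constexpr const char* name = "default";
    static constexpr bool has_storage_buffers = true;
};

int main() {
    const State state{.has_storage_buffers = true};
    FindSpec<SimpleSpec, DefaultSpec>(state)(state); // selects and runs the "default" path
}

The payoff in the real code is that the hot per-draw loops in ConfigureImpl are compiled once per Spec, with the unused descriptor paths removed by if constexpr.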
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 8b6a98fe0..2bd48d697 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -1,30 +1,36 @@
1// Copyright 2019 yuzu Emulator Project 1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
7#include <array> 8#include <array>
8#include <optional> 9#include <atomic>
9#include <vector> 10#include <condition_variable>
11#include <mutex>
12#include <type_traits>
10 13
11#include "common/common_types.h" 14#include "common/thread_worker.h"
15#include "shader_recompiler/shader_info.h"
12#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
13#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 17#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
18#include "video_core/renderer_vulkan/vk_buffer_cache.h"
14#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 19#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 20#include "video_core/renderer_vulkan/vk_texture_cache.h"
16#include "video_core/vulkan_common/vulkan_wrapper.h" 21#include "video_core/vulkan_common/vulkan_wrapper.h"
17 22
18namespace Vulkan { 23namespace VideoCore {
24class ShaderNotify;
25}
19 26
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 27namespace Vulkan {
21 28
22struct GraphicsPipelineCacheKey { 29struct GraphicsPipelineCacheKey {
23 VkRenderPass renderpass; 30 std::array<u64, 6> unique_hashes;
24 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; 31 FixedPipelineState state;
25 FixedPipelineState fixed_state;
26 32
27 std::size_t Hash() const noexcept; 33 size_t Hash() const noexcept;
28 34
29 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; 35 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
30 36
@@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey {
32 return !operator==(rhs); 38 return !operator==(rhs);
33 } 39 }
34 40
35 std::size_t Size() const noexcept { 41 size_t Size() const noexcept {
36 return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); 42 return sizeof(unique_hashes) + state.Size();
37 } 43 }
38}; 44};
39static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); 45static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
40static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); 46static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); 47static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
42 48
49} // namespace Vulkan
50
51namespace std {
52template <>
53struct hash<Vulkan::GraphicsPipelineCacheKey> {
54 size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
55 return k.Hash();
56 }
57};
58} // namespace std
59
60namespace Vulkan {
61
43class Device; 62class Device;
44class VKDescriptorPool; 63class RenderPassCache;
45class VKScheduler; 64class VKScheduler;
46class VKUpdateDescriptorQueue; 65class VKUpdateDescriptorQueue;
47 66
48using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; 67class GraphicsPipeline {
68 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
49 69
50class VKGraphicsPipeline final {
51public: 70public:
52 explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, 71 explicit GraphicsPipeline(
53 VKDescriptorPool& descriptor_pool, 72 Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
54 VKUpdateDescriptorQueue& update_descriptor_queue_, 73 VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
55 const GraphicsPipelineCacheKey& key, 74 VideoCore::ShaderNotify* shader_notify, const Device& device,
56 vk::Span<VkDescriptorSetLayoutBinding> bindings, 75 DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue,
57 const SPIRVProgram& program, u32 num_color_buffers); 76 Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache,
58 ~VKGraphicsPipeline(); 77 const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
59 78 const std::array<const Shader::Info*, NUM_STAGES>& infos);
60 VkDescriptorSet CommitDescriptorSet(); 79
61 80 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
62 VkPipeline GetHandle() const { 81 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
63 return *pipeline; 82
83 GraphicsPipeline& operator=(const GraphicsPipeline&) = delete;
84 GraphicsPipeline(const GraphicsPipeline&) = delete;
85
86 void AddTransition(GraphicsPipeline* transition);
87
88 void Configure(bool is_indexed) {
89 configure_func(this, is_indexed);
64 } 90 }
65 91
66 VkPipelineLayout GetLayout() const { 92 [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept {
67 return *layout; 93 if (key == current_key) {
94 return this;
95 }
96 const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)};
97 return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)]
98 : nullptr;
68 } 99 }
69 100
70 GraphicsPipelineCacheKey GetCacheKey() const { 101 [[nodiscard]] bool IsBuilt() const noexcept {
71 return cache_key; 102 return is_built.load(std::memory_order::relaxed);
72 } 103 }
73 104
74private: 105 template <typename Spec>
75 vk::DescriptorSetLayout CreateDescriptorSetLayout( 106 static auto MakeConfigureSpecFunc() {
76 vk::Span<VkDescriptorSetLayoutBinding> bindings) const; 107 return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); };
108 }
77 109
78 vk::PipelineLayout CreatePipelineLayout() const; 110private:
111 template <typename Spec>
112 void ConfigureImpl(bool is_indexed);
79 113
80 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( 114 void ConfigureDraw();
81 const SPIRVProgram& program) const;
82 115
83 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 116 void MakePipeline(VkRenderPass render_pass);
84 117
85 vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, 118 void Validate();
86 u32 num_color_buffers) const;
87 119
120 const GraphicsPipelineCacheKey key;
121 Tegra::Engines::Maxwell3D& maxwell3d;
122 Tegra::MemoryManager& gpu_memory;
88 const Device& device; 123 const Device& device;
124 TextureCache& texture_cache;
125 BufferCache& buffer_cache;
89 VKScheduler& scheduler; 126 VKScheduler& scheduler;
90 const GraphicsPipelineCacheKey cache_key; 127 VKUpdateDescriptorQueue& update_descriptor_queue;
91 const u64 hash; 128
129 void (*configure_func)(GraphicsPipeline*, bool){};
130
131 std::vector<GraphicsPipelineCacheKey> transition_keys;
132 std::vector<GraphicsPipeline*> transitions;
133
134 std::array<vk::ShaderModule, NUM_STAGES> spv_modules;
135
136 std::array<Shader::Info, NUM_STAGES> stage_infos;
137 std::array<u32, 5> enabled_uniform_buffer_masks{};
138 VideoCommon::UniformBufferSizes uniform_buffer_sizes{};
92 139
93 vk::DescriptorSetLayout descriptor_set_layout; 140 vk::DescriptorSetLayout descriptor_set_layout;
94 DescriptorAllocator descriptor_allocator; 141 DescriptorAllocator descriptor_allocator;
95 VKUpdateDescriptorQueue& update_descriptor_queue; 142 vk::PipelineLayout pipeline_layout;
96 vk::PipelineLayout layout; 143 vk::DescriptorUpdateTemplateKHR descriptor_update_template;
97 vk::DescriptorUpdateTemplateKHR descriptor_template;
98 std::vector<vk::ShaderModule> modules;
99
100 vk::Pipeline pipeline; 144 vk::Pipeline pipeline;
145
146 std::condition_variable build_condvar;
147 std::mutex build_mutex;
148 std::atomic_bool is_built{false};
149 bool uses_push_descriptor{false};
101}; 150};
102 151
103} // namespace Vulkan 152} // namespace Vulkan
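
The new GraphicsPipeline above keeps a per-pipeline list of transitions so that switching back to a recently seen key can bypass the global pipeline hash map. A minimal sketch of that lookup, with an int-based Key standing in for GraphicsPipelineCacheKey and none of the Vulkan state:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Key {
    int value;
    bool operator==(const Key&) const = default;
};

class Pipeline {
public:
    explicit Pipeline(Key key_) : key{key_} {}

    // Remember a pipeline this one has transitioned to.
    void AddTransition(Pipeline* next) {
        transition_keys.push_back(next->key);
        transitions.push_back(next);
    }

    // Return the cached pipeline for current_key, or nullptr on a miss.
    Pipeline* Next(const Key& current_key) noexcept {
        if (key == current_key) {
            return this;
        }
        const auto it = std::find(transition_keys.begin(), transition_keys.end(), current_key);
        return it != transition_keys.end()
                   ? transitions[static_cast<std::size_t>(it - transition_keys.begin())]
                   : nullptr;
    }

private:
    Key key;
    std::vector<Key> transition_keys;
    std::vector<Pipeline*> transitions;
};

Later in this diff, PipelineCache::CurrentGraphicsPipelineSlowPath wires these links up by calling AddTransition on the previously bound pipeline.
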
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index ee3cd35d0..4f8688118 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -39,9 +39,9 @@ public:
39 return KnownGpuTick() >= tick; 39 return KnownGpuTick() >= tick;
40 } 40 }
41 41
42 /// Advance to the logical tick. 42 /// Advance to the logical tick and return the old one
43 void NextTick() noexcept { 43 [[nodiscard]] u64 NextTick() noexcept {
44 ++current_tick; 44 return current_tick.fetch_add(1, std::memory_order::relaxed);
45 } 45 }
46 46
47 /// Refresh the known GPU tick 47 /// Refresh the known GPU tick
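
NextTick now hands back the tick that was current before the increment, i.e. the value the work being submitted will signal. A sketch of the same fetch_add pattern on a plain std::atomic counter (simplified, not the actual MasterSemaphore):

#include <atomic>
#include <cstdint>

class TickCounter {
public:
    // Returns the tick the queued work will signal, then bumps the counter.
    [[nodiscard]] std::uint64_t NextTick() noexcept {
        return current_tick.fetch_add(1, std::memory_order_relaxed);
    }

private:
    std::atomic<std::uint64_t> current_tick{1};
};

Because fetch_add is a single atomic read-modify-write, concurrent callers each observe a distinct tick.
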
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 8991505ca..57b163247 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -4,444 +4,613 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <cstddef> 6#include <cstddef>
7#include <fstream>
7#include <memory> 8#include <memory>
9#include <thread>
8#include <vector> 10#include <vector>
9 11
10#include "common/bit_cast.h" 12#include "common/bit_cast.h"
11#include "common/cityhash.h" 13#include "common/cityhash.h"
14#include "common/fs/fs.h"
15#include "common/fs/path_util.h"
12#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/thread_worker.h"
13#include "core/core.h" 18#include "core/core.h"
14#include "core/memory.h" 19#include "core/memory.h"
20#include "shader_recompiler/backend/spirv/emit_spirv.h"
21#include "shader_recompiler/environment.h"
22#include "shader_recompiler/frontend/maxwell/control_flow.h"
23#include "shader_recompiler/frontend/maxwell/translate_program.h"
24#include "shader_recompiler/program_header.h"
25#include "video_core/dirty_flags.h"
15#include "video_core/engines/kepler_compute.h" 26#include "video_core/engines/kepler_compute.h"
16#include "video_core/engines/maxwell_3d.h" 27#include "video_core/engines/maxwell_3d.h"
17#include "video_core/memory_manager.h" 28#include "video_core/memory_manager.h"
18#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 29#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
19#include "video_core/renderer_vulkan/maxwell_to_vk.h" 30#include "video_core/renderer_vulkan/maxwell_to_vk.h"
31#include "video_core/renderer_vulkan/pipeline_helper.h"
20#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 32#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 33#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 34#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
24#include "video_core/renderer_vulkan/vk_rasterizer.h" 35#include "video_core/renderer_vulkan/vk_rasterizer.h"
25#include "video_core/renderer_vulkan/vk_scheduler.h" 36#include "video_core/renderer_vulkan/vk_scheduler.h"
37#include "video_core/renderer_vulkan/vk_shader_util.h"
26#include "video_core/renderer_vulkan/vk_update_descriptor.h" 38#include "video_core/renderer_vulkan/vk_update_descriptor.h"
27#include "video_core/shader/compiler_settings.h"
28#include "video_core/shader/memory_util.h"
29#include "video_core/shader_cache.h" 39#include "video_core/shader_cache.h"
40#include "video_core/shader_environment.h"
30#include "video_core/shader_notify.h" 41#include "video_core/shader_notify.h"
31#include "video_core/vulkan_common/vulkan_device.h" 42#include "video_core/vulkan_common/vulkan_device.h"
32#include "video_core/vulkan_common/vulkan_wrapper.h" 43#include "video_core/vulkan_common/vulkan_wrapper.h"
33 44
34namespace Vulkan { 45namespace Vulkan {
35
36MICROPROFILE_DECLARE(Vulkan_PipelineCache); 46MICROPROFILE_DECLARE(Vulkan_PipelineCache);
37 47
38using Tegra::Engines::ShaderType;
39using VideoCommon::Shader::GetShaderAddress;
40using VideoCommon::Shader::GetShaderCode;
41using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
42using VideoCommon::Shader::ProgramCode;
43using VideoCommon::Shader::STAGE_MAIN_OFFSET;
44
45namespace { 48namespace {
49using Shader::Backend::SPIRV::EmitSPIRV;
50using Shader::Maxwell::MergeDualVertexPrograms;
51using Shader::Maxwell::TranslateProgram;
52using VideoCommon::ComputeEnvironment;
53using VideoCommon::FileEnvironment;
54using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment;
56
57constexpr u32 CACHE_VERSION = 5;
58
59template <typename Container>
60auto MakeSpan(Container& container) {
61 return std::span(container.data(), container.size());
62}
46 63
47constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 64Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
48constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 65 switch (comparison) {
49constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; 66 case Maxwell::ComparisonOp::Never:
50constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 67 case Maxwell::ComparisonOp::NeverOld:
51constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; 68 return Shader::CompareFunction::Never;
52constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 69 case Maxwell::ComparisonOp::Less:
53 70 case Maxwell::ComparisonOp::LessOld:
54constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 71 return Shader::CompareFunction::Less;
55 .depth = VideoCommon::Shader::CompileDepth::FullDecompile, 72 case Maxwell::ComparisonOp::Equal:
56 .disable_else_derivation = true, 73 case Maxwell::ComparisonOp::EqualOld:
57}; 74 return Shader::CompareFunction::Equal;
58 75 case Maxwell::ComparisonOp::LessEqual:
59constexpr std::size_t GetStageFromProgram(std::size_t program) { 76 case Maxwell::ComparisonOp::LessEqualOld:
60 return program == 0 ? 0 : program - 1; 77 return Shader::CompareFunction::LessThanEqual;
78 case Maxwell::ComparisonOp::Greater:
79 case Maxwell::ComparisonOp::GreaterOld:
80 return Shader::CompareFunction::Greater;
81 case Maxwell::ComparisonOp::NotEqual:
82 case Maxwell::ComparisonOp::NotEqualOld:
83 return Shader::CompareFunction::NotEqual;
84 case Maxwell::ComparisonOp::GreaterEqual:
85 case Maxwell::ComparisonOp::GreaterEqualOld:
86 return Shader::CompareFunction::GreaterThanEqual;
87 case Maxwell::ComparisonOp::Always:
88 case Maxwell::ComparisonOp::AlwaysOld:
89 return Shader::CompareFunction::Always;
90 }
91 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison);
92 return {};
61} 93}
62 94
63constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { 95Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) {
64 return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); 96 if (attr.enabled == 0) {
97 return Shader::AttributeType::Disabled;
98 }
99 switch (attr.Type()) {
100 case Maxwell::VertexAttribute::Type::SignedNorm:
101 case Maxwell::VertexAttribute::Type::UnsignedNorm:
102 case Maxwell::VertexAttribute::Type::UnsignedScaled:
103 case Maxwell::VertexAttribute::Type::SignedScaled:
104 case Maxwell::VertexAttribute::Type::Float:
105 return Shader::AttributeType::Float;
106 case Maxwell::VertexAttribute::Type::SignedInt:
107 return Shader::AttributeType::SignedInt;
108 case Maxwell::VertexAttribute::Type::UnsignedInt:
109 return Shader::AttributeType::UnsignedInt;
110 }
111 return Shader::AttributeType::Float;
65} 112}
66 113
67ShaderType GetShaderType(Maxwell::ShaderProgram program) { 114Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) {
68 switch (program) { 115 switch (state.DynamicAttributeType(index)) {
69 case Maxwell::ShaderProgram::VertexB: 116 case 0:
70 return ShaderType::Vertex; 117 return Shader::AttributeType::Disabled;
71 case Maxwell::ShaderProgram::TesselationControl: 118 case 1:
72 return ShaderType::TesselationControl; 119 return Shader::AttributeType::Float;
73 case Maxwell::ShaderProgram::TesselationEval: 120 case 2:
74 return ShaderType::TesselationEval; 121 return Shader::AttributeType::SignedInt;
75 case Maxwell::ShaderProgram::Geometry: 122 case 3:
76 return ShaderType::Geometry; 123 return Shader::AttributeType::UnsignedInt;
77 case Maxwell::ShaderProgram::Fragment:
78 return ShaderType::Fragment;
79 default:
80 UNIMPLEMENTED_MSG("program={}", program);
81 return ShaderType::Vertex;
82 } 124 }
125 return Shader::AttributeType::Disabled;
83} 126}
84 127
85template <VkDescriptorType descriptor_type, class Container> 128Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs,
86void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, 129 const GraphicsPipelineCacheKey& key,
87 VkShaderStageFlags stage_flags, const Container& container) { 130 const Shader::IR::Program& program,
88 const u32 num_entries = static_cast<u32>(std::size(container)); 131 const Shader::IR::Program* previous_program) {
89 for (std::size_t i = 0; i < num_entries; ++i) { 132 Shader::RuntimeInfo info;
90 u32 count = 1; 133 if (previous_program) {
91 if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { 134 info.previous_stage_stores = previous_program->info.stores;
92 // Combined image samplers can be arrayed. 135 if (previous_program->is_geometry_passthrough) {
93 count = container[i].size; 136 info.previous_stage_stores.mask |= previous_program->info.passthrough.mask;
94 } 137 }
95 bindings.push_back({ 138 } else {
96 .binding = binding++, 139 info.previous_stage_stores.mask.set();
97 .descriptorType = descriptor_type, 140 }
98 .descriptorCount = count, 141 const Shader::Stage stage{program.stage};
99 .stageFlags = stage_flags, 142 const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough};
100 .pImmutableSamplers = nullptr, 143 const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
101 }); 144 const float point_size{Common::BitCast<float>(key.state.point_size)};
145 switch (stage) {
146 case Shader::Stage::VertexB:
147 if (!has_geometry) {
148 if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
149 info.fixed_state_point_size = point_size;
150 }
151 if (key.state.xfb_enabled) {
152 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
153 }
154 info.convert_depth_mode = gl_ndc;
155 }
156 if (key.state.dynamic_vertex_input) {
157 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
158 info.generic_input_types[index] = AttributeType(key.state, index);
159 }
160 } else {
161 std::ranges::transform(key.state.attributes, info.generic_input_types.begin(),
162 &CastAttributeType);
163 }
164 break;
165 case Shader::Stage::TessellationEval:
166 // We have to flip tessellation clockwise for some reason...
167 info.tess_clockwise = key.state.tessellation_clockwise == 0;
168 info.tess_primitive = [&key] {
169 const u32 raw{key.state.tessellation_primitive.Value()};
170 switch (static_cast<Maxwell::TessellationPrimitive>(raw)) {
171 case Maxwell::TessellationPrimitive::Isolines:
172 return Shader::TessPrimitive::Isolines;
173 case Maxwell::TessellationPrimitive::Triangles:
174 return Shader::TessPrimitive::Triangles;
175 case Maxwell::TessellationPrimitive::Quads:
176 return Shader::TessPrimitive::Quads;
177 }
178 UNREACHABLE();
179 return Shader::TessPrimitive::Triangles;
180 }();
181 info.tess_spacing = [&] {
182 const u32 raw{key.state.tessellation_spacing};
183 switch (static_cast<Maxwell::TessellationSpacing>(raw)) {
184 case Maxwell::TessellationSpacing::Equal:
185 return Shader::TessSpacing::Equal;
186 case Maxwell::TessellationSpacing::FractionalOdd:
187 return Shader::TessSpacing::FractionalOdd;
188 case Maxwell::TessellationSpacing::FractionalEven:
189 return Shader::TessSpacing::FractionalEven;
190 }
191 UNREACHABLE();
192 return Shader::TessSpacing::Equal;
193 }();
194 break;
195 case Shader::Stage::Geometry:
196 if (program.output_topology == Shader::OutputTopology::PointList) {
197 info.fixed_state_point_size = point_size;
198 }
199 if (key.state.xfb_enabled != 0) {
200 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
201 }
202 info.convert_depth_mode = gl_ndc;
203 break;
204 case Shader::Stage::Fragment:
205 info.alpha_test_func = MaxwellToCompareFunction(
206 key.state.UnpackComparisonOp(key.state.alpha_test_func.Value()));
207 info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref);
208 break;
209 default:
210 break;
211 }
212 switch (key.state.topology) {
213 case Maxwell::PrimitiveTopology::Points:
214 info.input_topology = Shader::InputTopology::Points;
215 break;
216 case Maxwell::PrimitiveTopology::Lines:
217 case Maxwell::PrimitiveTopology::LineLoop:
218 case Maxwell::PrimitiveTopology::LineStrip:
219 info.input_topology = Shader::InputTopology::Lines;
220 break;
221 case Maxwell::PrimitiveTopology::Triangles:
222 case Maxwell::PrimitiveTopology::TriangleStrip:
223 case Maxwell::PrimitiveTopology::TriangleFan:
224 case Maxwell::PrimitiveTopology::Quads:
225 case Maxwell::PrimitiveTopology::QuadStrip:
226 case Maxwell::PrimitiveTopology::Polygon:
227 case Maxwell::PrimitiveTopology::Patches:
228 info.input_topology = Shader::InputTopology::Triangles;
229 break;
230 case Maxwell::PrimitiveTopology::LinesAdjacency:
231 case Maxwell::PrimitiveTopology::LineStripAdjacency:
232 info.input_topology = Shader::InputTopology::LinesAdjacency;
233 break;
234 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
235 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
236 info.input_topology = Shader::InputTopology::TrianglesAdjacency;
237 break;
102 } 238 }
239 info.force_early_z = key.state.early_z != 0;
240 info.y_negate = key.state.y_negate != 0;
241 return info;
103} 242}
243} // Anonymous namespace
104 244
105u32 FillDescriptorLayout(const ShaderEntries& entries, 245size_t ComputePipelineCacheKey::Hash() const noexcept {
106 std::vector<VkDescriptorSetLayoutBinding>& bindings, 246 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
107 Maxwell::ShaderProgram program_type, u32 base_binding) { 247 return static_cast<size_t>(hash);
108 const ShaderType stage = GetStageFromProgram(program_type);
109 const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
110
111 u32 binding = base_binding;
112 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
113 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
114 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
115 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
116 AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
117 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
118 return binding;
119} 248}
120 249
121} // Anonymous namespace 250bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
251 return std::memcmp(&rhs, this, sizeof *this) == 0;
252}
122 253
123std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { 254size_t GraphicsPipelineCacheKey::Hash() const noexcept {
124 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); 255 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
125 return static_cast<std::size_t>(hash); 256 return static_cast<size_t>(hash);
126} 257}
127 258
128bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { 259bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
129 return std::memcmp(&rhs, this, Size()) == 0; 260 return std::memcmp(&rhs, this, Size()) == 0;
130} 261}
131 262
132std::size_t ComputePipelineCacheKey::Hash() const noexcept { 263PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
133 const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); 264 Tegra::Engines::KeplerCompute& kepler_compute_,
134 return static_cast<std::size_t>(hash); 265 Tegra::MemoryManager& gpu_memory_, const Device& device_,
135} 266 VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
136 267 VKUpdateDescriptorQueue& update_descriptor_queue_,
137bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { 268 RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
138 return std::memcmp(&rhs, this, sizeof *this) == 0; 269 TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
270 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
271 device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
272 update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
273 buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
274 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
275 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
276 serialization_thread(1, "yuzu:PipelineSerialization") {
277 const auto& float_control{device.FloatControlProperties()};
278 const VkDriverIdKHR driver_id{device.GetDriverID()};
279 profile = Shader::Profile{
280 .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U,
281 .unified_descriptor_binding = true,
282 .support_descriptor_aliasing = true,
283 .support_int8 = true,
284 .support_int16 = device.IsShaderInt16Supported(),
285 .support_int64 = device.IsShaderInt64Supported(),
286 .support_vertex_instance_id = false,
287 .support_float_controls = true,
288 .support_separate_denorm_behavior = float_control.denormBehaviorIndependence ==
289 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
290 .support_separate_rounding_mode =
291 float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR,
292 .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE,
293 .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE,
294 .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE,
295 .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE,
296 .support_fp16_signed_zero_nan_preserve =
297 float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE,
298 .support_fp32_signed_zero_nan_preserve =
299 float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
300 .support_fp64_signed_zero_nan_preserve =
301 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
302 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
303 .support_vote = true,
304 .support_viewport_index_layer_non_geometry =
305 device.IsExtShaderViewportIndexLayerSupported(),
306 .support_viewport_mask = device.IsNvViewportArray2Supported(),
307 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
308 .support_demote_to_helper_invocation = true,
309 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
310 .support_derivative_control = true,
311 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
312
313 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
314
315 .lower_left_origin_mode = false,
316 .need_declared_frag_colors = false,
317
318 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
319 .has_broken_unsigned_image_offsets = false,
320 .has_broken_signed_operations = false,
321 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR,
322 .ignore_nan_fp_comparisons = false,
323 };
324 host_info = Shader::HostTranslateInfo{
325 .support_float16 = device.IsFloat16Supported(),
326 .support_int64 = device.IsShaderInt64Supported(),
327 };
139} 328}
140 329
141Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, 330PipelineCache::~PipelineCache() = default;
142 GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
143 : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
144 shader_ir(program_code, main_offset_, compiler_settings, registry),
145 entries(GenerateShaderEntries(shader_ir)) {}
146
147Shader::~Shader() = default;
148
149VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
150 Tegra::Engines::Maxwell3D& maxwell3d_,
151 Tegra::Engines::KeplerCompute& kepler_compute_,
152 Tegra::MemoryManager& gpu_memory_, const Device& device_,
153 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
154 VKUpdateDescriptorQueue& update_descriptor_queue_)
155 : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
156 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
157 scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
158 update_descriptor_queue_} {}
159
160VKPipelineCache::~VKPipelineCache() = default;
161 331
162std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 332GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
163 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; 333 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
164
165 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
166 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
167
168 // Skip stages that are not enabled
169 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
170 continue;
171 }
172
173 const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
174 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
175 ASSERT(cpu_addr);
176
177 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
178 if (!result) {
179 const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
180
181 // No shader found - create a new one
182 static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
183 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
184 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
185 const std::size_t size_in_bytes = code.size() * sizeof(u64);
186
187 auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
188 std::move(code), stage_offset);
189 result = shader.get();
190 334
191 if (cpu_addr) { 335 if (!RefreshStages(graphics_key.unique_hashes)) {
192 Register(std::move(shader), *cpu_addr, size_in_bytes); 336 current_pipeline = nullptr;
193 } else { 337 return nullptr;
194 null_shader = std::move(shader); 338 }
195 } 339 graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
340 device.IsExtVertexInputDynamicStateSupported());
341
342 if (current_pipeline) {
343 GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};
344 if (next) {
345 current_pipeline = next;
346 return BuiltPipeline(current_pipeline);
196 } 347 }
197 shaders[index] = result;
198 } 348 }
199 return last_shaders = shaders; 349 return CurrentGraphicsPipelineSlowPath();
200} 350}
201 351
202VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( 352ComputePipeline* PipelineCache::CurrentComputePipeline() {
203 const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
204 VideoCommon::Shader::AsyncShaders& async_shaders) {
205 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 353 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
206 354
207 if (last_graphics_pipeline && last_graphics_key == key) { 355 const ShaderInfo* const shader{ComputeShader()};
208 return last_graphics_pipeline; 356 if (!shader) {
209 } 357 return nullptr;
210 last_graphics_key = key;
211
212 if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
213 std::unique_lock lock{pipeline_cache};
214 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
215 if (is_cache_miss) {
216 gpu.ShaderNotify().MarkSharderBuilding();
217 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
218 const auto [program, bindings] = DecompileShaders(key.fixed_state);
219 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
220 update_descriptor_queue, bindings, program, key,
221 num_color_buffers);
222 }
223 last_graphics_pipeline = pair->second.get();
224 return last_graphics_pipeline;
225 } 358 }
226 359 const auto& qmd{kepler_compute.launch_description};
227 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); 360 const ComputePipelineCacheKey key{
228 auto& entry = pair->second; 361 .unique_hash = shader->unique_hash,
229 if (is_cache_miss) { 362 .shared_memory_size = qmd.shared_alloc,
230 gpu.ShaderNotify().MarkSharderBuilding(); 363 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
231 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 364 };
232 const auto [program, bindings] = DecompileShaders(key.fixed_state); 365 const auto [pair, is_new]{compute_cache.try_emplace(key)};
233 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 366 auto& pipeline{pair->second};
234 update_descriptor_queue, key, bindings, 367 if (!is_new) {
235 program, num_color_buffers); 368 return pipeline.get();
236 gpu.ShaderNotify().MarkShaderComplete();
237 } 369 }
238 last_graphics_pipeline = entry.get(); 370 pipeline = CreateComputePipeline(key, shader);
239 return last_graphics_pipeline; 371 return pipeline.get();
240} 372}
241 373
242VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 374void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
243 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 375 const VideoCore::DiskResourceLoadCallback& callback) {
244 376 if (title_id == 0) {
245 const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); 377 return;
246 auto& entry = pair->second;
247 if (!is_cache_miss) {
248 return *entry;
249 } 378 }
250 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 379 const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
251 380 const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)};
252 const GPUVAddr gpu_addr = key.shader; 381 if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) {
253 382 LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
254 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 383 return;
255 ASSERT(cpu_addr); 384 }
385 pipeline_cache_filename = base_dir / "vulkan.bin";
386
387 struct {
388 std::mutex mutex;
389 size_t total{};
390 size_t built{};
391 bool has_loaded{};
392 } state;
393
394 const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
395 ComputePipelineCacheKey key;
396 file.read(reinterpret_cast<char*>(&key), sizeof(key));
397
398 workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
399 ShaderPools pools;
400 auto pipeline{CreateComputePipeline(pools, key, env, false)};
401 std::lock_guard lock{state.mutex};
402 if (pipeline) {
403 compute_cache.emplace(key, std::move(pipeline));
404 }
405 ++state.built;
406 if (state.has_loaded) {
407 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
408 }
409 });
410 ++state.total;
411 }};
412 const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported();
413 const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported();
414 const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
415 GraphicsPipelineCacheKey key;
416 file.read(reinterpret_cast<char*>(&key), sizeof(key));
417
418 if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state ||
419 (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) {
420 return;
421 }
422 workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
423 ShaderPools pools;
424 boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
425 for (auto& env : envs) {
426 env_ptrs.push_back(&env);
427 }
428 auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
256 429
257 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); 430 std::lock_guard lock{state.mutex};
258 if (!shader) { 431 graphics_cache.emplace(key, std::move(pipeline));
259 // No shader found - create a new one 432 ++state.built;
260 const auto host_ptr = gpu_memory.GetPointer(gpu_addr); 433 if (state.has_loaded) {
434 callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
435 }
436 });
437 ++state.total;
438 }};
439 VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute,
440 load_graphics);
261 441
262 ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); 442 std::unique_lock lock{state.mutex};
263 const std::size_t size_in_bytes = code.size() * sizeof(u64); 443 callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
444 state.has_loaded = true;
445 lock.unlock();
264 446
265 auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, 447 workers.WaitForRequests();
266 *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); 448}
267 shader = shader_info.get();
268 449
269 if (cpu_addr) { 450GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() {
270 Register(std::move(shader_info), *cpu_addr, size_in_bytes); 451 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
271 } else { 452 auto& pipeline{pair->second};
272 null_kernel = std::move(shader_info); 453 if (is_new) {
273 } 454 pipeline = CreateGraphicsPipeline();
274 } 455 }
275 456 if (!pipeline) {
276 const Specialization specialization{ 457 return nullptr;
277 .base_binding = 0, 458 }
278 .workgroup_size = key.workgroup_size, 459 if (current_pipeline) {
279 .shared_memory_size = key.shared_memory_size, 460 current_pipeline->AddTransition(pipeline.get());
280 .point_size = std::nullopt, 461 }
281 .enabled_attributes = {}, 462 current_pipeline = pipeline.get();
282 .attribute_types = {}, 463 return BuiltPipeline(current_pipeline);
283 .ndc_minus_one_to_one = false,
284 };
285 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
286 shader->GetRegistry(), specialization),
287 shader->GetEntries()};
288 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
289 update_descriptor_queue, spirv_shader);
290 return *entry;
291} 464}
292 465
293void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { 466GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept {
294 gpu.ShaderNotify().MarkShaderComplete(); 467 if (pipeline->IsBuilt()) {
295 std::unique_lock lock{pipeline_cache}; 468 return pipeline;
296 graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); 469 }
470 if (!use_asynchronous_shaders) {
471 return pipeline;
472 }
473 // If something is using depth, we can assume that games are not rendering anything which
474 // will be used one time.
475 if (maxwell3d.regs.zeta_enable) {
476 return nullptr;
477 }
478 // If games are using a small index count, we can assume these are full screen quads.
479 // Usually these shaders are only used once for building textures so we can assume they
480 // can't be built async
481 if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
482 return pipeline;
483 }
484 return nullptr;
297} 485}
298 486
299void VKPipelineCache::OnShaderRemoval(Shader* shader) { 487std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
300 bool finished = false; 488 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
301 const auto Finish = [&] { 489 std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
302 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 490 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
303 // flush. 491 size_t env_index{0};
304 if (finished) { 492 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
305 return; 493 const bool uses_vertex_a{key.unique_hashes[0] != 0};
306 } 494 const bool uses_vertex_b{key.unique_hashes[1] != 0};
307 finished = true; 495 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
308 scheduler.Finish(); 496 if (key.unique_hashes[index] == 0) {
309 };
310
311 const GPUVAddr invalidated_addr = shader->GetGpuAddr();
312 for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
313 auto& entry = it->first;
314 if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
315 entry.shaders.end()) {
316 ++it;
317 continue; 497 continue;
318 } 498 }
319 Finish(); 499 Shader::Environment& env{*envs[env_index]};
320 it = graphics_cache.erase(it); 500 ++env_index;
501
502 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
503 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
504 if (!uses_vertex_a || index != 1) {
505 // Normal path
506 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
507 } else {
508 // VertexB path when VertexA is present.
509 auto& program_va{programs[0]};
510 auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
511 programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
512 }
321 } 513 }
322 for (auto it = compute_cache.begin(); it != compute_cache.end();) { 514 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
323 auto& entry = it->first; 515 std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules;
324 if (entry.shader != invalidated_addr) { 516
325 ++it; 517 const Shader::IR::Program* previous_stage{};
518 Shader::Backend::Bindings binding;
519 for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram;
520 ++index) {
521 if (key.unique_hashes[index] == 0) {
326 continue; 522 continue;
327 } 523 }
328 Finish(); 524 UNIMPLEMENTED_IF(index == 0);
329 it = compute_cache.erase(it); 525
526 Shader::IR::Program& program{programs[index]};
527 const size_t stage_index{index - 1};
528 infos[stage_index] = &program.info;
529
530 const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)};
531 const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)};
532 device.SaveShader(code);
533 modules[stage_index] = BuildShader(device, code);
534 if (device.HasDebuggingToolAttached()) {
535 const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])};
536 modules[stage_index].SetObjectNameEXT(name.c_str());
537 }
538 previous_stage = &program;
330 } 539 }
540 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
541 return std::make_unique<GraphicsPipeline>(
542 maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
543 descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key,
544 std::move(modules), infos);
545
546} catch (const Shader::Exception& exception) {
547 LOG_ERROR(Render_Vulkan, "{}", exception.what());
548 return nullptr;
331} 549}
332 550
333std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 551std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
334VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { 552 GraphicsEnvironments environments;
335 Specialization specialization; 553 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
336 if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
337 float point_size;
338 std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
339 specialization.point_size = point_size;
340 ASSERT(point_size != 0.0f);
341 }
342 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
343 const auto& attribute = fixed_state.attributes[i];
344 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
345 specialization.attribute_types[i] = attribute.Type();
346 }
347 specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
348 specialization.early_fragment_tests = fixed_state.early_z;
349
350 // Alpha test
351 specialization.alpha_test_func =
352 FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
353 specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
354
355 SPIRVProgram program;
356 std::vector<VkDescriptorSetLayoutBinding> bindings;
357 554
358 for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { 555 main_pools.ReleaseContents();
359 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); 556 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
360 // Skip stages that are not enabled 557 if (!pipeline || pipeline_cache_filename.empty()) {
361 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { 558 return pipeline;
362 continue;
363 }
364 const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
365 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
366 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
367
368 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
369 const ShaderType program_type = GetShaderType(program_enum);
370 const auto& entries = shader->GetEntries();
371 program[stage] = {
372 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
373 entries,
374 };
375
376 const u32 old_binding = specialization.base_binding;
377 specialization.base_binding =
378 FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
379 ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
380 } 559 }
381 return {std::move(program), std::move(bindings)}; 560 serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
382} 561 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
383 562 env_ptrs;
384template <VkDescriptorType descriptor_type, class Container> 563 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
385void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, 564 if (key.unique_hashes[index] != 0) {
386 u32& offset, const Container& container) { 565 env_ptrs.push_back(&envs[index]);
387 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 566 }
388 const u32 count = static_cast<u32>(std::size(container));
389
390 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
391 for (u32 i = 0; i < count; ++i) {
392 const u32 num_samplers = container[i].size;
393 template_entries.push_back({
394 .dstBinding = binding,
395 .dstArrayElement = 0,
396 .descriptorCount = num_samplers,
397 .descriptorType = descriptor_type,
398 .offset = offset,
399 .stride = entry_size,
400 });
401
402 ++binding;
403 offset += num_samplers * entry_size;
404 } 567 }
405 return; 568 SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION);
406 } 569 });
570 return pipeline;
571}
407 572
408 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || 573std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
409 descriptor_type == STORAGE_TEXEL_BUFFER) { 574 const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
410 // Nvidia has a bug where updating multiple texels at once causes the driver to crash. 575 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
411 // Note: Fixed in driver Windows 443.24, Linux 440.66.15 576 const auto& qmd{kepler_compute.launch_description};
412 for (u32 i = 0; i < count; ++i) { 577 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
413 template_entries.push_back({ 578 env.SetCachedSize(shader->size_bytes);
414 .dstBinding = binding + i, 579
415 .dstArrayElement = 0, 580 main_pools.ReleaseContents();
416 .descriptorCount = 1, 581 auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
417 .descriptorType = descriptor_type, 582 if (!pipeline || pipeline_cache_filename.empty()) {
418 .offset = static_cast<std::size_t>(offset + i * entry_size), 583 return pipeline;
419 .stride = entry_size,
420 });
421 }
422 } else if (count > 0) {
423 template_entries.push_back({
424 .dstBinding = binding,
425 .dstArrayElement = 0,
426 .descriptorCount = count,
427 .descriptorType = descriptor_type,
428 .offset = offset,
429 .stride = entry_size,
430 });
431 } 584 }
432 offset += count * entry_size; 585 serialization_thread.QueueWork([this, key, env = std::move(env)] {
433 binding += count; 586 SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
587 pipeline_cache_filename, CACHE_VERSION);
588 });
589 return pipeline;
434} 590}
435 591
436void FillDescriptorUpdateTemplateEntries( 592std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
437 const ShaderEntries& entries, u32& binding, u32& offset, 593 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
438 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { 594 bool build_in_parallel) try {
439 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); 595 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
440 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); 596
441 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); 597 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
442 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); 598 auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)};
443 AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); 599 const std::vector<u32> code{EmitSPIRV(profile, program)};
444 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); 600 device.SaveShader(code);
601 vk::ShaderModule spv_module{BuildShader(device, code)};
602 if (device.HasDebuggingToolAttached()) {
603 const auto name{fmt::format("Shader {:016x}", key.unique_hash)};
604 spv_module.SetObjectNameEXT(name.c_str());
605 }
606 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
607 return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue,
608 thread_worker, &shader_notify, program.info,
609 std::move(spv_module));
610
611} catch (const Shader::Exception& exception) {
612 LOG_ERROR(Render_Vulkan, "{}", exception.what());
613 return nullptr;
445} 614}
446 615
447} // namespace Vulkan 616} // namespace Vulkan
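
LoadDiskResources above queues every cached pipeline onto a worker pool and reports build progress through the callback under a shared mutex. A simplified, self-contained sketch of that fan-out pattern, with std::async standing in for Common::ThreadWorker and an int key for the serialized cache keys:

#include <cstddef>
#include <functional>
#include <future>
#include <mutex>
#include <vector>

struct LoadState {
    std::mutex mutex;
    std::size_t total{};
    std::size_t built{};
};

void LoadPipelines(const std::vector<int>& cached_keys,
                   const std::function<void(std::size_t, std::size_t)>& callback) {
    LoadState state;
    state.total = cached_keys.size();
    callback(0, state.total); // Equivalent to the initial LoadCallbackStage::Build report.
    std::vector<std::future<void>> jobs;
    jobs.reserve(cached_keys.size());
    for (const int key : cached_keys) {
        jobs.push_back(std::async(std::launch::async, [key, &state, &callback] {
            static_cast<void>(key); // Stand-in for building one pipeline from its environment.
            std::lock_guard lock{state.mutex};
            ++state.built;
            callback(state.built, state.total);
        }));
    }
    for (auto& job : jobs) {
        job.wait(); // Equivalent to workers.WaitForRequests().
    }
}

The real path additionally skips graphics keys whose extended-dynamic-state or dynamic-vertex-input flags no longer match the device, and pushes newly created pipelines through a dedicated serialization thread.
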
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 89d635a3d..efe5a7ed8 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,24 +6,28 @@
6 6
7#include <array> 7#include <array>
8#include <cstddef> 8#include <cstddef>
9#include <filesystem>
10#include <iosfwd>
9#include <memory> 11#include <memory>
10#include <type_traits> 12#include <type_traits>
11#include <unordered_map> 13#include <unordered_map>
12#include <utility> 14#include <utility>
13#include <vector> 15#include <vector>
14 16
15#include <boost/functional/hash.hpp>
16
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "common/thread_worker.h"
19#include "shader_recompiler/frontend/ir/basic_block.h"
20#include "shader_recompiler/frontend/ir/value.h"
21#include "shader_recompiler/frontend/maxwell/control_flow.h"
22#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h"
24#include "shader_recompiler/profile.h"
19#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 26#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
27#include "video_core/renderer_vulkan/vk_buffer_cache.h"
28#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 29#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
22#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 30#include "video_core/renderer_vulkan/vk_texture_cache.h"
23#include "video_core/shader/async_shaders.h"
24#include "video_core/shader/memory_util.h"
25#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h"
27#include "video_core/shader_cache.h" 31#include "video_core/shader_cache.h"
28#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
29 33
@@ -31,23 +35,24 @@ namespace Core {
31class System; 35class System;
32} 36}
33 37
34namespace Vulkan { 38namespace Shader::IR {
39struct Program;
40}
35 41
36class Device; 42namespace VideoCore {
37class RasterizerVulkan; 43class ShaderNotify;
38class VKComputePipeline; 44}
39class VKDescriptorPool; 45
40class VKScheduler; 46namespace Vulkan {
41class VKUpdateDescriptorQueue;
42 47
43using Maxwell = Tegra::Engines::Maxwell3D::Regs; 48using Maxwell = Tegra::Engines::Maxwell3D::Regs;
44 49
45struct ComputePipelineCacheKey { 50struct ComputePipelineCacheKey {
46 GPUVAddr shader; 51 u64 unique_hash;
47 u32 shared_memory_size; 52 u32 shared_memory_size;
48 std::array<u32, 3> workgroup_size; 53 std::array<u32, 3> workgroup_size;
49 54
50 std::size_t Hash() const noexcept; 55 size_t Hash() const noexcept;
51 56
52 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; 57 bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
53 58
@@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
64namespace std { 69namespace std {
65 70
66template <> 71template <>
67struct hash<Vulkan::GraphicsPipelineCacheKey> {
68 std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
69 return k.Hash();
70 }
71};
72
73template <>
74struct hash<Vulkan::ComputePipelineCacheKey> { 72struct hash<Vulkan::ComputePipelineCacheKey> {
75 std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { 73 size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
76 return k.Hash(); 74 return k.Hash();
77 } 75 }
78}; 76};
@@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
81 79
82namespace Vulkan { 80namespace Vulkan {
83 81
84class Shader { 82class ComputePipeline;
85public: 83class Device;
86 explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, 84class DescriptorPool;
87 Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, 85class RasterizerVulkan;
88 VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); 86class RenderPassCache;
89 ~Shader(); 87class VKScheduler;
90 88class VKUpdateDescriptorQueue;
91 GPUVAddr GetGpuAddr() const {
92 return gpu_addr;
93 }
94
95 VideoCommon::Shader::ShaderIR& GetIR() {
96 return shader_ir;
97 }
98
99 const VideoCommon::Shader::ShaderIR& GetIR() const {
100 return shader_ir;
101 }
102 89
103 const VideoCommon::Shader::Registry& GetRegistry() const { 90using VideoCommon::ShaderInfo;
104 return registry;
105 }
106 91
107 const ShaderEntries& GetEntries() const { 92struct ShaderPools {
108 return entries; 93 void ReleaseContents() {
94 flow_block.ReleaseContents();
95 block.ReleaseContents();
96 inst.ReleaseContents();
109 } 97 }
110 98
111private: 99 Shader::ObjectPool<Shader::IR::Inst> inst;
112 GPUVAddr gpu_addr{}; 100 Shader::ObjectPool<Shader::IR::Block> block;
113 VideoCommon::Shader::ProgramCode program_code; 101 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
114 VideoCommon::Shader::Registry registry;
115 VideoCommon::Shader::ShaderIR shader_ir;
116 ShaderEntries entries;
117}; 102};
118 103
119class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { 104class PipelineCache : public VideoCommon::ShaderCache {
120public: 105public:
121 explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, 106 explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
122 Tegra::Engines::Maxwell3D& maxwell3d, 107 Tegra::Engines::KeplerCompute& kepler_compute,
123 Tegra::Engines::KeplerCompute& kepler_compute, 108 Tegra::MemoryManager& gpu_memory, const Device& device,
124 Tegra::MemoryManager& gpu_memory, const Device& device, 109 VKScheduler& scheduler, DescriptorPool& descriptor_pool,
125 VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, 110 VKUpdateDescriptorQueue& update_descriptor_queue,
126 VKUpdateDescriptorQueue& update_descriptor_queue); 111 RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
127 ~VKPipelineCache() override; 112 TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
113 ~PipelineCache();
114
115 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
128 116
129 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 117 [[nodiscard]] ComputePipeline* CurrentComputePipeline();
130 118
131 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, 119 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
132 u32 num_color_buffers, 120 const VideoCore::DiskResourceLoadCallback& callback);
133 VideoCommon::Shader::AsyncShaders& async_shaders);
134 121
135 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 122private:
123 [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath();
136 124
137 void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); 125 [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept;
138 126
139protected: 127 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
140 void OnShaderRemoval(Shader* shader) final;
141 128
142private: 129 std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
143 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 130 ShaderPools& pools, const GraphicsPipelineCacheKey& key,
144 const FixedPipelineState& fixed_state); 131 std::span<Shader::Environment* const> envs, bool build_in_parallel);
145 132
146 Tegra::GPU& gpu; 133 std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key,
147 Tegra::Engines::Maxwell3D& maxwell3d; 134 const ShaderInfo* shader);
148 Tegra::Engines::KeplerCompute& kepler_compute; 135
149 Tegra::MemoryManager& gpu_memory; 136 std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
137 const ComputePipelineCacheKey& key,
138 Shader::Environment& env,
139 bool build_in_parallel);
150 140
151 const Device& device; 141 const Device& device;
152 VKScheduler& scheduler; 142 VKScheduler& scheduler;
153 VKDescriptorPool& descriptor_pool; 143 DescriptorPool& descriptor_pool;
154 VKUpdateDescriptorQueue& update_descriptor_queue; 144 VKUpdateDescriptorQueue& update_descriptor_queue;
145 RenderPassCache& render_pass_cache;
146 BufferCache& buffer_cache;
147 TextureCache& texture_cache;
148 VideoCore::ShaderNotify& shader_notify;
149 bool use_asynchronous_shaders{};
155 150
156 std::unique_ptr<Shader> null_shader; 151 GraphicsPipelineCacheKey graphics_key{};
157 std::unique_ptr<Shader> null_kernel; 152 GraphicsPipeline* current_pipeline{};
158 153
159 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; 154 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
155 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
160 156
161 GraphicsPipelineCacheKey last_graphics_key; 157 ShaderPools main_pools;
162 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
163 158
164 std::mutex pipeline_cache; 159 Shader::Profile profile;
165 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> 160 Shader::HostTranslateInfo host_info;
166 graphics_cache;
167 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
168};
169 161
170void FillDescriptorUpdateTemplateEntries( 162 std::filesystem::path pipeline_cache_filename;
171 const ShaderEntries& entries, u32& binding, u32& offset, 163
172 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); 164 Common::ThreadWorker workers;
165 Common::ThreadWorker serialization_thread;
166};
173 167
174} // namespace Vulkan 168} // namespace Vulkan
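
Both cache keys above are trivially copyable with unique object representations, so they can be hashed over their raw bytes and compared with a single memcmp. A minimal sketch of that layout and its std::hash specialization, with an FNV-1a loop standing in for Common::CityHash64:

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <type_traits>
#include <unordered_map>

struct ComputeKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::array<std::uint32_t, 3> workgroup_size;

    std::size_t Hash() const noexcept {
        // FNV-1a over the raw bytes; the real code uses Common::CityHash64 instead.
        const auto* bytes = reinterpret_cast<const unsigned char*>(this);
        std::uint64_t hash = 0xcbf29ce484222325ULL;
        for (std::size_t i = 0; i < sizeof *this; ++i) {
            hash = (hash ^ bytes[i]) * 0x100000001b3ULL;
        }
        return static_cast<std::size_t>(hash);
    }

    bool operator==(const ComputeKey& rhs) const noexcept {
        return std::memcmp(this, &rhs, sizeof *this) == 0;
    }
};
static_assert(std::has_unique_object_representations_v<ComputeKey>);
static_assert(std::is_trivially_copyable_v<ComputeKey>);

namespace std {
template <>
struct hash<ComputeKey> {
    std::size_t operator()(const ComputeKey& k) const noexcept {
        return k.Hash();
    }
};
} // namespace std

// Usage: the same pattern backs the compute_cache and graphics_cache maps above.
using ComputeCache = std::unordered_map<ComputeKey, int>;

The memcmp comparison is only sound because padding bytes are ruled out, which is exactly what the static_asserts in the headers above enforce.
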
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 7cadd5147..c9cb32d71 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -114,14 +114,10 @@ void HostCounter::EndQuery() {
114} 114}
115 115
116u64 HostCounter::BlockingQuery() const { 116u64 HostCounter::BlockingQuery() const {
117 if (tick >= cache.GetScheduler().CurrentTick()) { 117 cache.GetScheduler().Wait(tick);
118 cache.GetScheduler().Flush();
119 }
120
121 u64 data; 118 u64 data;
122 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( 119 const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
123 query.first, query.second, 1, sizeof(data), &data, sizeof(data), 120 query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT);
124 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
125 121
126 switch (query_result) { 122 switch (query_result) {
127 case VK_SUCCESS: 123 case VK_SUCCESS:
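
BlockingQuery now calls the scheduler's Wait(tick) and drops VK_QUERY_RESULT_WAIT_BIT, so the query read happens only after the tick is known to have completed. A condition-variable sketch of what the blocking half of such a wait amounts to (assumed structure, not yuzu's actual VKScheduler or MasterSemaphore):

#include <condition_variable>
#include <cstdint>
#include <mutex>

class TimelineWaiter {
public:
    // Called when the GPU is known to have reached gpu_tick.
    void Signal(std::uint64_t gpu_tick) {
        {
            std::lock_guard lock{mutex};
            known_gpu_tick = gpu_tick;
        }
        condvar.notify_all();
    }

    // Blocks the caller until the known GPU tick reaches the requested tick.
    void Wait(std::uint64_t tick) {
        std::unique_lock lock{mutex};
        condvar.wait(lock, [this, tick] { return known_gpu_tick >= tick; });
    }

private:
    std::mutex mutex;
    std::condition_variable condvar;
    std::uint64_t known_gpu_tick{};
};
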
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index f57c15b37..c7a07fdd8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -24,7 +24,6 @@
24#include "video_core/renderer_vulkan/vk_buffer_cache.h" 24#include "video_core/renderer_vulkan/vk_buffer_cache.h"
25#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 25#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
26#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 26#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
27#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
28#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 27#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
29#include "video_core/renderer_vulkan/vk_rasterizer.h" 28#include "video_core/renderer_vulkan/vk_rasterizer.h"
30#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -55,11 +54,10 @@ struct DrawParams {
55 u32 num_instances; 54 u32 num_instances;
56 u32 base_vertex; 55 u32 base_vertex;
57 u32 num_vertices; 56 u32 num_vertices;
57 u32 first_index;
58 bool is_indexed; 58 bool is_indexed;
59}; 59};
60 60
61constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute);
62
63VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { 61VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) {
64 const auto& src = regs.viewport_transform[index]; 62 const auto& src = regs.viewport_transform[index];
65 const float width = src.scale_x * 2.0f; 63 const float width = src.scale_x * 2.0f;
@@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
97 return scissor; 95 return scissor;
98} 96}
99 97
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
105 }
106 return addresses;
107}
108
109struct TextureHandle {
110 constexpr TextureHandle(u32 data, bool via_header_index) {
111 const Tegra::Texture::TextureHandle handle{data};
112 image = handle.tic_id;
113 sampler = via_header_index ? image : handle.tsc_id.Value();
114 }
115
116 u32 image;
117 u32 sampler;
118};
119
120template <typename Engine, typename Entry>
121TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
122 size_t stage, size_t index = 0) {
123 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
124 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
125 if (entry.is_separated) {
126 const u32 buffer_1 = entry.buffer;
127 const u32 buffer_2 = entry.secondary_buffer;
128 const u32 offset_1 = entry.offset;
129 const u32 offset_2 = entry.secondary_offset;
130 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
131 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
132 return TextureHandle(handle_1 | handle_2, via_header_index);
133 }
134 }
135 if (entry.is_bindless) {
136 const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
137 return TextureHandle(raw, via_header_index);
138 }
139 const u32 buffer = engine.GetBoundBuffer();
140 const u64 offset = (entry.offset + index) * sizeof(u32);
141 return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
142}
143
144ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
145 if (entry.is_buffer) {
146 return ImageViewType::e2D;
147 }
148 switch (entry.type) {
149 case Tegra::Shader::TextureType::Texture1D:
150 return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
151 case Tegra::Shader::TextureType::Texture2D:
152 return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
153 case Tegra::Shader::TextureType::Texture3D:
154 return ImageViewType::e3D;
155 case Tegra::Shader::TextureType::TextureCube:
156 return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
157 }
158 UNREACHABLE();
159 return ImageViewType::e2D;
160}
161
162ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
163 switch (entry.type) {
164 case Tegra::Shader::ImageType::Texture1D:
165 return ImageViewType::e1D;
166 case Tegra::Shader::ImageType::Texture1DArray:
167 return ImageViewType::e1DArray;
168 case Tegra::Shader::ImageType::Texture2D:
169 return ImageViewType::e2D;
170 case Tegra::Shader::ImageType::Texture2DArray:
171 return ImageViewType::e2DArray;
172 case Tegra::Shader::ImageType::Texture3D:
173 return ImageViewType::e3D;
174 case Tegra::Shader::ImageType::TextureBuffer:
175 return ImageViewType::Buffer;
176 }
177 UNREACHABLE();
178 return ImageViewType::e2D;
179}
180
181void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
184 for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
185 const ImageViewId image_view_id = *image_view_id_ptr++;
186 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
187 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
188 }
189 for (const auto& entry : entries.samplers) {
190 for (size_t i = 0; i < entry.size; ++i) {
191 const VkSampler sampler = *sampler_ptr++;
192 const ImageViewId image_view_id = *image_view_id_ptr++;
193 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
194 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
195 update_descriptor_queue.AddSampledImage(handle, sampler);
196 }
197 }
198 for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
199 const ImageViewId image_view_id = *image_view_id_ptr++;
200 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
201 update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
202 }
203 for (const auto& entry : entries.images) {
204 // TODO: Mark as modified
205 const ImageViewId image_view_id = *image_view_id_ptr++;
206 const ImageView& image_view = texture_cache.GetImageView(image_view_id);
207 const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
208 update_descriptor_queue.AddImage(handle);
209 }
210}
211
212DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, 98DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
213 bool is_indexed) { 99 bool is_indexed) {
214 DrawParams params{ 100 DrawParams params{
@@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
216 .num_instances = is_instanced ? num_instances : 1, 102 .num_instances = is_instanced ? num_instances : 1,
217 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, 103 .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first,
218 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, 104 .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count,
105 .first_index = is_indexed ? regs.index_array.first : 0,
219 .is_indexed = is_indexed, 106 .is_indexed = is_indexed,
220 }; 107 };
221 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { 108 if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
@@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
243 blit_image(device, scheduler, state_tracker, descriptor_pool), 130 blit_image(device, scheduler, state_tracker, descriptor_pool),
244 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, 131 astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
245 memory_allocator), 132 memory_allocator),
246 texture_cache_runtime{device, scheduler, memory_allocator, 133 render_pass_cache(device), texture_cache_runtime{device, scheduler,
247 staging_pool, blit_image, astc_decoder_pass}, 134 memory_allocator, staging_pool,
135 blit_image, astc_decoder_pass,
136 render_pass_cache},
248 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 137 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
249 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 138 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
250 update_descriptor_queue, descriptor_pool), 139 update_descriptor_queue, descriptor_pool),
251 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 140 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
252 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 141 pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
253 descriptor_pool, update_descriptor_queue), 142 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
143 texture_cache, gpu.ShaderNotify()),
254 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, 144 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
255 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), 145 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
256 wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { 146 wfi_event(device.GetLogical().CreateEvent()) {
257 scheduler.SetQueryCache(query_cache); 147 scheduler.SetQueryCache(query_cache);
258 if (device.UseAsynchronousShaders()) {
259 async_shaders.AllocateWorkers();
260 }
261} 148}
262 149
263RasterizerVulkan::~RasterizerVulkan() = default; 150RasterizerVulkan::~RasterizerVulkan() = default;
@@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
270 157
271 query_cache.UpdateCounters(); 158 query_cache.UpdateCounters();
272 159
273 graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); 160 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
274 161 if (!pipeline) {
275 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
276
277 texture_cache.SynchronizeGraphicsDescriptors();
278 texture_cache.UpdateRenderTargets(false);
279
280 const auto shaders = pipeline_cache.GetShaders();
281 graphics_key.shaders = GetShaderAddresses(shaders);
282
283 SetupShaderDescriptors(shaders, is_indexed);
284
285 const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
286 graphics_key.renderpass = framebuffer->RenderPass();
287
288 VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
289 graphics_key, framebuffer->NumColorBuffers(), async_shaders);
290 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
291 // Async graphics pipeline was not ready.
292 return; 162 return;
293 } 163 }
164 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
165 pipeline->Configure(is_indexed);
294 166
295 BeginTransformFeedback(); 167 BeginTransformFeedback();
296 168
297 scheduler.RequestRenderpass(framebuffer);
298 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
299 UpdateDynamicStates(); 169 UpdateDynamicStates();
300 170
301 const auto& regs = maxwell3d.regs; 171 const auto& regs{maxwell3d.regs};
302 const u32 num_instances = maxwell3d.mme_draw.instance_count; 172 const u32 num_instances{maxwell3d.mme_draw.instance_count};
303 const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); 173 const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
304 const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); 174 scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
305 const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
306 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
307 if (descriptor_set) {
308 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
309 DESCRIPTOR_SET, descriptor_set, nullptr);
310 }
311 if (draw_params.is_indexed) { 175 if (draw_params.is_indexed) {
312 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, 176 cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
313 draw_params.base_vertex, draw_params.base_instance); 177 draw_params.first_index, draw_params.base_vertex,
178 draw_params.base_instance);
314 } else { 179 } else {
315 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, 180 cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
316 draw_params.base_vertex, draw_params.base_instance); 181 draw_params.base_vertex, draw_params.base_instance);
317 } 182 }
318 }); 183 });
319
320 EndTransformFeedback(); 184 EndTransformFeedback();
321} 185}
322 186
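Note: the new first_index field feeds the firstIndex parameter of vkCmdDrawIndexed directly; a minimal sketch, assuming an index buffer is already bound to the command buffer:

#include <vulkan/vulkan.h>

void DrawIndexedSketch(VkCommandBuffer cmdbuf, uint32_t num_vertices, uint32_t num_instances,
                       uint32_t first_index, int32_t base_vertex, uint32_t base_instance) {
    vkCmdDrawIndexed(cmdbuf, num_vertices, num_instances, first_index, base_vertex,
                     base_instance);
}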
@@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() {
326 if (!maxwell3d.ShouldExecute()) { 190 if (!maxwell3d.ShouldExecute()) {
327 return; 191 return;
328 } 192 }
193 FlushWork();
329 194
330 query_cache.UpdateCounters(); 195 query_cache.UpdateCounters();
331 196
@@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() {
395 }); 260 });
396} 261}
397 262
398void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 263void RasterizerVulkan::DispatchCompute() {
399 MICROPROFILE_SCOPE(Vulkan_Compute); 264 FlushWork();
400
401 query_cache.UpdateCounters();
402 265
403 const auto& launch_desc = kepler_compute.launch_description; 266 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
404 auto& pipeline = pipeline_cache.GetComputePipeline({ 267 if (!pipeline) {
405 .shader = code_addr, 268 return;
406 .shared_memory_size = launch_desc.shared_alloc, 269 }
407 .workgroup_size{ 270 std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
408 launch_desc.block_dim_x, 271 pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
409 launch_desc.block_dim_y,
410 launch_desc.block_dim_z,
411 },
412 });
413 272
414 // Compute dispatches can't be executed inside a renderpass 273 const auto& qmd{kepler_compute.launch_description};
274 const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
415 scheduler.RequestOutsideRenderPassOperationContext(); 275 scheduler.RequestOutsideRenderPassOperationContext();
416 276 scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
417 image_view_indices.clear();
418 sampler_handles.clear();
419
420 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
421
422 const auto& entries = pipeline.GetEntries();
423 buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
424 buffer_cache.UnbindComputeStorageBuffers();
425 u32 ssbo_index = 0;
426 for (const auto& buffer : entries.global_buffers) {
427 buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
428 buffer.is_written);
429 ++ssbo_index;
430 }
431 buffer_cache.UpdateComputeBuffers();
432
433 texture_cache.SynchronizeComputeDescriptors();
434
435 SetupComputeUniformTexels(entries);
436 SetupComputeTextures(entries);
437 SetupComputeStorageTexels(entries);
438 SetupComputeImages(entries);
439
440 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
441 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
442
443 update_descriptor_queue.Acquire();
444
445 buffer_cache.BindHostComputeBuffers();
446
447 ImageViewId* image_view_id_ptr = image_view_ids.data();
448 VkSampler* sampler_ptr = sampler_handles.data();
449 PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
450 sampler_ptr);
451
452 const VkPipeline pipeline_handle = pipeline.GetHandle();
453 const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
454 const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
455 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
456 grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
457 descriptor_set](vk::CommandBuffer cmdbuf) {
458 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
459 if (descriptor_set) {
460 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
461 DESCRIPTOR_SET, descriptor_set, nullptr);
462 }
463 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
464 });
465} 277}
466 278
467void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 279void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() {
626 438
627void RasterizerVulkan::FragmentBarrier() { 439void RasterizerVulkan::FragmentBarrier() {
628 // We already put barriers when a render pass finishes 440 // We already put barriers when a render pass finishes
441 scheduler.RequestOutsideRenderPassOperationContext();
629} 442}
630 443
631void RasterizerVulkan::TiledCacheBarrier() { 444void RasterizerVulkan::TiledCacheBarrier() {
@@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() {
633} 446}
634 447
635void RasterizerVulkan::FlushCommands() { 448void RasterizerVulkan::FlushCommands() {
636 if (draw_counter > 0) { 449 if (draw_counter == 0) {
637 draw_counter = 0; 450 return;
638 scheduler.Flush();
639 } 451 }
452 draw_counter = 0;
453 scheduler.Flush();
640} 454}
641 455
642void RasterizerVulkan::TickFrame() { 456void RasterizerVulkan::TickFrame() {
@@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
676 if (!image_view) { 490 if (!image_view) {
677 return false; 491 return false;
678 } 492 }
679 screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); 493 screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
680 screen_info.width = image_view->size.width; 494 screen_info.width = image_view->size.width;
681 screen_info.height = image_view->size.height; 495 screen_info.height = image_view->size.height;
682 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 496 screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
683 return true; 497 return true;
684} 498}
685 499
500void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
501 const VideoCore::DiskResourceLoadCallback& callback) {
502 pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
503}
504
686void RasterizerVulkan::FlushWork() { 505void RasterizerVulkan::FlushWork() {
687 static constexpr u32 DRAWS_TO_DISPATCH = 4096; 506 static constexpr u32 DRAWS_TO_DISPATCH = 4096;
688 507
@@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() {
691 if ((++draw_counter & 7) != 7) { 510 if ((++draw_counter & 7) != 7) {
692 return; 511 return;
693 } 512 }
694
695 if (draw_counter < DRAWS_TO_DISPATCH) { 513 if (draw_counter < DRAWS_TO_DISPATCH) {
696 // Send recorded tasks to the worker thread 514 // Send recorded tasks to the worker thread
697 scheduler.DispatchWork(); 515 scheduler.DispatchWork();
698 return; 516 return;
699 } 517 }
700
701 // Otherwise (every certain number of draws) flush execution. 518 // Otherwise (every certain number of draws) flush execution.
702 // This submits commands to the Vulkan driver. 519 // This submits commands to the Vulkan driver.
703 scheduler.Flush(); 520 scheduler.Flush();
@@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
716 return buffer_cache.DMACopy(src_address, dest_address, amount); 533 return buffer_cache.DMACopy(src_address, dest_address, amount);
717} 534}
718 535
719void RasterizerVulkan::SetupShaderDescriptors(
720 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
721 image_view_indices.clear();
722 sampler_handles.clear();
723 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
724 Shader* const shader = shaders[stage + 1];
725 if (!shader) {
726 continue;
727 }
728 const ShaderEntries& entries = shader->GetEntries();
729 SetupGraphicsUniformTexels(entries, stage);
730 SetupGraphicsTextures(entries, stage);
731 SetupGraphicsStorageTexels(entries, stage);
732 SetupGraphicsImages(entries, stage);
733
734 buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers);
735 buffer_cache.UnbindGraphicsStorageBuffers(stage);
736 u32 ssbo_index = 0;
737 for (const auto& buffer : entries.global_buffers) {
738 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
739 buffer.cbuf_offset, buffer.is_written);
740 ++ssbo_index;
741 }
742 }
743 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
744 buffer_cache.UpdateGraphicsBuffers(is_indexed);
745 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
746
747 buffer_cache.BindHostGeometryBuffers(is_indexed);
748
749 update_descriptor_queue.Acquire();
750
751 ImageViewId* image_view_id_ptr = image_view_ids.data();
752 VkSampler* sampler_ptr = sampler_handles.data();
753 for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
754 // Skip VertexA stage
755 Shader* const shader = shaders[stage + 1];
756 if (!shader) {
757 continue;
758 }
759 buffer_cache.BindHostStageBuffers(stage);
760 PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue,
761 image_view_id_ptr, sampler_ptr);
762 }
763}
764
765void RasterizerVulkan::UpdateDynamicStates() { 536void RasterizerVulkan::UpdateDynamicStates() {
766 auto& regs = maxwell3d.regs; 537 auto& regs = maxwell3d.regs;
767 UpdateViewportsState(regs); 538 UpdateViewportsState(regs);
@@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
770 UpdateBlendConstants(regs); 541 UpdateBlendConstants(regs);
771 UpdateDepthBounds(regs); 542 UpdateDepthBounds(regs);
772 UpdateStencilFaces(regs); 543 UpdateStencilFaces(regs);
544 UpdateLineWidth(regs);
773 if (device.IsExtExtendedDynamicStateSupported()) { 545 if (device.IsExtExtendedDynamicStateSupported()) {
774 UpdateCullMode(regs); 546 UpdateCullMode(regs);
775 UpdateDepthBoundsTestEnable(regs); 547 UpdateDepthBoundsTestEnable(regs);
@@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() {
779 UpdateFrontFace(regs); 551 UpdateFrontFace(regs);
780 UpdateStencilOp(regs); 552 UpdateStencilOp(regs);
781 UpdateStencilTestEnable(regs); 553 UpdateStencilTestEnable(regs);
554 if (device.IsExtVertexInputDynamicStateSupported()) {
555 UpdateVertexInput(regs);
556 }
782 } 557 }
783} 558}
784 559
@@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() {
810 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 585 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
811} 586}
812 587
813void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
814 const auto& regs = maxwell3d.regs;
815 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
816 for (const auto& entry : entries.uniform_texels) {
817 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
818 image_view_indices.push_back(handle.image);
819 }
820}
821
822void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
823 const auto& regs = maxwell3d.regs;
824 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
825 for (const auto& entry : entries.samplers) {
826 for (size_t index = 0; index < entry.size; ++index) {
827 const TextureHandle handle =
828 GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
829 image_view_indices.push_back(handle.image);
830
831 Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
832 sampler_handles.push_back(sampler->Handle());
833 }
834 }
835}
836
837void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
838 const auto& regs = maxwell3d.regs;
839 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
840 for (const auto& entry : entries.storage_texels) {
841 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
842 image_view_indices.push_back(handle.image);
843 }
844}
845
846void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
847 const auto& regs = maxwell3d.regs;
848 const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
849 for (const auto& entry : entries.images) {
850 const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
851 image_view_indices.push_back(handle.image);
852 }
853}
854
855void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
856 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
857 for (const auto& entry : entries.uniform_texels) {
858 const TextureHandle handle =
859 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
860 image_view_indices.push_back(handle.image);
861 }
862}
863
864void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
865 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
866 for (const auto& entry : entries.samplers) {
867 for (size_t index = 0; index < entry.size; ++index) {
868 const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
869 COMPUTE_SHADER_INDEX, index);
870 image_view_indices.push_back(handle.image);
871
872 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
873 sampler_handles.push_back(sampler->Handle());
874 }
875 }
876}
877
878void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
879 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
880 for (const auto& entry : entries.storage_texels) {
881 const TextureHandle handle =
882 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
883 image_view_indices.push_back(handle.image);
884 }
885}
886
887void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
888 const bool via_header_index = kepler_compute.launch_description.linked_tsc;
889 for (const auto& entry : entries.images) {
890 const TextureHandle handle =
891 GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
892 image_view_indices.push_back(handle.image);
893 }
894}
895
896void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { 588void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
897 if (!state_tracker.TouchViewports()) { 589 if (!state_tracker.TouchViewports()) {
898 return; 590 return;
@@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
985 } 677 }
986} 678}
987 679
680void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
681 if (!state_tracker.TouchLineWidth()) {
682 return;
683 }
684 const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased;
685 scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
686}
687
988void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { 688void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
989 if (!state_tracker.TouchCullMode()) { 689 if (!state_tracker.TouchCullMode()) {
990 return; 690 return;
@@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re
999 if (!state_tracker.TouchDepthBoundsTestEnable()) { 699 if (!state_tracker.TouchDepthBoundsTestEnable()) {
1000 return; 700 return;
1001 } 701 }
702 bool enabled = regs.depth_bounds_enable;
703 if (enabled && !device.IsDepthBoundsSupported()) {
704 LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
705 enabled = false;
706 }
1002 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { 707 scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) {
1003 cmdbuf.SetDepthBoundsTestEnableEXT(enable); 708 cmdbuf.SetDepthBoundsTestEnableEXT(enable);
1004 }); 709 });
@@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
1086 }); 791 });
1087} 792}
1088 793
794void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
795 auto& dirty{maxwell3d.dirty.flags};
796 if (!dirty[Dirty::VertexInput]) {
797 return;
798 }
799 dirty[Dirty::VertexInput] = false;
800
801 boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
802 boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
803
804 // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
805 // generating dirty state. Track the highest dirty attribute and update all attributes until
806 // that one.
807 size_t highest_dirty_attr{};
808 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
809 if (dirty[Dirty::VertexAttribute0 + index]) {
810 highest_dirty_attr = index;
811 }
812 }
813 for (size_t index = 0; index < highest_dirty_attr; ++index) {
814 const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
815 const u32 binding{attribute.buffer};
816 dirty[Dirty::VertexAttribute0 + index] = false;
817 dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
818 if (!attribute.constant) {
819 attributes.push_back({
820 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
821 .pNext = nullptr,
822 .location = static_cast<u32>(index),
823 .binding = binding,
824 .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size),
825 .offset = attribute.offset,
826 });
827 }
828 }
829 for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
830 if (!dirty[Dirty::VertexBinding0 + index]) {
831 continue;
832 }
833 dirty[Dirty::VertexBinding0 + index] = false;
834
835 const u32 binding{static_cast<u32>(index)};
836 const auto& input_binding{regs.vertex_array[binding]};
837 const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)};
838 bindings.push_back({
839 .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
840 .pNext = nullptr,
841 .binding = binding,
842 .stride = input_binding.stride,
843 .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
844 .divisor = is_instanced ? input_binding.divisor : 1,
845 });
846 }
847 scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
848 cmdbuf.SetVertexInputEXT(bindings, attributes);
849 });
850}
851
1089} // namespace Vulkan 852} // namespace Vulkan
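Note: UpdateVertexInput above relies on VK_EXT_vertex_input_dynamic_state. The sketch below shows the same call with a single float2 attribute; it assumes the extension is enabled and that the function pointer was loaded with vkGetDeviceProcAddr, a detail the command buffer wrapper used in the diff hides.

#include <vulkan/vulkan.h>

void SetSingleAttribute(VkCommandBuffer cmdbuf,
                        PFN_vkCmdSetVertexInputEXT pfnCmdSetVertexInputEXT) {
    const VkVertexInputBindingDescription2EXT binding{
        .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
        .pNext = nullptr,
        .binding = 0,
        .stride = 2 * sizeof(float),
        .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
        .divisor = 1,
    };
    const VkVertexInputAttributeDescription2EXT attribute{
        .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
        .pNext = nullptr,
        .location = 0,
        .binding = 0,
        .format = VK_FORMAT_R32G32_SFLOAT,
        .offset = 0,
    };
    pfnCmdSetVertexInputEXT(cmdbuf, 1, &binding, 1, &attribute);
}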
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 2065209be..866827247 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,14 +21,13 @@
21#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 22#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
23#include "video_core/renderer_vulkan/vk_fence_manager.h" 23#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 24#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 25#include "video_core/renderer_vulkan/vk_query_cache.h"
26#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 27#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 28#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
29#include "video_core/renderer_vulkan/vk_texture_cache.h" 29#include "video_core/renderer_vulkan/vk_texture_cache.h"
30#include "video_core/renderer_vulkan/vk_update_descriptor.h" 30#include "video_core/renderer_vulkan/vk_update_descriptor.h"
31#include "video_core/shader/async_shaders.h"
32#include "video_core/vulkan_common/vulkan_memory_allocator.h" 31#include "video_core/vulkan_common/vulkan_memory_allocator.h"
33#include "video_core/vulkan_common/vulkan_wrapper.h" 32#include "video_core/vulkan_common/vulkan_wrapper.h"
34 33
@@ -73,7 +72,7 @@ public:
73 72
74 void Draw(bool is_indexed, bool is_instanced) override; 73 void Draw(bool is_indexed, bool is_instanced) override;
75 void Clear() override; 74 void Clear() override;
76 void DispatchCompute(GPUVAddr code_addr) override; 75 void DispatchCompute() override;
77 void ResetCounter(VideoCore::QueryType type) override; 76 void ResetCounter(VideoCore::QueryType type) override;
78 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 77 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
79 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 78 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@@ -102,19 +101,8 @@ public:
102 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; 101 Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override;
103 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 102 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
104 u32 pixel_stride) override; 103 u32 pixel_stride) override;
105 104 void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
106 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { 105 const VideoCore::DiskResourceLoadCallback& callback) override;
107 return async_shaders;
108 }
109
110 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
111 return async_shaders;
112 }
113
114 /// Maximum supported size that a constbuffer can have in bytes.
115 static constexpr size_t MaxConstbufferSize = 0x10000;
116 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
117 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
118 106
119private: 107private:
120 static constexpr size_t MAX_TEXTURES = 192; 108 static constexpr size_t MAX_TEXTURES = 192;
@@ -125,46 +113,19 @@ private:
125 113
126 void FlushWork(); 114 void FlushWork();
127 115
128 /// Setup descriptors in the graphics pipeline.
129 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
130 bool is_indexed);
131
132 void UpdateDynamicStates(); 116 void UpdateDynamicStates();
133 117
134 void BeginTransformFeedback(); 118 void BeginTransformFeedback();
135 119
136 void EndTransformFeedback(); 120 void EndTransformFeedback();
137 121
138 /// Setup uniform texels in the graphics pipeline.
139 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
140
141 /// Setup textures in the graphics pipeline.
142 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
143
144 /// Setup storage texels in the graphics pipeline.
145 void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
146
147 /// Setup images in the graphics pipeline.
148 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
149
150 /// Setup texel buffers in the compute pipeline.
151 void SetupComputeUniformTexels(const ShaderEntries& entries);
152
153 /// Setup textures in the compute pipeline.
154 void SetupComputeTextures(const ShaderEntries& entries);
155
156 /// Setup storage texels in the compute pipeline.
157 void SetupComputeStorageTexels(const ShaderEntries& entries);
158
159 /// Setup images in the compute pipeline.
160 void SetupComputeImages(const ShaderEntries& entries);
161
162 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 122 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
163 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); 123 void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
164 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); 124 void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
165 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); 125 void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs);
166 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); 126 void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs);
167 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); 127 void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs);
128 void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs);
168 129
169 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); 130 void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs);
170 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 131 void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -175,6 +136,8 @@ private:
175 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); 136 void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
176 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); 137 void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
177 138
139 void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
140
178 Tegra::GPU& gpu; 141 Tegra::GPU& gpu;
179 Tegra::MemoryManager& gpu_memory; 142 Tegra::MemoryManager& gpu_memory;
180 Tegra::Engines::Maxwell3D& maxwell3d; 143 Tegra::Engines::Maxwell3D& maxwell3d;
@@ -187,24 +150,22 @@ private:
187 VKScheduler& scheduler; 150 VKScheduler& scheduler;
188 151
189 StagingBufferPool staging_pool; 152 StagingBufferPool staging_pool;
190 VKDescriptorPool descriptor_pool; 153 DescriptorPool descriptor_pool;
191 VKUpdateDescriptorQueue update_descriptor_queue; 154 VKUpdateDescriptorQueue update_descriptor_queue;
192 BlitImageHelper blit_image; 155 BlitImageHelper blit_image;
193 ASTCDecoderPass astc_decoder_pass; 156 ASTCDecoderPass astc_decoder_pass;
194 157 RenderPassCache render_pass_cache;
195 GraphicsPipelineCacheKey graphics_key;
196 158
197 TextureCacheRuntime texture_cache_runtime; 159 TextureCacheRuntime texture_cache_runtime;
198 TextureCache texture_cache; 160 TextureCache texture_cache;
199 BufferCacheRuntime buffer_cache_runtime; 161 BufferCacheRuntime buffer_cache_runtime;
200 BufferCache buffer_cache; 162 BufferCache buffer_cache;
201 VKPipelineCache pipeline_cache; 163 PipelineCache pipeline_cache;
202 VKQueryCache query_cache; 164 VKQueryCache query_cache;
203 AccelerateDMA accelerate_dma; 165 AccelerateDMA accelerate_dma;
204 VKFenceManager fence_manager; 166 VKFenceManager fence_manager;
205 167
206 vk::Event wfi_event; 168 vk::Event wfi_event;
207 VideoCommon::Shader::AsyncShaders async_shaders;
208 169
209 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; 170 boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
210 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; 171 std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
new file mode 100644
index 000000000..451ffe019
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <unordered_map>
6
7#include <boost/container/static_vector.hpp>
8
9#include "video_core/renderer_vulkan/maxwell_to_vk.h"
10#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
11#include "video_core/surface.h"
12#include "video_core/vulkan_common/vulkan_device.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14
15namespace Vulkan {
16namespace {
17using VideoCore::Surface::PixelFormat;
18
19VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
20 VkSampleCountFlagBits samples) {
21 using MaxwellToVK::SurfaceFormat;
22 return {
23 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
24 .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
25 .samples = samples,
26 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
27 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
28 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
29 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
30 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
31 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
32 };
33}
34} // Anonymous namespace
35
36RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {}
37
38VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
39 std::lock_guard lock{mutex};
40 const auto [pair, is_new] = cache.try_emplace(key);
41 if (!is_new) {
42 return *pair->second;
43 }
44 boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
45 std::array<VkAttachmentReference, 8> references{};
46 u32 num_attachments{};
47 u32 num_colors{};
48 for (size_t index = 0; index < key.color_formats.size(); ++index) {
49 const PixelFormat format{key.color_formats[index]};
50 const bool is_valid{format != PixelFormat::Invalid};
51 references[index] = VkAttachmentReference{
52 .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
53 .layout = VK_IMAGE_LAYOUT_GENERAL,
54 };
55 if (is_valid) {
56 descriptions.push_back(AttachmentDescription(*device, format, key.samples));
57 num_attachments = static_cast<u32>(index + 1);
58 ++num_colors;
59 }
60 }
61 const bool has_depth{key.depth_format != PixelFormat::Invalid};
62 VkAttachmentReference depth_reference{};
63 if (key.depth_format != PixelFormat::Invalid) {
64 depth_reference = VkAttachmentReference{
65 .attachment = num_colors,
66 .layout = VK_IMAGE_LAYOUT_GENERAL,
67 };
68 descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
69 }
70 const VkSubpassDescription subpass{
71 .flags = 0,
72 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
73 .inputAttachmentCount = 0,
74 .pInputAttachments = nullptr,
75 .colorAttachmentCount = num_attachments,
76 .pColorAttachments = references.data(),
77 .pResolveAttachments = nullptr,
78 .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr,
79 .preserveAttachmentCount = 0,
80 .pPreserveAttachments = nullptr,
81 };
82 pair->second = device->GetLogical().CreateRenderPass({
83 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
84 .pNext = nullptr,
85 .flags = 0,
86 .attachmentCount = static_cast<u32>(descriptions.size()),
87 .pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
88 .subpassCount = 1,
89 .pSubpasses = &subpass,
90 .dependencyCount = 0,
91 .pDependencies = nullptr,
92 });
93 return *pair->second;
94}
95
96} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
new file mode 100644
index 000000000..eaa0ed775
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <unordered_map>
9
10#include "video_core/surface.h"
11#include "video_core/vulkan_common/vulkan_wrapper.h"
12
13namespace Vulkan {
14
15struct RenderPassKey {
16 auto operator<=>(const RenderPassKey&) const noexcept = default;
17
18 std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
19 VideoCore::Surface::PixelFormat depth_format;
20 VkSampleCountFlagBits samples;
21};
22
23} // namespace Vulkan
24
25namespace std {
26template <>
27struct hash<Vulkan::RenderPassKey> {
28 [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
29 size_t value = static_cast<size_t>(key.depth_format) << 48;
30 value ^= static_cast<size_t>(key.samples) << 52;
31 for (size_t i = 0; i < key.color_formats.size(); ++i) {
32 value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
33 }
34 return value;
35 }
36};
37} // namespace std
38
39namespace Vulkan {
40
41class Device;
42
43class RenderPassCache {
44public:
45 explicit RenderPassCache(const Device& device_);
46
47 VkRenderPass Get(const RenderPassKey& key);
48
49private:
50 const Device* device{};
51 std::unordered_map<RenderPassKey, vk::RenderPass> cache;
52 std::mutex mutex;
53};
54
55} // namespace Vulkan
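Note: a hypothetical usage sketch of the cache declared above; two calls with identical keys return the same VkRenderPass and only the first one builds it. B8G8R8A8_UNORM is assumed to be a valid VideoCore::Surface::PixelFormat value.

#include "video_core/renderer_vulkan/vk_render_pass_cache.h"

VkRenderPass GetColorOnlyPass(Vulkan::RenderPassCache& cache) {
    Vulkan::RenderPassKey key{};
    key.color_formats.fill(VideoCore::Surface::PixelFormat::Invalid);
    key.color_formats[0] = VideoCore::Surface::PixelFormat::B8G8R8A8_UNORM;
    key.depth_format = VideoCore::Surface::PixelFormat::Invalid;
    key.samples = VK_SAMPLE_COUNT_1_BIT;
    return cache.Get(key); // subsequent calls with the same key reuse the handle
}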
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
index a8bf7bda8..2dd514968 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -10,18 +10,16 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) 12ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
13 : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} 13 : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
14
15ResourcePool::~ResourcePool() = default;
16 14
17size_t ResourcePool::CommitResource() { 15size_t ResourcePool::CommitResource() {
18 // Refresh semaphore to query updated results 16 // Refresh semaphore to query updated results
19 master_semaphore.Refresh(); 17 master_semaphore->Refresh();
20 const u64 gpu_tick = master_semaphore.KnownGpuTick(); 18 const u64 gpu_tick = master_semaphore->KnownGpuTick();
21 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { 19 const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> {
22 for (size_t iterator = begin; iterator < end; ++iterator) { 20 for (size_t iterator = begin; iterator < end; ++iterator) {
23 if (gpu_tick >= ticks[iterator]) { 21 if (gpu_tick >= ticks[iterator]) {
24 ticks[iterator] = master_semaphore.CurrentTick(); 22 ticks[iterator] = master_semaphore->CurrentTick();
25 return iterator; 23 return iterator;
26 } 24 }
27 } 25 }
@@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() {
36 // Both searches failed, the pool is full; handle it. 34 // Both searches failed, the pool is full; handle it.
37 const size_t free_resource = ManageOverflow(); 35 const size_t free_resource = ManageOverflow();
38 36
39 ticks[free_resource] = master_semaphore.CurrentTick(); 37 ticks[free_resource] = master_semaphore->CurrentTick();
40 found = free_resource; 38 found = free_resource;
41 } 39 }
42 } 40 }
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
index 9d0bb3b4d..f0b80ad59 100644
--- a/src/video_core/renderer_vulkan/vk_resource_pool.h
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -18,8 +18,16 @@ class MasterSemaphore;
18 */ 18 */
19class ResourcePool { 19class ResourcePool {
20public: 20public:
21 explicit ResourcePool() = default;
21 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); 22 explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
22 virtual ~ResourcePool(); 23
24 virtual ~ResourcePool() = default;
25
26 ResourcePool& operator=(ResourcePool&&) noexcept = default;
27 ResourcePool(ResourcePool&&) noexcept = default;
28
29 ResourcePool& operator=(const ResourcePool&) = default;
30 ResourcePool(const ResourcePool&) = default;
23 31
24protected: 32protected:
25 size_t CommitResource(); 33 size_t CommitResource();
@@ -34,7 +42,7 @@ private:
34 /// Allocates a new page of resources. 42 /// Allocates a new page of resources.
35 void Grow(); 43 void Grow();
36 44
37 MasterSemaphore& master_semaphore; 45 MasterSemaphore* master_semaphore{};
38 size_t grow_step = 0; ///< Number of new resources created after an overflow 46 size_t grow_step = 0; ///< Number of new resources created after an overflow
39 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found 47 size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found
40 std::vector<u64> ticks; ///< Ticks for each resource 48 std::vector<u64> ticks; ///< Ticks for each resource
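Note: the switch from a reference member to a pointer is what makes the defaulted move and copy assignments above legal, since a reference member implicitly deletes them. A minimal standalone illustration (Semaphore is a stand-in type):

#include <type_traits>

struct Semaphore {};

struct WithRef {
    Semaphore& sem; // a reference member deletes the implicit assignment operators
};
struct WithPtr {
    Semaphore* sem{}; // a pointer member keeps the type assignable
};

static_assert(!std::is_move_assignable_v<WithRef>);
static_assert(std::is_move_assignable_v<WithPtr>);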
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
31 command->~Command(); 31 command->~Command();
32 command = next; 32 command = next;
33 } 33 }
34 34 submit = false;
35 command_offset = 0; 35 command_offset = 0;
36 first = nullptr; 36 first = nullptr;
37 last = nullptr; 37 last = nullptr;
@@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
42 master_semaphore{std::make_unique<MasterSemaphore>(device)}, 42 master_semaphore{std::make_unique<MasterSemaphore>(device)},
43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { 43 command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
44 AcquireNewChunk(); 44 AcquireNewChunk();
45 AllocateNewContext(); 45 AllocateWorkerCommandBuffer();
46 worker_thread = std::thread(&VKScheduler::WorkerThread, this); 46 worker_thread = std::thread(&VKScheduler::WorkerThread, this);
47} 47}
48 48
49VKScheduler::~VKScheduler() { 49VKScheduler::~VKScheduler() {
50 quit = true; 50 {
51 cv.notify_all(); 51 std::lock_guard lock{work_mutex};
52 quit = true;
53 }
54 work_cv.notify_all();
52 worker_thread.join(); 55 worker_thread.join();
53} 56}
54 57
@@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
60void VKScheduler::Finish(VkSemaphore semaphore) { 63void VKScheduler::Finish(VkSemaphore semaphore) {
61 const u64 presubmit_tick = CurrentTick(); 64 const u64 presubmit_tick = CurrentTick();
62 SubmitExecution(semaphore); 65 SubmitExecution(semaphore);
66 WaitWorker();
63 Wait(presubmit_tick); 67 Wait(presubmit_tick);
64 AllocateNewContext(); 68 AllocateNewContext();
65} 69}
@@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() {
68 MICROPROFILE_SCOPE(Vulkan_WaitForWorker); 72 MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
69 DispatchWork(); 73 DispatchWork();
70 74
71 bool finished = false; 75 std::unique_lock lock{work_mutex};
72 do { 76 wait_cv.wait(lock, [this] { return work_queue.empty(); });
73 cv.notify_all();
74 std::unique_lock lock{mutex};
75 finished = chunk_queue.Empty();
76 } while (!finished);
77} 77}
78 78
79void VKScheduler::DispatchWork() { 79void VKScheduler::DispatchWork() {
80 if (chunk->Empty()) { 80 if (chunk->Empty()) {
81 return; 81 return;
82 } 82 }
83 chunk_queue.Push(std::move(chunk)); 83 {
84 cv.notify_all(); 84 std::lock_guard lock{work_mutex};
85 work_queue.push(std::move(chunk));
86 }
87 work_cv.notify_one();
85 AcquireNewChunk(); 88 AcquireNewChunk();
86} 89}
87 90
@@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
124 EndRenderPass(); 127 EndRenderPass();
125} 128}
126 129
127void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { 130bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
128 if (state.graphics_pipeline == pipeline) { 131 if (state.graphics_pipeline == pipeline) {
129 return; 132 return false;
130 } 133 }
131 state.graphics_pipeline = pipeline; 134 state.graphics_pipeline = pipeline;
132 Record([pipeline](vk::CommandBuffer cmdbuf) { 135 return true;
133 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
134 });
135} 136}
136 137
137void VKScheduler::WorkerThread() { 138void VKScheduler::WorkerThread() {
138 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 139 Common::SetCurrentThreadName("yuzu:VulkanWorker");
139 std::unique_lock lock{mutex};
140 do { 140 do {
141 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); 141 if (work_queue.empty()) {
142 if (quit) { 142 wait_cv.notify_all();
143 continue; 143 }
144 std::unique_ptr<CommandChunk> work;
145 {
146 std::unique_lock lock{work_mutex};
147 work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
148 if (quit) {
149 continue;
150 }
151 work = std::move(work_queue.front());
152 work_queue.pop();
153 }
154 const bool has_submit = work->HasSubmit();
155 work->ExecuteAll(current_cmdbuf);
156 if (has_submit) {
157 AllocateWorkerCommandBuffer();
144 } 158 }
145 auto extracted_chunk = std::move(chunk_queue.Front()); 159 std::lock_guard reserve_lock{reserve_mutex};
146 chunk_queue.Pop(); 160 chunk_reserve.push_back(std::move(work));
147 extracted_chunk->ExecuteAll(current_cmdbuf);
148 chunk_reserve.Push(std::move(extracted_chunk));
149 } while (!quit); 161 } while (!quit);
150} 162}
151 163
164void VKScheduler::AllocateWorkerCommandBuffer() {
165 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
166 current_cmdbuf.Begin({
167 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
168 .pNext = nullptr,
169 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
170 .pInheritanceInfo = nullptr,
171 });
172}
173
152void VKScheduler::SubmitExecution(VkSemaphore semaphore) { 174void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
153 EndPendingOperations(); 175 EndPendingOperations();
154 InvalidateState(); 176 InvalidateState();
155 WaitWorker();
156 177
157 std::unique_lock lock{mutex}; 178 const u64 signal_value = master_semaphore->NextTick();
179 Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
180 cmdbuf.End();
158 181
159 current_cmdbuf.End(); 182 const u32 num_signal_semaphores = semaphore ? 2U : 1U;
160 183
161 const VkSemaphore timeline_semaphore = master_semaphore->Handle(); 184 const u64 wait_value = signal_value - 1;
162 const u32 num_signal_semaphores = semaphore ? 2U : 1U; 185 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
163 186
164 const u64 signal_value = master_semaphore->CurrentTick(); 187 const VkSemaphore timeline_semaphore = master_semaphore->Handle();
165 const u64 wait_value = signal_value - 1; 188 const std::array signal_values{signal_value, u64(0)};
166 const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; 189 const std::array signal_semaphores{timeline_semaphore, semaphore};
167 190
168 master_semaphore->NextTick(); 191 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
169 192 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
170 const std::array signal_values{signal_value, u64(0)}; 193 .pNext = nullptr,
171 const std::array signal_semaphores{timeline_semaphore, semaphore}; 194 .waitSemaphoreValueCount = 1,
172 195 .pWaitSemaphoreValues = &wait_value,
173 const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ 196 .signalSemaphoreValueCount = num_signal_semaphores,
174 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, 197 .pSignalSemaphoreValues = signal_values.data(),
175 .pNext = nullptr, 198 };
176 .waitSemaphoreValueCount = 1, 199 const VkSubmitInfo submit_info{
177 .pWaitSemaphoreValues = &wait_value, 200 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
178 .signalSemaphoreValueCount = num_signal_semaphores, 201 .pNext = &timeline_si,
179 .pSignalSemaphoreValues = signal_values.data(), 202 .waitSemaphoreCount = 1,
180 }; 203 .pWaitSemaphores = &timeline_semaphore,
181 const VkSubmitInfo submit_info{ 204 .pWaitDstStageMask = &wait_stage_mask,
182 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 205 .commandBufferCount = 1,
183 .pNext = &timeline_si, 206 .pCommandBuffers = cmdbuf.address(),
184 .waitSemaphoreCount = 1, 207 .signalSemaphoreCount = num_signal_semaphores,
185 .pWaitSemaphores = &timeline_semaphore, 208 .pSignalSemaphores = signal_semaphores.data(),
186 .pWaitDstStageMask = &wait_stage_mask, 209 };
187 .commandBufferCount = 1, 210 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
188 .pCommandBuffers = current_cmdbuf.address(), 211 case VK_SUCCESS:
189 .signalSemaphoreCount = num_signal_semaphores, 212 break;
190 .pSignalSemaphores = signal_semaphores.data(), 213 case VK_ERROR_DEVICE_LOST:
191 }; 214 device.ReportLoss();
192 switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { 215 [[fallthrough]];
193 case VK_SUCCESS: 216 default:
194 break; 217 vk::Check(result);
195 case VK_ERROR_DEVICE_LOST: 218 }
196 device.ReportLoss(); 219 });
197 [[fallthrough]]; 220 chunk->MarkSubmit();
198 default: 221 DispatchWork();
199 vk::Check(result);
200 }
201} 222}
202 223
203void VKScheduler::AllocateNewContext() { 224void VKScheduler::AllocateNewContext() {
204 std::unique_lock lock{mutex};
205
206 current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
207 current_cmdbuf.Begin({
208 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
209 .pNext = nullptr,
210 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
211 .pInheritanceInfo = nullptr,
212 });
213
214 // Enable counters once again. These are disabled when a command buffer is finished. 225 // Enable counters once again. These are disabled when a command buffer is finished.
215 if (query_cache) { 226 if (query_cache) {
216 query_cache->UpdateCounters(); 227 query_cache->UpdateCounters();
@@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() {
265} 276}
266 277
267void VKScheduler::AcquireNewChunk() { 278void VKScheduler::AcquireNewChunk() {
268 if (chunk_reserve.Empty()) { 279 std::lock_guard lock{reserve_mutex};
280 if (chunk_reserve.empty()) {
269 chunk = std::make_unique<CommandChunk>(); 281 chunk = std::make_unique<CommandChunk>();
270 return; 282 return;
271 } 283 }
272 chunk = std::move(chunk_reserve.Front()); 284 chunk = std::move(chunk_reserve.back());
273 chunk_reserve.Pop(); 285 chunk_reserve.pop_back();
274} 286}
275 287
276} // namespace Vulkan 288} // namespace Vulkan
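AcquireNewChunk (above) now recycles command chunks through a plain vector guarded by reserve_mutex: reuse the most recently returned chunk from the back of the reserve, or allocate a fresh one when the pool is empty. The same object-pool shape in isolation, as a sketch only (Chunk stands in for CommandChunk; the release path is assumed to run once a chunk has been executed):

#include <memory>
#include <mutex>
#include <vector>

struct Chunk { /* recorded commands would live here */ };

class ChunkPool {
public:
    std::unique_ptr<Chunk> Acquire() {
        std::lock_guard lock{mutex};
        if (reserve.empty()) {
            return std::make_unique<Chunk>(); // pool empty: allocate a fresh chunk
        }
        std::unique_ptr<Chunk> chunk = std::move(reserve.back());
        reserve.pop_back();                   // reuse the most recently returned chunk
        return chunk;
    }

    void Release(std::unique_ptr<Chunk> chunk) {
        std::lock_guard lock{mutex};
        reserve.push_back(std::move(chunk));  // keep it around for the next Acquire()
    }

private:
    std::mutex mutex;
    std::vector<std::unique_ptr<Chunk>> reserve;
};

Popping from the back keeps the most recently used allocation warm, and the lock is only held for the push or pop itself.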
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 3ce48e9d2..cf39a2363 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -8,12 +8,12 @@
 #include <condition_variable>
 #include <cstddef>
 #include <memory>
-#include <stack>
 #include <thread>
 #include <utility>
+#include <queue>
+
 #include "common/alignment.h"
 #include "common/common_types.h"
-#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/vk_master_semaphore.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
@@ -22,6 +22,7 @@ namespace Vulkan {
 class CommandPool;
 class Device;
 class Framebuffer;
+class GraphicsPipeline;
 class StateTracker;
 class VKQueryCache;
 
@@ -52,8 +53,8 @@ public:
     /// of a renderpass.
     void RequestOutsideRenderPassOperationContext();
 
-    /// Binds a pipeline to the current execution context.
-    void BindGraphicsPipeline(VkPipeline pipeline);
+    /// Update the pipeline to the current execution context.
+    bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline);
 
     /// Invalidates current command buffer state except for render passes
     void InvalidateState();
@@ -85,6 +86,10 @@ public:
 
     /// Waits for the given tick to trigger on the GPU.
     void Wait(u64 tick) {
+        if (tick >= master_semaphore->CurrentTick()) {
+            // Make sure we are not waiting for the current tick without signalling
+            Flush();
+        }
         master_semaphore->Wait(tick);
     }
 
@@ -154,15 +159,24 @@ private:
             return true;
         }
 
+        void MarkSubmit() {
+            submit = true;
+        }
+
        bool Empty() const {
            return command_offset == 0;
        }
 
+        bool HasSubmit() const {
+            return submit;
+        }
+
    private:
        Command* first = nullptr;
        Command* last = nullptr;
 
        size_t command_offset = 0;
+        bool submit = false;
        alignas(std::max_align_t) std::array<u8, 0x8000> data{};
    };
 
@@ -170,11 +184,13 @@ private:
     VkRenderPass renderpass = nullptr;
     VkFramebuffer framebuffer = nullptr;
     VkExtent2D render_area = {0, 0};
-    VkPipeline graphics_pipeline = nullptr;
+    GraphicsPipeline* graphics_pipeline = nullptr;
     };
 
     void WorkerThread();
 
+    void AllocateWorkerCommandBuffer();
+
     void SubmitExecution(VkSemaphore semaphore);
 
     void AllocateNewContext();
@@ -204,11 +220,13 @@ private:
     std::array<VkImage, 9> renderpass_images{};
     std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{};
 
-    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
-    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
-    std::mutex mutex;
-    std::condition_variable cv;
-    bool quit = false;
+    std::queue<std::unique_ptr<CommandChunk>> work_queue;
+    std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex reserve_mutex;
+    std::mutex work_mutex;
+    std::condition_variable work_cv;
+    std::condition_variable wait_cv;
+    std::atomic_bool quit{};
 };
 
 } // namespace Vulkan
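The header changes mirror that: the lock-free Common::SPSCQueue pair is replaced with an explicit work_queue plus separate reserve_mutex/work_mutex, two condition variables and an atomic quit flag. The shape of worker this implies is the classic producer/consumer loop; a minimal sketch under those assumptions (Chunk and Execute() are placeholders, not the exact yuzu implementation):

#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>

struct Chunk { void Execute() {} };

class Worker {
public:
    void Push(std::unique_ptr<Chunk> chunk) {
        {
            std::lock_guard lock{work_mutex};
            work_queue.push(std::move(chunk));
        }
        work_cv.notify_one();                // wake the worker thread
    }

    void Loop() {
        while (!quit.load(std::memory_order_relaxed)) {
            std::unique_ptr<Chunk> chunk;
            {
                std::unique_lock lock{work_mutex};
                work_cv.wait(lock, [this] { return quit || !work_queue.empty(); });
                if (quit) {
                    return;
                }
                chunk = std::move(work_queue.front());
                work_queue.pop();
            }
            chunk->Execute();                // replay the recorded commands outside the lock
            wait_cv.notify_all();            // wake anyone waiting for the queue to drain
        }
    }

private:
    std::queue<std::unique_ptr<Chunk>> work_queue;
    std::mutex work_mutex;
    std::condition_variable work_cv;
    std::condition_variable wait_cv;
    std::atomic_bool quit{};
};

Keeping reserve_mutex separate from work_mutex keeps chunk recycling off the dispatch path of the recording thread.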
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
deleted file mode 100644
index c6846d886..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ /dev/null
@@ -1,3166 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <functional>
6#include <limits>
7#include <map>
8#include <optional>
9#include <type_traits>
10#include <unordered_map>
11#include <utility>
12
13#include <fmt/format.h>
14
15#include <sirit/sirit.h>
16
17#include "common/alignment.h"
18#include "common/assert.h"
19#include "common/common_types.h"
20#include "common/logging/log.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/engines/shader_bytecode.h"
23#include "video_core/engines/shader_header.h"
24#include "video_core/engines/shader_type.h"
25#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
26#include "video_core/shader/node.h"
27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader/transform_feedback.h"
29#include "video_core/vulkan_common/vulkan_device.h"
30
31namespace Vulkan {
32
33namespace {
34
35using Sirit::Id;
36using Tegra::Engines::ShaderType;
37using Tegra::Shader::Attribute;
38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register;
40using namespace VideoCommon::Shader;
41
42using Maxwell = Tegra::Engines::Maxwell3D::Regs;
43using Operation = const OperationNode&;
44
45class ASTDecompiler;
46class ExprDecompiler;
47
48// TODO(Rodrigo): Use rasterizer's value
49constexpr u32 MaxConstBufferFloats = 0x4000;
50constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4;
51
52constexpr u32 NumInputPatches = 32; // This value seems to be the standard
53
54enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
55
56class Expression final {
57public:
58 Expression(Id id_, Type type_) : id{id_}, type{type_} {
59 ASSERT(type_ != Type::Void);
60 }
61 Expression() : type{Type::Void} {}
62
63 Id id{};
64 Type type{};
65};
66static_assert(std::is_standard_layout_v<Expression>);
67
68struct TexelBuffer {
69 Id image_type{};
70 Id image{};
71};
72
73struct SampledImage {
74 Id image_type{};
75 Id sampler_type{};
76 Id sampler_pointer_type{};
77 Id variable{};
78};
79
80struct StorageImage {
81 Id image_type{};
82 Id image{};
83};
84
85struct AttributeType {
86 Type type;
87 Id scalar;
88 Id vector;
89};
90
91struct VertexIndices {
92 std::optional<u32> position;
93 std::optional<u32> layer;
94 std::optional<u32> viewport;
95 std::optional<u32> point_size;
96 std::optional<u32> clip_distances;
97};
98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
105spv::Dim GetSamplerDim(const SamplerEntry& sampler) {
106 ASSERT(!sampler.is_buffer);
107 switch (sampler.type) {
108 case Tegra::Shader::TextureType::Texture1D:
109 return spv::Dim::Dim1D;
110 case Tegra::Shader::TextureType::Texture2D:
111 return spv::Dim::Dim2D;
112 case Tegra::Shader::TextureType::Texture3D:
113 return spv::Dim::Dim3D;
114 case Tegra::Shader::TextureType::TextureCube:
115 return spv::Dim::Cube;
116 default:
117 UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type);
118 return spv::Dim::Dim2D;
119 }
120}
121
122std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) {
123 switch (image.type) {
124 case Tegra::Shader::ImageType::Texture1D:
125 return {spv::Dim::Dim1D, false};
126 case Tegra::Shader::ImageType::TextureBuffer:
127 return {spv::Dim::Buffer, false};
128 case Tegra::Shader::ImageType::Texture1DArray:
129 return {spv::Dim::Dim1D, true};
130 case Tegra::Shader::ImageType::Texture2D:
131 return {spv::Dim::Dim2D, false};
132 case Tegra::Shader::ImageType::Texture2DArray:
133 return {spv::Dim::Dim2D, true};
134 case Tegra::Shader::ImageType::Texture3D:
135 return {spv::Dim::Dim3D, false};
136 default:
137 UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type);
138 return {spv::Dim::Dim2D, false};
139 }
140}
141
142/// Returns the number of vertices present in a primitive topology.
143u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) {
144 switch (primitive_topology) {
145 case Maxwell::PrimitiveTopology::Points:
146 return 1;
147 case Maxwell::PrimitiveTopology::Lines:
148 case Maxwell::PrimitiveTopology::LineLoop:
149 case Maxwell::PrimitiveTopology::LineStrip:
150 return 2;
151 case Maxwell::PrimitiveTopology::Triangles:
152 case Maxwell::PrimitiveTopology::TriangleStrip:
153 case Maxwell::PrimitiveTopology::TriangleFan:
154 return 3;
155 case Maxwell::PrimitiveTopology::LinesAdjacency:
156 case Maxwell::PrimitiveTopology::LineStripAdjacency:
157 return 4;
158 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
159 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
160 return 6;
161 case Maxwell::PrimitiveTopology::Quads:
162 UNIMPLEMENTED_MSG("Quads");
163 return 3;
164 case Maxwell::PrimitiveTopology::QuadStrip:
165 UNIMPLEMENTED_MSG("QuadStrip");
166 return 3;
167 case Maxwell::PrimitiveTopology::Polygon:
168 UNIMPLEMENTED_MSG("Polygon");
169 return 3;
170 case Maxwell::PrimitiveTopology::Patches:
171 UNIMPLEMENTED_MSG("Patches");
172 return 3;
173 default:
174 UNREACHABLE();
175 return 3;
176 }
177}
178
179spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) {
180 switch (primitive) {
181 case Maxwell::TessellationPrimitive::Isolines:
182 return spv::ExecutionMode::Isolines;
183 case Maxwell::TessellationPrimitive::Triangles:
184 return spv::ExecutionMode::Triangles;
185 case Maxwell::TessellationPrimitive::Quads:
186 return spv::ExecutionMode::Quads;
187 }
188 UNREACHABLE();
189 return spv::ExecutionMode::Triangles;
190}
191
192spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) {
193 switch (spacing) {
194 case Maxwell::TessellationSpacing::Equal:
195 return spv::ExecutionMode::SpacingEqual;
196 case Maxwell::TessellationSpacing::FractionalOdd:
197 return spv::ExecutionMode::SpacingFractionalOdd;
198 case Maxwell::TessellationSpacing::FractionalEven:
199 return spv::ExecutionMode::SpacingFractionalEven;
200 }
201 UNREACHABLE();
202 return spv::ExecutionMode::SpacingEqual;
203}
204
205spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) {
206 switch (input_topology) {
207 case Maxwell::PrimitiveTopology::Points:
208 return spv::ExecutionMode::InputPoints;
209 case Maxwell::PrimitiveTopology::Lines:
210 case Maxwell::PrimitiveTopology::LineLoop:
211 case Maxwell::PrimitiveTopology::LineStrip:
212 return spv::ExecutionMode::InputLines;
213 case Maxwell::PrimitiveTopology::Triangles:
214 case Maxwell::PrimitiveTopology::TriangleStrip:
215 case Maxwell::PrimitiveTopology::TriangleFan:
216 return spv::ExecutionMode::Triangles;
217 case Maxwell::PrimitiveTopology::LinesAdjacency:
218 case Maxwell::PrimitiveTopology::LineStripAdjacency:
219 return spv::ExecutionMode::InputLinesAdjacency;
220 case Maxwell::PrimitiveTopology::TrianglesAdjacency:
221 case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
222 return spv::ExecutionMode::InputTrianglesAdjacency;
223 case Maxwell::PrimitiveTopology::Quads:
224 UNIMPLEMENTED_MSG("Quads");
225 return spv::ExecutionMode::Triangles;
226 case Maxwell::PrimitiveTopology::QuadStrip:
227 UNIMPLEMENTED_MSG("QuadStrip");
228 return spv::ExecutionMode::Triangles;
229 case Maxwell::PrimitiveTopology::Polygon:
230 UNIMPLEMENTED_MSG("Polygon");
231 return spv::ExecutionMode::Triangles;
232 case Maxwell::PrimitiveTopology::Patches:
233 UNIMPLEMENTED_MSG("Patches");
234 return spv::ExecutionMode::Triangles;
235 }
236 UNREACHABLE();
237 return spv::ExecutionMode::Triangles;
238}
239
240spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) {
241 switch (output_topology) {
242 case Tegra::Shader::OutputTopology::PointList:
243 return spv::ExecutionMode::OutputPoints;
244 case Tegra::Shader::OutputTopology::LineStrip:
245 return spv::ExecutionMode::OutputLineStrip;
246 case Tegra::Shader::OutputTopology::TriangleStrip:
247 return spv::ExecutionMode::OutputTriangleStrip;
248 default:
249 UNREACHABLE();
250 return spv::ExecutionMode::OutputPoints;
251 }
252}
253
254/// Returns true if an attribute index is one of the 32 generic attributes
255constexpr bool IsGenericAttribute(Attribute::Index attribute) {
256 return attribute >= Attribute::Index::Attribute_0 &&
257 attribute <= Attribute::Index::Attribute_31;
258}
259
260/// Returns the location of a generic attribute
261u32 GetGenericAttributeLocation(Attribute::Index attribute) {
262 ASSERT(IsGenericAttribute(attribute));
263 return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0);
264}
265
266/// Returns true if an object has to be treated as precise
267bool IsPrecise(Operation operand) {
268 const auto& meta{operand.GetMeta()};
269 if (std::holds_alternative<MetaArithmetic>(meta)) {
270 return std::get<MetaArithmetic>(meta).precise;
271 }
272 return false;
273}
274
275class SPIRVDecompiler final : public Sirit::Module {
276public:
277 explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_,
278 const Registry& registry_, const Specialization& specialization_)
279 : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()},
280 registry{registry_}, specialization{specialization_} {
281 if (stage_ != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo());
283 }
284
285 AddCapability(spv::Capability::Shader);
286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
287 AddCapability(spv::Capability::ImageQuery);
288 AddCapability(spv::Capability::Image1D);
289 AddCapability(spv::Capability::ImageBuffer);
290 AddCapability(spv::Capability::ImageGatherExtended);
291 AddCapability(spv::Capability::SampledBuffer);
292 AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
293 AddCapability(spv::Capability::DrawParameters);
294 AddCapability(spv::Capability::SubgroupBallotKHR);
295 AddCapability(spv::Capability::SubgroupVoteKHR);
296 AddExtension("SPV_KHR_16bit_storage");
297 AddExtension("SPV_KHR_shader_ballot");
298 AddExtension("SPV_KHR_subgroup_vote");
299 AddExtension("SPV_KHR_storage_buffer_storage_class");
300 AddExtension("SPV_KHR_variable_pointers");
301 AddExtension("SPV_KHR_shader_draw_parameters");
302
303 if (!transform_feedback.empty()) {
304 if (device.IsExtTransformFeedbackSupported()) {
305 AddCapability(spv::Capability::TransformFeedback);
306 } else {
307 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
308 "supported on this device");
309 }
310 }
311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
312 if (ir.UsesViewportIndex()) {
313 AddCapability(spv::Capability::MultiViewport);
314 }
315 if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
316 AddExtension("SPV_EXT_shader_viewport_index_layer");
317 AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
318 }
319 }
320 if (device.IsFormatlessImageLoadSupported()) {
321 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
322 }
323 if (device.IsFloat16Supported()) {
324 AddCapability(spv::Capability::Float16);
325 }
326 t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half");
327 t_half = Name(TypeVector(t_scalar_half, 2), "half");
328
329 const Id main = Decompile();
330
331 switch (stage) {
332 case ShaderType::Vertex:
333 AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces);
334 break;
335 case ShaderType::TesselationControl:
336 AddCapability(spv::Capability::Tessellation);
337 AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces);
338 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
339 header.common2.threads_per_input_primitive);
340 break;
341 case ShaderType::TesselationEval: {
342 const auto& info = registry.GetGraphicsInfo();
343 AddCapability(spv::Capability::Tessellation);
344 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
345 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
346 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
347 AddExecutionMode(main, info.tessellation_clockwise
348 ? spv::ExecutionMode::VertexOrderCw
349 : spv::ExecutionMode::VertexOrderCcw);
350 break;
351 }
352 case ShaderType::Geometry: {
353 const auto& info = registry.GetGraphicsInfo();
354 AddCapability(spv::Capability::Geometry);
355 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
356 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
357 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
358 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
359 header.common4.max_output_vertices);
360 // TODO(Rodrigo): Where can we get this info from?
361 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
362 break;
363 }
364 case ShaderType::Fragment:
365 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
366 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
367 if (header.ps.omap.depth) {
368 AddExecutionMode(main, spv::ExecutionMode::DepthReplacing);
369 }
370 if (specialization.early_fragment_tests) {
371 AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests);
372 }
373 break;
374 case ShaderType::Compute:
375 const auto workgroup_size = specialization.workgroup_size;
376 AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
377 workgroup_size[1], workgroup_size[2]);
378 AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces);
379 break;
380 }
381 }
382
383private:
384 Id Decompile() {
385 DeclareCommon();
386 DeclareVertex();
387 DeclareTessControl();
388 DeclareTessEval();
389 DeclareGeometry();
390 DeclareFragment();
391 DeclareCompute();
392 DeclareRegisters();
393 DeclareCustomVariables();
394 DeclarePredicates();
395 DeclareLocalMemory();
396 DeclareSharedMemory();
397 DeclareInternalFlags();
398 DeclareInputAttributes();
399 DeclareOutputAttributes();
400
401 u32 binding = specialization.base_binding;
402 binding = DeclareConstantBuffers(binding);
403 binding = DeclareGlobalBuffers(binding);
404 binding = DeclareUniformTexels(binding);
405 binding = DeclareSamplers(binding);
406 binding = DeclareStorageTexels(binding);
407 binding = DeclareImages(binding);
408
409 const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
410 AddLabel();
411
412 if (ir.IsDecompiled()) {
413 DeclareFlowVariables();
414 DecompileAST();
415 } else {
416 AllocateLabels();
417 DecompileBranchMode();
418 }
419
420 OpReturn();
421 OpFunctionEnd();
422
423 return main;
424 }
425
426 void DefinePrologue() {
427 if (stage == ShaderType::Vertex) {
428 // Clear Position to avoid reading trash on the Z conversion.
429 const auto position_index = out_indices.position.value();
430 const Id position = AccessElement(t_out_float4, out_vertex, position_index);
431 OpStore(position, v_varying_default);
432
433 if (specialization.point_size) {
434 const u32 point_size_index = out_indices.point_size.value();
435 const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index);
436 OpStore(out_point_size, Constant(t_float, *specialization.point_size));
437 }
438 }
439 }
440
441 void DecompileAST();
442
443 void DecompileBranchMode() {
444 const u32 first_address = ir.GetBasicBlocks().begin()->first;
445 const Id loop_label = OpLabel("loop");
446 const Id merge_label = OpLabel("merge");
447 const Id dummy_label = OpLabel();
448 const Id jump_label = OpLabel();
449 continue_label = OpLabel("continue");
450
451 std::vector<Sirit::Literal> literals;
452 std::vector<Id> branch_labels;
453 for (const auto& [literal, label] : labels) {
454 literals.push_back(literal);
455 branch_labels.push_back(label);
456 }
457
458 jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint),
459 spv::StorageClass::Function, Constant(t_uint, first_address));
460 AddLocalVariable(jmp_to);
461
462 std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack();
463 std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack();
464
465 Name(jmp_to, "jmp_to");
466 Name(ssy_flow_stack, "ssy_flow_stack");
467 Name(ssy_flow_stack_top, "ssy_flow_stack_top");
468 Name(pbk_flow_stack, "pbk_flow_stack");
469 Name(pbk_flow_stack_top, "pbk_flow_stack_top");
470
471 DefinePrologue();
472
473 OpBranch(loop_label);
474 AddLabel(loop_label);
475 OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone);
476 OpBranch(dummy_label);
477
478 AddLabel(dummy_label);
479 const Id default_branch = OpLabel();
480 const Id jmp_to_load = OpLoad(t_uint, jmp_to);
481 OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone);
482 OpSwitch(jmp_to_load, default_branch, literals, branch_labels);
483
484 AddLabel(default_branch);
485 OpReturn();
486
487 for (const auto& [address, bb] : ir.GetBasicBlocks()) {
488 AddLabel(labels.at(address));
489
490 VisitBasicBlock(bb);
491
492 const auto next_it = labels.lower_bound(address + 1);
493 const Id next_label = next_it != labels.end() ? next_it->second : default_branch;
494 OpBranch(next_label);
495 }
496
497 AddLabel(jump_label);
498 OpBranch(continue_label);
499 AddLabel(continue_label);
500 OpBranch(loop_label);
501 AddLabel(merge_label);
502 }
503
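DecompileBranchMode lowers the shader's unstructured control flow into one structured loop whose body is a switch over the jmp_to variable: each basic block gets a case labelled with its address, sequential flow falls through to the next label, and real jumps store the target address into jmp_to before branching back through the continue block, with SSY/PBK return targets kept on small manual stacks. In plain C++ terms the emitted skeleton is roughly this (addresses and block bodies are placeholders):

#include <cstdint>

// Rough C++ rendition of the loop-plus-switch skeleton emitted by DecompileBranchMode.
// Addresses and block bodies are placeholders; the SSY/PBK stacks are omitted.
void ShaderMain() {
    std::uint32_t jmp_to = 0x0000;        // address of the first basic block
    while (true) {
        switch (jmp_to) {
        case 0x0000:                      // label_0x0: block body, then sequential flow
            [[fallthrough]];              // falls straight into the next block's label
        case 0x0020:                      // label_0x20: block ending in a BRA 0x60
            jmp_to = 0x0060;
            break;                        // back to the loop header, re-dispatch on jmp_to
        case 0x0060:                      // label_0x60: block ending in EXIT
            return;                       // becomes OpReturn
        default:
            return;                       // address with no decoded block: exit
        }
    }
}

A single OpLoopMerge/OpSelectionMerge pair keeps the module structured even though the Maxwell code can jump between arbitrary addresses.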
504private:
505 friend class ASTDecompiler;
506 friend class ExprDecompiler;
507
508 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
509
510 void AllocateLabels() {
511 for (const auto& pair : ir.GetBasicBlocks()) {
512 const u32 address = pair.first;
513 labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address)));
514 }
515 }
516
517 void DeclareCommon() {
518 thread_id =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
520 thread_masks[0] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
522 thread_masks[1] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
524 thread_masks[2] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
526 thread_masks[3] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
528 thread_masks[4] =
529 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
530 }
531
532 void DeclareVertex() {
533 if (stage != ShaderType::Vertex) {
534 return;
535 }
536 Id out_vertex_struct;
537 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
538 const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
539 out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output);
540 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
541
542 // Declare input attributes
543 vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index");
544 instance_index =
545 DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index");
546 base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex");
547 base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance");
548 }
549
550 void DeclareTessControl() {
551 if (stage != ShaderType::TesselationControl) {
552 return;
553 }
554 DeclareInputVertexArray(NumInputPatches);
555 DeclareOutputVertexArray(header.common2.threads_per_input_primitive);
556
557 tess_level_outer = DeclareBuiltIn(
558 spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output,
559 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))),
560 "tess_level_outer");
561 Decorate(tess_level_outer, spv::Decoration::Patch);
562
563 tess_level_inner = DeclareBuiltIn(
564 spv::BuiltIn::TessLevelInner, spv::StorageClass::Output,
565 TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))),
566 "tess_level_inner");
567 Decorate(tess_level_inner, spv::Decoration::Patch);
568
569 invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id");
570 }
571
572 void DeclareTessEval() {
573 if (stage != ShaderType::TesselationEval) {
574 return;
575 }
576 DeclareInputVertexArray(NumInputPatches);
577 DeclareOutputVertex();
578
579 tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord");
580 }
581
582 void DeclareGeometry() {
583 if (stage != ShaderType::Geometry) {
584 return;
585 }
586 const auto& info = registry.GetGraphicsInfo();
587 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
588 DeclareInputVertexArray(num_input);
589 DeclareOutputVertex();
590 }
591
592 void DeclareFragment() {
593 if (stage != ShaderType::Fragment) {
594 return;
595 }
596
597 for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) {
598 if (!IsRenderTargetEnabled(rt)) {
599 continue;
600 }
601 const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output));
602 Name(id, fmt::format("frag_color{}", rt));
603 Decorate(id, spv::Decoration::Location, rt);
604
605 frag_colors[rt] = id;
606 interfaces.push_back(id);
607 }
608
609 if (header.ps.omap.depth) {
610 frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output));
611 Name(frag_depth, "frag_depth");
612 Decorate(frag_depth, spv::Decoration::BuiltIn,
613 static_cast<u32>(spv::BuiltIn::FragDepth));
614
615 interfaces.push_back(frag_depth);
616 }
617
618 frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord");
619 front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing");
620 point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord");
621 }
622
623 void DeclareCompute() {
624 if (stage != ShaderType::Compute) {
625 return;
626 }
627
628 workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id");
629 local_invocation_id =
630 DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id");
631 }
632
633 void DeclareRegisters() {
634 for (const u32 gpr : ir.GetRegisters()) {
635 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
636 Name(id, fmt::format("gpr_{}", gpr));
637 registers.emplace(gpr, AddGlobalVariable(id));
638 }
639 }
640
641 void DeclareCustomVariables() {
642 const u32 num_custom_variables = ir.GetNumCustomVariables();
643 for (u32 i = 0; i < num_custom_variables; ++i) {
644 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
645 Name(id, fmt::format("custom_var_{}", i));
646 custom_variables.emplace(i, AddGlobalVariable(id));
647 }
648 }
649
650 void DeclarePredicates() {
651 for (const auto pred : ir.GetPredicates()) {
652 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
653 Name(id, fmt::format("pred_{}", static_cast<u32>(pred)));
654 predicates.emplace(pred, AddGlobalVariable(id));
655 }
656 }
657
658 void DeclareFlowVariables() {
659 for (u32 i = 0; i < ir.GetASTNumVariables(); i++) {
660 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
661 Name(id, fmt::format("flow_var_{}", static_cast<u32>(i)));
662 flow_variables.emplace(i, AddGlobalVariable(id));
663 }
664 }
665
666 void DeclareLocalMemory() {
667 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
668 // specialization time.
669 const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize();
670 if (lmem_size == 0) {
671 return;
672 }
673 const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4);
674 const Id type_array = TypeArray(t_float, Constant(t_uint, element_count));
675 const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array);
676 Name(type_pointer, "LocalMemory");
677
678 local_memory =
679 OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array));
680 AddGlobalVariable(Name(local_memory, "local_memory"));
681 }
682
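Local memory is declared as a private array of 32-bit floats, so the byte size from the shader header (or the stubbed 0x400 bytes for compute) is aligned up to 4 and divided by 4 to get the element count. The arithmetic in isolation, with the same rounding AlignUp performs:

#include <cstdint>

// Same computation as element_count above: align the byte size up to 4, then bytes -> words.
constexpr std::uint32_t LocalMemoryElements(std::uint64_t lmem_size_bytes) {
    return static_cast<std::uint32_t>((lmem_size_bytes + 3) / 4);
}
static_assert(LocalMemoryElements(0x400) == 256); // the stubbed compute-kernel size
static_assert(LocalMemoryElements(10) == 3);      // 10 bytes round up to 12, i.e. 3 floats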
683 void DeclareSharedMemory() {
684 if (stage != ShaderType::Compute) {
685 return;
686 }
687 t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint);
688
689 u32 smem_size = specialization.shared_memory_size * 4;
690 if (smem_size == 0) {
691 // Avoid declaring an empty array.
692 return;
693 }
694 const u32 limit = device.GetMaxComputeSharedMemorySize();
695 if (smem_size > limit) {
696 LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}",
697 smem_size, limit);
698 smem_size = limit;
699 }
700
701 const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4));
702 const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array);
703 Name(type_pointer, "SharedMemory");
704
705 shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup);
706 AddGlobalVariable(Name(shared_memory, "shared_memory"));
707 }
708
709 void DeclareInternalFlags() {
710 static constexpr std::array names{"zero", "sign", "carry", "overflow"};
711
712 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
713 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
714 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
715 }
716 }
717
718 void DeclareInputVertexArray(u32 length) {
719 constexpr auto storage = spv::StorageClass::Input;
720 std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length);
721 }
722
723 void DeclareOutputVertexArray(u32 length) {
724 constexpr auto storage = spv::StorageClass::Output;
725 std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length);
726 }
727
728 std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class,
729 std::string name, u32 length) {
730 const auto [struct_id, indices] = DeclareVertexStruct();
731 const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length));
732 const Id vertex_ptr = TypePointer(storage_class, vertex_array);
733 const Id vertex = OpVariable(vertex_ptr, storage_class);
734 AddGlobalVariable(Name(vertex, std::move(name)));
735 interfaces.push_back(vertex);
736 return {indices, vertex};
737 }
738
739 void DeclareOutputVertex() {
740 Id out_vertex_struct;
741 std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct();
742 const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct);
743 out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output);
744 interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex")));
745 }
746
747 void DeclareInputAttributes() {
748 for (const auto index : ir.GetInputAttributes()) {
749 if (!IsGenericAttribute(index)) {
750 continue;
751 }
752 const u32 location = GetGenericAttributeLocation(index);
753 if (!IsAttributeEnabled(location)) {
754 continue;
755 }
756 const auto type_descriptor = GetAttributeType(location);
757 Id type;
758 if (IsInputAttributeArray()) {
759 type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3);
760 type = TypeArray(type, Constant(t_uint, GetNumInputVertices()));
761 type = TypePointer(spv::StorageClass::Input, type);
762 } else {
763 type = type_descriptor.vector;
764 }
765 const Id id = OpVariable(type, spv::StorageClass::Input);
766 AddGlobalVariable(Name(id, fmt::format("in_attr{}", location)));
767 input_attributes.emplace(index, id);
768 interfaces.push_back(id);
769
770 Decorate(id, spv::Decoration::Location, location);
771
772 if (stage != ShaderType::Fragment) {
773 continue;
774 }
775 switch (header.ps.GetPixelImap(location)) {
776 case PixelImap::Constant:
777 Decorate(id, spv::Decoration::Flat);
778 break;
779 case PixelImap::Perspective:
780 // Default
781 break;
782 case PixelImap::ScreenLinear:
783 Decorate(id, spv::Decoration::NoPerspective);
784 break;
785 default:
786 UNREACHABLE_MSG("Unused attribute being fetched");
787 }
788 }
789 }
790
791 void DeclareOutputAttributes() {
792 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
793 return;
794 }
795
796 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
797 for (const auto index : ir.GetOutputAttributes()) {
798 if (!IsGenericAttribute(index)) {
799 continue;
800 }
801 DeclareOutputAttribute(index);
802 }
803 }
804
805 void DeclareOutputAttribute(Attribute::Index index) {
806 static constexpr std::string_view swizzle = "xyzw";
807
808 const u32 location = GetGenericAttributeLocation(index);
809 u8 element = 0;
810 while (element < 4) {
811 const std::size_t remainder = 4 - element;
812
813 std::size_t num_components = remainder;
814 const std::optional tfb = GetTransformFeedbackInfo(index, element);
815 if (tfb) {
816 num_components = tfb->components;
817 }
818
819 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
820 Id varying_default = v_varying_default;
821 if (IsOutputAttributeArray()) {
822 const u32 num = GetNumOutputVertices();
823 type = TypeArray(type, Constant(t_uint, num));
824 if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
825 // Intel's proprietary driver fails to setup defaults for arrayed output
826 // attributes.
827 varying_default = ConstantComposite(type, std::vector(num, varying_default));
828 }
829 }
830 type = TypePointer(spv::StorageClass::Output, type);
831
832 std::string name = fmt::format("out_attr{}", location);
833 if (num_components < 4 || element > 0) {
834 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
835 }
836
837 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
838 Name(AddGlobalVariable(id), name);
839
840 GenericVaryingDescription description;
841 description.id = id;
842 description.first_element = element;
843 description.is_scalar = num_components == 1;
844 for (u32 i = 0; i < num_components; ++i) {
845 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
846 output_attributes.emplace(offset, description);
847 }
848 interfaces.push_back(id);
849
850 Decorate(id, spv::Decoration::Location, location);
851 if (element > 0) {
852 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
853 }
854 if (tfb && device.IsExtTransformFeedbackSupported()) {
855 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
856 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
857 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
858 }
859
860 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
861 }
862 }
863
864 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
865 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
866 const auto it = transform_feedback.find(location);
867 if (it == transform_feedback.end()) {
868 return {};
869 }
870 return it->second;
871 }
872
873 u32 DeclareConstantBuffers(u32 binding) {
874 for (const auto& [index, size] : ir.GetConstantBuffers()) {
875 const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
876 : t_cbuf_std140_ubo;
877 const Id id = OpVariable(type, spv::StorageClass::Uniform);
878 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
879
880 Decorate(id, spv::Decoration::Binding, binding++);
881 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
882 constant_buffers.emplace(index, id);
883 }
884 return binding;
885 }
886
887 u32 DeclareGlobalBuffers(u32 binding) {
888 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
889 const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer);
890 AddGlobalVariable(
891 Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset)));
892
893 Decorate(id, spv::Decoration::Binding, binding++);
894 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
895 global_buffers.emplace(base, id);
896 }
897 return binding;
898 }
899
900 u32 DeclareUniformTexels(u32 binding) {
901 for (const auto& sampler : ir.GetSamplers()) {
902 if (!sampler.is_buffer) {
903 continue;
904 }
905 ASSERT(!sampler.is_array);
906 ASSERT(!sampler.is_shadow);
907
908 constexpr auto dim = spv::Dim::Buffer;
909 constexpr int depth = 0;
910 constexpr int arrayed = 0;
911 constexpr bool ms = false;
912 constexpr int sampled = 1;
913 constexpr auto format = spv::ImageFormat::Unknown;
914 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
915 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
916 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
917 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
918 Decorate(id, spv::Decoration::Binding, binding++);
919 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
920
921 uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
922 }
923 return binding;
924 }
925
926 u32 DeclareSamplers(u32 binding) {
927 for (const auto& sampler : ir.GetSamplers()) {
928 if (sampler.is_buffer) {
929 continue;
930 }
931 const auto dim = GetSamplerDim(sampler);
932 const int depth = sampler.is_shadow ? 1 : 0;
933 const int arrayed = sampler.is_array ? 1 : 0;
934 constexpr bool ms = false;
935 constexpr int sampled = 1;
936 constexpr auto format = spv::ImageFormat::Unknown;
937 const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
938 const Id sampler_type = TypeSampledImage(image_type);
939 const Id sampler_pointer_type =
940 TypePointer(spv::StorageClass::UniformConstant, sampler_type);
941 const Id type = sampler.is_indexed
942 ? TypeArray(sampler_type, Constant(t_uint, sampler.size))
943 : sampler_type;
944 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
945 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
946 AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index)));
947 Decorate(id, spv::Decoration::Binding, binding++);
948 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
949
950 sampled_images.emplace(
951 sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id});
952 }
953 return binding;
954 }
955
956 u32 DeclareStorageTexels(u32 binding) {
957 for (const auto& image : ir.GetImages()) {
958 if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
959 continue;
960 }
961 DeclareImage(image, binding);
962 }
963 return binding;
964 }
965
966 u32 DeclareImages(u32 binding) {
967 for (const auto& image : ir.GetImages()) {
968 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
969 continue;
970 }
971 DeclareImage(image, binding);
972 }
973 return binding;
974 }
975
976 void DeclareImage(const ImageEntry& image, u32& binding) {
977 const auto [dim, arrayed] = GetImageDim(image);
978 constexpr int depth = 0;
979 constexpr bool ms = false;
980 constexpr int sampled = 2; // This won't be accessed with a sampler
981 const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
982 const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
983 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
984 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
985 AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
986
987 Decorate(id, spv::Decoration::Binding, binding++);
988 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
989 if (image.is_read && !image.is_written) {
990 Decorate(id, spv::Decoration::NonWritable);
991 } else if (image.is_written && !image.is_read) {
992 Decorate(id, spv::Decoration::NonReadable);
993 }
994
995 images.emplace(image.index, StorageImage{image_type, id});
996 }
997
998 bool IsRenderTargetEnabled(u32 rt) const {
999 for (u32 component = 0; component < 4; ++component) {
1000 if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
1001 return true;
1002 }
1003 }
1004 return false;
1005 }
1006
1007 bool IsInputAttributeArray() const {
1008 return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval ||
1009 stage == ShaderType::Geometry;
1010 }
1011
1012 bool IsOutputAttributeArray() const {
1013 return stage == ShaderType::TesselationControl;
1014 }
1015
1016 bool IsAttributeEnabled(u32 location) const {
1017 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
1018 }
1019
1020 u32 GetNumInputVertices() const {
1021 switch (stage) {
1022 case ShaderType::Geometry:
1023 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
1024 case ShaderType::TesselationControl:
1025 case ShaderType::TesselationEval:
1026 return NumInputPatches;
1027 default:
1028 UNREACHABLE();
1029 return 1;
1030 }
1031 }
1032
1033 u32 GetNumOutputVertices() const {
1034 switch (stage) {
1035 case ShaderType::TesselationControl:
1036 return header.common2.threads_per_input_primitive;
1037 default:
1038 UNREACHABLE();
1039 return 1;
1040 }
1041 }
1042
1043 std::tuple<Id, VertexIndices> DeclareVertexStruct() {
1044 struct BuiltIn {
1045 Id type;
1046 spv::BuiltIn builtin;
1047 const char* name;
1048 };
1049 std::vector<BuiltIn> members;
1050 members.reserve(4);
1051
1052 const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) {
1053 const auto index = static_cast<u32>(members.size());
1054 members.push_back(BuiltIn{type, builtin, name});
1055 return index;
1056 };
1057
1058 VertexIndices indices;
1059 indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
1060
1061 if (ir.UsesLayer()) {
1062 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1063 indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
1064 } else {
1065 LOG_ERROR(
1066 Render_Vulkan,
1067 "Shader requires Layer but it's not supported on this stage with this device.");
1068 }
1069 }
1070
1071 if (ir.UsesViewportIndex()) {
1072 if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
1073 indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
1074 } else {
1075 LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
1076 "this stage with this device.");
1077 }
1078 }
1079
1080 if (ir.UsesPointSize() || specialization.point_size) {
1081 indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size");
1082 }
1083
1084 const auto& ir_output_attributes = ir.GetOutputAttributes();
1085 const bool declare_clip_distances = std::any_of(
1086 ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) {
1087 return index == Attribute::Index::ClipDistances0123 ||
1088 index == Attribute::Index::ClipDistances4567;
1089 });
1090 if (declare_clip_distances) {
1091 indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)),
1092 spv::BuiltIn::ClipDistance, "clip_distances");
1093 }
1094
1095 std::vector<Id> member_types;
1096 member_types.reserve(members.size());
1097 for (std::size_t i = 0; i < members.size(); ++i) {
1098 member_types.push_back(members[i].type);
1099 }
1100 const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex");
1101 Decorate(per_vertex_struct, spv::Decoration::Block);
1102
1103 for (std::size_t index = 0; index < members.size(); ++index) {
1104 const auto& member = members[index];
1105 MemberName(per_vertex_struct, static_cast<u32>(index), member.name);
1106 MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn,
1107 static_cast<u32>(member.builtin));
1108 }
1109
1110 return {per_vertex_struct, indices};
1111 }
1112
1113 void VisitBasicBlock(const NodeBlock& bb) {
1114 for (const auto& node : bb) {
1115 Visit(node);
1116 }
1117 }
1118
1119 Expression Visit(const Node& node) {
1120 if (const auto operation = std::get_if<OperationNode>(&*node)) {
1121 if (const auto amend_index = operation->GetAmendIndex()) {
1122 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1123 ASSERT(type == Type::Void);
1124 }
1125 const auto operation_index = static_cast<std::size_t>(operation->GetCode());
1126 const auto decompiler = operation_decompilers[operation_index];
1127 if (decompiler == nullptr) {
1128 UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index);
1129 }
1130 return (this->*decompiler)(*operation);
1131 }
1132
1133 if (const auto gpr = std::get_if<GprNode>(&*node)) {
1134 const u32 index = gpr->GetIndex();
1135 if (index == Register::ZeroIndex) {
1136 return {v_float_zero, Type::Float};
1137 }
1138 return {OpLoad(t_float, registers.at(index)), Type::Float};
1139 }
1140
1141 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
1142 const u32 index = cv->GetIndex();
1143 return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
1144 }
1145
1146 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
1147 return {Constant(t_uint, immediate->GetValue()), Type::Uint};
1148 }
1149
1150 if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
1151 const auto value = [&]() -> Id {
1152 switch (const auto index = predicate->GetIndex(); index) {
1153 case Tegra::Shader::Pred::UnusedIndex:
1154 return v_true;
1155 case Tegra::Shader::Pred::NeverExecute:
1156 return v_false;
1157 default:
1158 return OpLoad(t_bool, predicates.at(index));
1159 }
1160 }();
1161 if (predicate->IsNegated()) {
1162 return {OpLogicalNot(t_bool, value), Type::Bool};
1163 }
1164 return {value, Type::Bool};
1165 }
1166
1167 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
1168 const auto attribute = abuf->GetIndex();
1169 const u32 element = abuf->GetElement();
1170 const auto& buffer = abuf->GetBuffer();
1171
1172 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1173 std::vector<Id> members;
1174 members.reserve(std::size(indices) + 1);
1175
1176 if (buffer && IsInputAttributeArray()) {
1177 members.push_back(AsUint(Visit(buffer)));
1178 }
1179 for (const u32 index : indices) {
1180 members.push_back(Constant(t_uint, index));
1181 }
1182 return OpAccessChain(pointer_type, composite, members);
1183 };
1184
1185 switch (attribute) {
1186 case Attribute::Index::Position: {
1187 if (stage == ShaderType::Fragment) {
1188 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
1189 Type::Float};
1190 }
1191 const std::vector elements = {in_indices.position.value(), element};
1192 return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float};
1193 }
1194 case Attribute::Index::PointCoord: {
1195 switch (element) {
1196 case 0:
1197 case 1:
1198 return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element),
1199 Type::Float};
1200 }
1201 UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element);
1202 return {v_float_zero, Type::Float};
1203 }
1204 case Attribute::Index::TessCoordInstanceIDVertexID:
1205 // TODO(Subv): Find out what the values are for the first two elements when inside a
1206 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
1207 // shader.
1208 switch (element) {
1209 case 0:
1210 case 1:
1211 return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)),
1212 Type::Float};
1213 case 2:
1214 return {
1215 OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)),
1216 Type::Int};
1217 case 3:
1218 return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)),
1219 Type::Int};
1220 }
1221 UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
1222 return {Constant(t_uint, 0U), Type::Uint};
1223 case Attribute::Index::FrontFacing:
1224 // TODO(Subv): Find out what the values are for the other elements.
1225 ASSERT(stage == ShaderType::Fragment);
1226 if (element == 3) {
1227 const Id is_front_facing = OpLoad(t_bool, front_facing);
1228 const Id true_value = Constant(t_int, static_cast<s32>(-1));
1229 const Id false_value = Constant(t_int, 0);
1230 return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int};
1231 }
1232 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1233 return {v_float_zero, Type::Float};
1234 default:
1235 if (!IsGenericAttribute(attribute)) {
1236 break;
1237 }
1238 const u32 location = GetGenericAttributeLocation(attribute);
1239 if (!IsAttributeEnabled(location)) {
1240 // Disabled attributes (also known as constant attributes) always return zero.
1241 return {v_float_zero, Type::Float};
1242 }
1243 const auto type_descriptor = GetAttributeType(location);
1244 const Type type = type_descriptor.type;
1245 const Id attribute_id = input_attributes.at(attribute);
1246 const std::vector elements = {element};
1247 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1248 return {OpLoad(GetTypeDefinition(type), pointer), type};
1249 }
1250 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute);
1251 return {v_float_zero, Type::Float};
1252 }
1253
1254 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1255 const Node& offset = cbuf->GetOffset();
1256 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
1257
1258 Id pointer{};
1259 if (device.IsKhrUniformBufferStandardLayoutSupported()) {
1260 const Id buffer_offset =
1261 OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U));
1262 pointer =
1263 OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset);
1264 } else {
1265 Id buffer_index{};
1266 Id buffer_element{};
1267 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1268 // Direct access
1269 const u32 offset_imm = immediate->GetValue();
1270 ASSERT(offset_imm % 4 == 0);
1271 buffer_index = Constant(t_uint, offset_imm / 16);
1272 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
1273 } else if (std::holds_alternative<OperationNode>(*offset)) {
1274 // Indirect access
1275 const Id offset_id = AsUint(Visit(offset));
1276 const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4));
1277 const Id final_offset =
1278 OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1));
1279 buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4));
1280 buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4));
1281 } else {
1282 UNREACHABLE_MSG("Unmanaged offset node type");
1283 }
1284 pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
1285 buffer_element);
1286 }
1287 return {OpLoad(t_float, pointer), Type::Float};
1288 }
1289
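When the KHR uniform-buffer standard layout is unavailable, constant buffers are std140 arrays of vec4, so a direct (immediate) byte offset is split into a vec4 index, offset / 16, and a component within it, (offset / 4) % 4; indirect offsets are first reduced to words and wrapped with OpUMod against MaxConstBufferElements so they cannot index past the declared array. The direct-access split as a worked example:

#include <cstdint>
#include <utility>

// Split a byte offset into (vec4 index, component), exactly as the std140 path above does.
constexpr std::pair<std::uint32_t, std::uint32_t> Std140Index(std::uint32_t byte_offset) {
    return {byte_offset / 16, (byte_offset / 4) % 4};
}
static_assert(Std140Index(0x00) == std::pair<std::uint32_t, std::uint32_t>{0, 0}); // c[0].x
static_assert(Std140Index(0x2C) == std::pair<std::uint32_t, std::uint32_t>{2, 3}); // c[2].w
static_assert(Std140Index(0x30) == std::pair<std::uint32_t, std::uint32_t>{3, 0}); // c[3].x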
1290 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1291 return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
1292 }
1293
1294 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
1295 Id address = AsUint(Visit(lmem->GetAddress()));
1296 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1297 const Id pointer = OpAccessChain(t_prv_float, local_memory, address);
1298 return {OpLoad(t_float, pointer), Type::Float};
1299 }
1300
1301 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1302 return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
1303 }
1304
1305 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
1306 const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag()));
1307 return {OpLoad(t_bool, flag), Type::Bool};
1308 }
1309
1310 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
1311 if (const auto amend_index = conditional->GetAmendIndex()) {
1312 [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type;
1313 ASSERT(type == Type::Void);
1314 }
1315 // It's invalid to call conditional on nested nodes, use an operation instead
1316 const Id true_label = OpLabel();
1317 const Id skip_label = OpLabel();
1318 const Id condition = AsBool(Visit(conditional->GetCondition()));
1319 OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone);
1320 OpBranchConditional(condition, true_label, skip_label);
1321 AddLabel(true_label);
1322
1323 conditional_branch_set = true;
1324 inside_branch = false;
1325 VisitBasicBlock(conditional->GetCode());
1326 conditional_branch_set = false;
1327 if (!inside_branch) {
1328 OpBranch(skip_label);
1329 } else {
1330 inside_branch = false;
1331 }
1332 AddLabel(skip_label);
1333 return {};
1334 }
1335
1336 if (const auto comment = std::get_if<CommentNode>(&*node)) {
1337 if (device.HasDebuggingToolAttached()) {
1338 // We should insert comments with OpString instead of using named variables
1339 Name(OpUndef(t_int), comment->GetText());
1340 }
1341 return {};
1342 }
1343
1344 UNREACHABLE();
1345 return {};
1346 }
1347
1348 template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type>
1349 Expression Unary(Operation operation) {
1350 const Id type_def = GetTypeDefinition(result_type);
1351 const Id op_a = As(Visit(operation[0]), type_a);
1352
1353 const Id value = (this->*func)(type_def, op_a);
1354 if (IsPrecise(operation)) {
1355 Decorate(value, spv::Decoration::NoContraction);
1356 }
1357 return {value, result_type};
1358 }
1359
1360 template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type,
1361 Type type_b = type_a>
1362 Expression Binary(Operation operation) {
1363 const Id type_def = GetTypeDefinition(result_type);
1364 const Id op_a = As(Visit(operation[0]), type_a);
1365 const Id op_b = As(Visit(operation[1]), type_b);
1366
1367 const Id value = (this->*func)(type_def, op_a, op_b);
1368 if (IsPrecise(operation)) {
1369 Decorate(value, spv::Decoration::NoContraction);
1370 }
1371 return {value, result_type};
1372 }
1373
1374 template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1375 Type type_b = type_a, Type type_c = type_b>
1376 Expression Ternary(Operation operation) {
1377 const Id type_def = GetTypeDefinition(result_type);
1378 const Id op_a = As(Visit(operation[0]), type_a);
1379 const Id op_b = As(Visit(operation[1]), type_b);
1380 const Id op_c = As(Visit(operation[2]), type_c);
1381
1382 const Id value = (this->*func)(type_def, op_a, op_b, op_c);
1383 if (IsPrecise(operation)) {
1384 Decorate(value, spv::Decoration::NoContraction);
1385 }
1386 return {value, result_type};
1387 }
1388
1389 template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type,
1390 Type type_b = type_a, Type type_c = type_b, Type type_d = type_c>
1391 Expression Quaternary(Operation operation) {
1392 const Id type_def = GetTypeDefinition(result_type);
1393 const Id op_a = As(Visit(operation[0]), type_a);
1394 const Id op_b = As(Visit(operation[1]), type_b);
1395 const Id op_c = As(Visit(operation[2]), type_c);
1396 const Id op_d = As(Visit(operation[3]), type_d);
1397
1398 const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d);
1399 if (IsPrecise(operation)) {
1400 Decorate(value, spv::Decoration::NoContraction);
1401 }
1402 return {value, result_type};
1403 }
1404
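    // Assign dispatches on the destination node type: GPR writes resolve to the register map,
    // attribute-buffer writes pick an output varying pointer, patch writes target the
    // tessellation levels, and local/shared/global memory writes build an access chain into
    // the matching storage. The source is visited once and stored through that pointer.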
1405 Expression Assign(Operation operation) {
1406 const Node& dest = operation[0];
1407 const Node& src = operation[1];
1408
1409 Expression target{};
1410 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1411 if (gpr->GetIndex() == Register::ZeroIndex) {
1412 // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source
1413 // because it might have side effects.
1414 Visit(src);
1415 return {};
1416 }
1417 target = {registers.at(gpr->GetIndex()), Type::Float};
1418
1419 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1420 const auto& buffer = abuf->GetBuffer();
1421 const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) {
1422 std::vector<Id> members;
1423 members.reserve(std::size(indices) + 1);
1424
1425 if (buffer && IsOutputAttributeArray()) {
1426 members.push_back(AsUint(Visit(buffer)));
1427 }
1428 for (const u32 index : indices) {
1429 members.push_back(Constant(t_uint, index));
1430 }
1431 return OpAccessChain(pointer_type, composite, members);
1432 };
1433
1434 target = [&]() -> Expression {
1435 const u32 element = abuf->GetElement();
1436 switch (const auto attribute = abuf->GetIndex(); attribute) {
1437 case Attribute::Index::Position: {
1438 const u32 index = out_indices.position.value();
1439 return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float};
1440 }
1441 case Attribute::Index::LayerViewportPointSize:
1442 switch (element) {
1443 case 1: {
1444 if (!out_indices.layer) {
1445 return {};
1446 }
1447 const u32 index = out_indices.layer.value();
1448 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1449 }
1450 case 2: {
1451 if (!out_indices.viewport) {
1452 return {};
1453 }
1454 const u32 index = out_indices.viewport.value();
1455 return {AccessElement(t_out_int, out_vertex, index), Type::Int};
1456 }
1457 case 3: {
1458 const auto index = out_indices.point_size.value();
1459 return {AccessElement(t_out_float, out_vertex, index), Type::Float};
1460 }
1461 default:
1462 UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement());
1463 return {};
1464 }
1465 case Attribute::Index::ClipDistances0123: {
1466 const u32 index = out_indices.clip_distances.value();
1467 return {AccessElement(t_out_float, out_vertex, index, element), Type::Float};
1468 }
1469 case Attribute::Index::ClipDistances4567: {
1470 const u32 index = out_indices.clip_distances.value();
1471 return {AccessElement(t_out_float, out_vertex, index, element + 4),
1472 Type::Float};
1473 }
1474 default:
1475 if (IsGenericAttribute(attribute)) {
1476 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1477 const GenericVaryingDescription description = output_attributes.at(offset);
1478 const Id composite = description.id;
1479 std::vector<u32> indices;
1480 if (!description.is_scalar) {
1481 indices.push_back(element - description.first_element);
1482 }
1483 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1484 }
1485 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1486 static_cast<u32>(attribute));
1487 return {};
1488 }
1489 }();
1490
1491 } else if (const auto patch = std::get_if<PatchNode>(&*dest)) {
1492 target = [&]() -> Expression {
1493 const u32 offset = patch->GetOffset();
1494 switch (offset) {
1495 case 0:
1496 case 1:
1497 case 2:
1498 case 3:
1499 return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float};
1500 case 4:
1501 case 5:
1502 return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float};
1503 }
1504 UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset);
1505 return {};
1506 }();
1507
1508 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1509 Id address = AsUint(Visit(lmem->GetAddress()));
1510 address = OpUDiv(t_uint, address, Constant(t_uint, 4));
1511 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
1512
1513 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1514 target = {GetSharedMemoryPointer(*smem), Type::Uint};
1515
1516 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1517 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1518
1519 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1520 target = {custom_variables.at(cv->GetIndex()), Type::Float};
1521
1522 } else {
1523 UNIMPLEMENTED();
1524 }
1525
1526 if (!target.id) {
1527 // On failure target.id is left null; skip these stores.
1528 return {};
1529 }
1530
1531 OpStore(target.id, As(Visit(src), target.type));
1532 return {};
1533 }
1534
1535 template <u32 offset>
1536 Expression FCastHalf(Operation operation) {
1537 const Id value = AsHalfFloat(Visit(operation[0]));
1538 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)),
1539 Type::Float};
1540 }
1541
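    // FSWZADD: the sign applied to each operand (-1, +1 or 0) comes from the two constant
    // float4 lookup tables below, indexed by two bits of the swizzle operand (operation[2])
    // selected with the thread's lane id (thread_id & 3). For example, with thread_id == 2 and
    // a swizzle value of 0b00'01'10'11 the extracted index is 1, so op_a is scaled by +1 and
    // op_b by -1.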
1542 Expression FSwizzleAdd(Operation operation) {
1543 const Id minus = Constant(t_float, -1.0f);
1544 const Id plus = v_float_one;
1545 const Id zero = v_float_zero;
1546 const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero);
1547 const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus);
1548
1549 Id mask = OpLoad(t_uint, thread_id);
1550 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
1551 mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1));
1552 mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask);
1553 mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3));
1554
1555 const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask);
1556 const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask);
1557
1558 const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a);
1559 const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b);
1560 return {OpFAdd(t_float, op_a, op_b), Type::Float};
1561 }
1562
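    // 0xbc00 and 0x3c00 are the IEEE half-precision bit patterns for -1.0 and +1.0;
    // 0xbf800000 and 0x3f800000 are the corresponding single-precision patterns used when the
    // device lacks native float16 support and half operations are emulated with 32-bit floats.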
1563 Expression HNegate(Operation operation) {
1564 const bool is_f16 = device.IsFloat16Supported();
1565 const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000);
1566 const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000);
1567 const auto GetNegate = [&](std::size_t index) {
1568 return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one);
1569 };
1570 const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2));
1571 return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat};
1572 }
1573
1574 Expression HClamp(Operation operation) {
1575 const auto Pack = [&](std::size_t index) {
1576 const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index])));
1577 return OpCompositeConstruct(t_half, scalar, scalar);
1578 };
1579 const Id value = AsHalfFloat(Visit(operation[0]));
1580 const Id min = Pack(1);
1581 const Id max = Pack(2);
1582
1583 const Id clamped = OpFClamp(t_half, value, min, max);
1584 if (IsPrecise(operation)) {
1585 Decorate(clamped, spv::Decoration::NoContraction);
1586 }
1587 return {clamped, Type::HalfFloat};
1588 }
1589
1590 Expression HCastFloat(Operation operation) {
1591 const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1592 return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat};
1593 }
1594
1595 Expression HUnpack(Operation operation) {
1596 Expression operand = Visit(operation[0]);
1597 const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta());
1598 if (type == Tegra::Shader::HalfType::H0_H1) {
1599 return operand;
1600 }
1601 const auto value = [&] {
1602 switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
1603 case Tegra::Shader::HalfType::F32:
1604 return GetHalfScalarFromFloat(AsFloat(operand));
1605 case Tegra::Shader::HalfType::H0_H0:
1606 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0);
1607 case Tegra::Shader::HalfType::H1_H1:
1608 return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1);
1609 default:
1610 UNREACHABLE();
1611 return ConstantNull(t_half);
1612 }
1613 }();
1614 return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat};
1615 }
1616
1617 Expression HMergeF32(Operation operation) {
1618 const Id value = AsHalfFloat(Visit(operation[0]));
1619 return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float};
1620 }
1621
1622 template <u32 offset>
1623 Expression HMergeHN(Operation operation) {
1624 const Id target = AsHalfFloat(Visit(operation[0]));
1625 const Id source = AsHalfFloat(Visit(operation[1]));
1626 const Id object = OpCompositeExtract(t_scalar_half, source, offset);
1627 return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat};
1628 }
1629
1630 Expression HPack2(Operation operation) {
1631 const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0])));
1632 const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1])));
1633 return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat};
1634 }
1635
1636 Expression LogicalAddCarry(Operation operation) {
1637 const Id op_a = AsUint(Visit(operation[0]));
1638 const Id op_b = AsUint(Visit(operation[1]));
1639
1640 const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
1641 const Id carry = OpCompositeExtract(t_uint, result, 1);
1642 return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
1643 }
1644
1645 Expression LogicalAssign(Operation operation) {
1646 const Node& dest = operation[0];
1647 const Node& src = operation[1];
1648
1649 Id target{};
1650 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1651 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1652
1653 const auto index = pred->GetIndex();
1654 switch (index) {
1655 case Tegra::Shader::Pred::NeverExecute:
1656 case Tegra::Shader::Pred::UnusedIndex:
1657 // Writing to these predicates is a no-op
1658 return {};
1659 }
1660 target = predicates.at(index);
1661
1662 } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
1663 target = internal_flags.at(static_cast<u32>(flag->GetFlag()));
1664 }
1665
1666 OpStore(target, AsBool(Visit(src)));
1667 return {};
1668 }
1669
1670 Expression LogicalFOrdered(Operation operation) {
1671 // Emulate SPIR-V's OpOrdered
1672 const Id op_a = AsFloat(Visit(operation[0]));
1673 const Id op_b = AsFloat(Visit(operation[1]));
1674 const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
1675 const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
1676 return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
1677 }
1678
1679 Expression LogicalFUnordered(Operation operation) {
1680 // Emulate SPIR-V's OpUnordered
1681 const Id op_a = AsFloat(Visit(operation[0]));
1682 const Id op_b = AsFloat(Visit(operation[1]));
1683 const Id is_nan_a = OpIsNan(t_bool, op_a);
1684 const Id is_nan_b = OpIsNan(t_bool, op_b);
1685 return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
1686 }
1687
1688 Id GetTextureSampler(Operation operation) {
1689 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1690 ASSERT(!meta.sampler.is_buffer);
1691
1692 const auto& entry = sampled_images.at(meta.sampler.index);
1693 Id sampler = entry.variable;
1694 if (meta.sampler.is_indexed) {
1695 const Id index = AsInt(Visit(meta.index));
1696 sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
1697 }
1698 return OpLoad(entry.sampler_type, sampler);
1699 }
1700
1701 Id GetTextureImage(Operation operation) {
1702 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1703 const u32 index = meta.sampler.index;
1704 if (meta.sampler.is_buffer) {
1705 const auto& entry = uniform_texels.at(index);
1706 return OpLoad(entry.image_type, entry.image);
1707 } else {
1708 const auto& entry = sampled_images.at(index);
1709 return OpImage(entry.image_type, GetTextureSampler(operation));
1710 }
1711 }
1712
1713 Id GetImage(Operation operation) {
1714 const auto& meta = std::get<MetaImage>(operation.GetMeta());
1715 const auto entry = images.at(meta.image.index);
1716 return OpLoad(entry.image_type, entry.image);
1717 }
1718
1719 Id AssembleVector(const std::vector<Id>& coords, Type type) {
1720 const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1);
1721 return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords);
1722 }
1723
1724 Id GetCoordinates(Operation operation, Type type) {
1725 std::vector<Id> coords;
1726 for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) {
1727 coords.push_back(As(Visit(operation[i]), type));
1728 }
1729 if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) {
1730 // Add array coordinate for textures
1731 if (meta->sampler.is_array) {
1732 Id array = AsInt(Visit(meta->array));
1733 if (type == Type::Float) {
1734 array = OpConvertSToF(t_float, array);
1735 }
1736 coords.push_back(array);
1737 }
1738 }
1739 return AssembleVector(coords, type);
1740 }
1741
1742 Id GetOffsetCoordinates(Operation operation) {
1743 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1744 std::vector<Id> coords;
1745 coords.reserve(meta.aoffi.size());
1746 for (const auto& coord : meta.aoffi) {
1747 coords.push_back(AsInt(Visit(coord)));
1748 }
1749 return AssembleVector(coords, Type::Int);
1750 }
1751
1752 std::pair<Id, Id> GetDerivatives(Operation operation) {
1753 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1754 const auto& derivatives = meta.derivates;
1755 ASSERT(derivatives.size() % 2 == 0);
1756
1757 const std::size_t components = derivatives.size() / 2;
1758 std::vector<Id> dx, dy;
1759 dx.reserve(components);
1760 dy.reserve(components);
1761 for (std::size_t index = 0; index < components; ++index) {
1762 dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0))));
1763 dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1))));
1764 }
1765 return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)};
1766 }
1767
1768 Expression GetTextureElement(Operation operation, Id sample_value, Type type) {
1769 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1770 const auto type_def = GetTypeDefinition(type);
1771 return {OpCompositeExtract(type_def, sample_value, meta.element), type};
1772 }
1773
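    // Generic texture sampling. Implicit-LOD sampling is only valid in fragment shaders, so
    // every other stage forces an explicit LOD of zero; bias, texel offsets and the depth
    // comparison reference are appended through the image-operands mask when present.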
1774 Expression Texture(Operation operation) {
1775 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1776
1777 const bool can_implicit = stage == ShaderType::Fragment;
1778 const Id sampler = GetTextureSampler(operation);
1779 const Id coords = GetCoordinates(operation, Type::Float);
1780
1781 std::vector<Id> operands;
1782 spv::ImageOperandsMask mask{};
1783 if (meta.bias) {
1784 mask = mask | spv::ImageOperandsMask::Bias;
1785 operands.push_back(AsFloat(Visit(meta.bias)));
1786 }
1787
1788 if (!can_implicit) {
1789 mask = mask | spv::ImageOperandsMask::Lod;
1790 operands.push_back(v_float_zero);
1791 }
1792
1793 if (!meta.aoffi.empty()) {
1794 mask = mask | spv::ImageOperandsMask::Offset;
1795 operands.push_back(GetOffsetCoordinates(operation));
1796 }
1797
1798 if (meta.depth_compare) {
1799 // Depth sampling
1800 UNIMPLEMENTED_IF(meta.bias);
1801 const Id dref = AsFloat(Visit(meta.depth_compare));
1802 if (can_implicit) {
1803 return {
1804 OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands),
1805 Type::Float};
1806 } else {
1807 return {
1808 OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1809 Type::Float};
1810 }
1811 }
1812
1813 Id texture;
1814 if (can_implicit) {
1815 texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands);
1816 } else {
1817 texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1818 }
1819 return GetTextureElement(operation, texture, Type::Float);
1820 }
1821
1822 Expression TextureLod(Operation operation) {
1823 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1824
1825 const Id sampler = GetTextureSampler(operation);
1826 const Id coords = GetCoordinates(operation, Type::Float);
1827 const Id lod = AsFloat(Visit(meta.lod));
1828
1829 spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod;
1830 std::vector<Id> operands{lod};
1831
1832 if (!meta.aoffi.empty()) {
1833 mask = mask | spv::ImageOperandsMask::Offset;
1834 operands.push_back(GetOffsetCoordinates(operation));
1835 }
1836
1837 if (meta.sampler.is_shadow) {
1838 const Id dref = AsFloat(Visit(meta.depth_compare));
1839 return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands),
1840 Type::Float};
1841 }
1842 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands);
1843 return GetTextureElement(operation, texture, Type::Float);
1844 }
1845
1846 Expression TextureGather(Operation operation) {
1847 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1848
1849 const Id coords = GetCoordinates(operation, Type::Float);
1850
1851 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1852 std::vector<Id> operands;
1853 Id texture{};
1854
1855 if (!meta.aoffi.empty()) {
1856 mask = mask | spv::ImageOperandsMask::Offset;
1857 operands.push_back(GetOffsetCoordinates(operation));
1858 }
1859
1860 if (meta.sampler.is_shadow) {
1861 texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords,
1862 AsFloat(Visit(meta.depth_compare)), mask, operands);
1863 } else {
1864 u32 component_value = 0;
1865 if (meta.component) {
1866 const auto component = std::get_if<ImmediateNode>(&*meta.component);
1867 ASSERT_MSG(component, "Component is not an immediate value");
1868 component_value = component->GetValue();
1869 }
1870 texture = OpImageGather(t_float4, GetTextureSampler(operation), coords,
1871 Constant(t_uint, component_value), mask, operands);
1872 }
1873 return GetTextureElement(operation, texture, Type::Float);
1874 }
1875
1876 Expression TextureQueryDimensions(Operation operation) {
1877 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1878 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1879 UNIMPLEMENTED_IF(meta.depth_compare);
1880
1881 const auto image_id = GetTextureImage(operation);
1882 if (meta.element == 3) {
1883 return {OpImageQueryLevels(t_int, image_id), Type::Int};
1884 }
1885
1886 const Id lod = AsUint(Visit(operation[0]));
1887 const std::size_t coords_count = [&meta] {
1888 switch (const auto type = meta.sampler.type) {
1889 case Tegra::Shader::TextureType::Texture1D:
1890 return 1;
1891 case Tegra::Shader::TextureType::Texture2D:
1892 case Tegra::Shader::TextureType::TextureCube:
1893 return 2;
1894 case Tegra::Shader::TextureType::Texture3D:
1895 return 3;
1896 default:
1897 UNREACHABLE_MSG("Invalid texture type={}", type);
1898 return 2;
1899 }
1900 }();
1901
1902 if (meta.element >= coords_count) {
1903 return {v_float_zero, Type::Float};
1904 }
1905
1906 const std::array<Id, 3> types = {t_int, t_int2, t_int3};
1907 const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod);
1908 const Id size = OpCompositeExtract(t_int, sizes, meta.element);
1909 return {size, Type::Int};
1910 }
1911
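    // LOD query (TMML). The queried level is handed back to the guest as a fixed-point value,
    // which is presumably why the float result of OpImageQueryLod is scaled by 256
    // (8 fractional bits) and converted to an integer here.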
1912 Expression TextureQueryLod(Operation operation) {
1913 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1914 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1915 UNIMPLEMENTED_IF(meta.depth_compare);
1916
1917 if (meta.element >= 2) {
1918 UNREACHABLE_MSG("Invalid element");
1919 return {v_float_zero, Type::Float};
1920 }
1921 const auto sampler_id = GetTextureSampler(operation);
1922
1923 const Id multiplier = Constant(t_float, 256.0f);
1924 const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier);
1925
1926 const Id coords = GetCoordinates(operation, Type::Float);
1927 Id size = OpImageQueryLod(t_float2, sampler_id, coords);
1928 size = OpFMul(t_float2, size, multipliers);
1929 size = OpConvertFToS(t_int2, size);
1930 return GetTextureElement(operation, size, Type::Int);
1931 }
1932
1933 Expression TexelFetch(Operation operation) {
1934 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1935 UNIMPLEMENTED_IF(meta.depth_compare);
1936
1937 const Id image = GetTextureImage(operation);
1938 const Id coords = GetCoordinates(operation, Type::Int);
1939
1940 spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone;
1941 std::vector<Id> operands;
1942 Id fetch;
1943
1944 if (meta.lod && !meta.sampler.is_buffer) {
1945 mask = mask | spv::ImageOperandsMask::Lod;
1946 operands.push_back(AsInt(Visit(meta.lod)));
1947 }
1948
1949 if (!meta.aoffi.empty()) {
1950 mask = mask | spv::ImageOperandsMask::Offset;
1951 operands.push_back(GetOffsetCoordinates(operation));
1952 }
1953
1954 fetch = OpImageFetch(t_float4, image, coords, mask, operands);
1955 return GetTextureElement(operation, fetch, Type::Float);
1956 }
1957
1958 Expression TextureGradient(Operation operation) {
1959 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1960 UNIMPLEMENTED_IF(!meta.aoffi.empty());
1961
1962 const Id sampler = GetTextureSampler(operation);
1963 const Id coords = GetCoordinates(operation, Type::Float);
1964 const auto [dx, dy] = GetDerivatives(operation);
1965 const std::vector grad = {dx, dy};
1966
1967 static constexpr auto mask = spv::ImageOperandsMask::Grad;
1968 const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad);
1969 return GetTextureElement(operation, texture, Type::Float);
1970 }
1971
1972 Expression ImageLoad(Operation operation) {
1973 if (!device.IsFormatlessImageLoadSupported()) {
1974 return {v_float_zero, Type::Float};
1975 }
1976
1977 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1978
1979 const Id coords = GetCoordinates(operation, Type::Int);
1980 const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
1981
1982 return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
1983 }
1984
1985 Expression ImageStore(Operation operation) {
1986 const auto meta{std::get<MetaImage>(operation.GetMeta())};
1987 std::vector<Id> colors;
1988 for (const auto& value : meta.values) {
1989 colors.push_back(AsUint(Visit(value)));
1990 }
1991
1992 const Id coords = GetCoordinates(operation, Type::Int);
1993 const Id texel = OpCompositeConstruct(t_uint4, colors);
1994
1995 OpImageWrite(GetImage(operation), coords, texel, {});
1996 return {};
1997 }
1998
1999 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2000 Expression AtomicImage(Operation operation) {
2001 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
2002 ASSERT(meta.values.size() == 1);
2003
2004 const Id coordinate = GetCoordinates(operation, Type::Int);
2005 const Id image = images.at(meta.image.index).image;
2006 const Id sample = v_uint_zero;
2007 const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
2008
2009 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2010 const Id semantics = v_uint_zero;
2011 const Id value = AsUint(Visit(meta.values[0]));
2012 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2013 }
2014
2015 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2016 Expression Atomic(Operation operation) {
2017 Id pointer;
2018 if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
2019 pointer = GetSharedMemoryPointer(*smem);
2020 } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
2021 pointer = GetGlobalMemoryPointer(*gmem);
2022 } else {
2023 UNREACHABLE();
2024 return {v_float_zero, Type::Float};
2025 }
2026 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
2027 const Id semantics = v_uint_zero;
2028 const Id value = AsUint(Visit(operation[1]));
2029
2030 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
2031 }
2032
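    // Reduce performs the same atomic operation as Atomic but discards the result, for
    // reduction operations whose return value is unused.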
2033 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
2034 Expression Reduce(Operation operation) {
2035 Atomic<func>(operation);
2036 return {};
2037 }
2038
2039 Expression Branch(Operation operation) {
2040 const auto& target = std::get<ImmediateNode>(*operation[0]);
2041 OpStore(jmp_to, Constant(t_uint, target.GetValue()));
2042 OpBranch(continue_label);
2043 inside_branch = true;
2044 if (!conditional_branch_set) {
2045 AddLabel();
2046 }
2047 return {};
2048 }
2049
2050 Expression BranchIndirect(Operation operation) {
2051 const Id op_a = AsUint(Visit(operation[0]));
2052
2053 OpStore(jmp_to, op_a);
2054 OpBranch(continue_label);
2055 inside_branch = true;
2056 if (!conditional_branch_set) {
2057 AddLabel();
2058 }
2059 return {};
2060 }
2061
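    // SSY/PBK control flow is emulated with small function-local stacks (see CreateFlowStack):
    // PushFlowStack records a target address and bumps the top index, while PopFlowStack reads
    // it back, stores it into jmp_to and branches back through continue_label.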
2062 Expression PushFlowStack(Operation operation) {
2063 const auto& target = std::get<ImmediateNode>(*operation[0]);
2064 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2065 const Id current = OpLoad(t_uint, flow_stack_top);
2066 const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1));
2067 const Id access = OpAccessChain(t_func_uint, flow_stack, current);
2068
2069 OpStore(access, Constant(t_uint, target.GetValue()));
2070 OpStore(flow_stack_top, next);
2071 return {};
2072 }
2073
2074 Expression PopFlowStack(Operation operation) {
2075 const auto [flow_stack, flow_stack_top] = GetFlowStack(operation);
2076 const Id current = OpLoad(t_uint, flow_stack_top);
2077 const Id previous = OpISub(t_uint, current, Constant(t_uint, 1));
2078 const Id access = OpAccessChain(t_func_uint, flow_stack, previous);
2079 const Id target = OpLoad(t_uint, access);
2080
2081 OpStore(flow_stack_top, previous);
2082 OpStore(jmp_to, target);
2083 OpBranch(continue_label);
2084 inside_branch = true;
2085 if (!conditional_branch_set) {
2086 AddLabel();
2087 }
2088 return {};
2089 }
2090
2091 Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) {
2092 using Compare = Maxwell::ComparisonOp;
2093 switch (compare_op) {
2094 case Compare::NeverOld:
2095 return v_false; // Never let the test pass
2096 case Compare::LessOld:
2097 return OpFOrdLessThan(t_bool, operand_1, operand_2);
2098 case Compare::EqualOld:
2099 return OpFOrdEqual(t_bool, operand_1, operand_2);
2100 case Compare::LessEqualOld:
2101 return OpFOrdLessThanEqual(t_bool, operand_1, operand_2);
2102 case Compare::GreaterOld:
2103 return OpFOrdGreaterThan(t_bool, operand_1, operand_2);
2104 case Compare::NotEqualOld:
2105 return OpFOrdNotEqual(t_bool, operand_1, operand_2);
2106 case Compare::GreaterEqualOld:
2107 return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2);
2108 default:
2109 UNREACHABLE();
2110 return v_true;
2111 }
2112 }
2113
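    // Emulates the fixed-function alpha test in the fragment shader: the alpha output is
    // compared against the specialized reference value and the fragment is killed when the
    // test fails.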
2114 void AlphaTest(Id pointer) {
2115 if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) {
2116 return;
2117 }
2118 const Id true_label = OpLabel();
2119 const Id discard_label = OpLabel();
2120 const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref);
2121 const Id alpha_value = OpLoad(t_float, pointer);
2122 const Id condition =
2123 MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference);
2124
2125 OpBranchConditional(condition, true_label, discard_label);
2126 AddLabel(discard_label);
2127 OpKill();
2128 AddLabel(true_label);
2129 }
2130
2131 void PreExit() {
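        // Convert an OpenGL-style [-1, 1] clip-space depth to Vulkan's [0, 1] convention by
        // remapping z to (z + w) / 2 before the vertex stage finishes.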
2132 if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) {
2133 const u32 position_index = out_indices.position.value();
2134 const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U);
2135 const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U);
2136 Id depth = OpLoad(t_float, z_pointer);
2137 depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer));
2138 depth = OpFMul(t_float, depth, Constant(t_float, 0.5f));
2139 OpStore(z_pointer, depth);
2140 }
2141 if (stage == ShaderType::Fragment) {
2142 const auto SafeGetRegister = [this](u32 reg) {
2143 if (const auto it = registers.find(reg); it != registers.end()) {
2144 return OpLoad(t_float, it->second);
2145 }
2146 return v_float_zero;
2147 };
2148
2149 UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0,
2150 "Sample mask write is unimplemented");
2151
2152 // Write the color outputs using the data in the shader registers; disabled
2153 // rendertargets/components are skipped in the register assignment.
2154 u32 current_reg = 0;
2155 for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
2156 // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
2157 for (u32 component = 0; component < 4; ++component) {
2158 if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
2159 continue;
2160 }
2161 const Id pointer = AccessElement(t_out_float, frag_colors[rt], component);
2162 OpStore(pointer, SafeGetRegister(current_reg));
2163 if (rt == 0 && component == 3) {
2164 AlphaTest(pointer);
2165 }
2166 ++current_reg;
2167 }
2168 }
2169 if (header.ps.omap.depth) {
2170 // The depth output is always 2 registers after the last color output, and
2171 // current_reg already contains one past the last color register.
2172 OpStore(frag_depth, SafeGetRegister(current_reg + 1));
2173 }
2174 }
2175 }
2176
2177 Expression Exit(Operation operation) {
2178 PreExit();
2179 inside_branch = true;
2180 if (conditional_branch_set) {
2181 OpReturn();
2182 } else {
2183 const Id dummy = OpLabel();
2184 OpBranch(dummy);
2185 AddLabel(dummy);
2186 OpReturn();
2187 AddLabel();
2188 }
2189 return {};
2190 }
2191
2192 Expression Discard(Operation operation) {
2193 inside_branch = true;
2194 if (conditional_branch_set) {
2195 OpKill();
2196 } else {
2197 const Id dummy = OpLabel();
2198 OpBranch(dummy);
2199 AddLabel(dummy);
2200 OpKill();
2201 AddLabel();
2202 }
2203 return {};
2204 }
2205
2206 Expression EmitVertex(Operation) {
2207 OpEmitVertex();
2208 return {};
2209 }
2210
2211 Expression EndPrimitive(Operation operation) {
2212 OpEndPrimitive();
2213 return {};
2214 }
2215
2216 Expression InvocationId(Operation) {
2217 return {OpLoad(t_int, invocation_id), Type::Int};
2218 }
2219
2220 Expression YNegate(Operation) {
2221 LOG_WARNING(Render_Vulkan, "(STUBBED)");
2222 return {Constant(t_float, 1.0f), Type::Float};
2223 }
2224
2225 template <u32 element>
2226 Expression LocalInvocationId(Operation) {
2227 const Id id = OpLoad(t_uint3, local_invocation_id);
2228 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2229 }
2230
2231 template <u32 element>
2232 Expression WorkGroupId(Operation operation) {
2233 const Id id = OpLoad(t_uint3, workgroup_id);
2234 return {OpCompositeExtract(t_uint, id, element), Type::Uint};
2235 }
2236
2237 Expression BallotThread(Operation operation) {
2238 const Id predicate = AsBool(Visit(operation[0]));
2239 const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate);
2240
2241 if (!device.IsWarpSizePotentiallyBiggerThanGuest()) {
2242 // Guest-like devices can just return the first index.
2243 return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint};
2244 }
2245
2246 // The other devices have to return the uint that is local to the current thread.
2247 // For instance, a device with a warp size of 64 will return the upper uint when the
2248 // current thread is 38.
2249 const Id tid = OpLoad(t_uint, thread_id);
2250 const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5));
2251 return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint};
2252 }
2253
2254 template <Id (Module::*func)(Id, Id)>
2255 Expression Vote(Operation operation) {
2256 // TODO(Rodrigo): Handle devices with different warp sizes
2257 const Id predicate = AsBool(Visit(operation[0]));
2258 return {(this->*func)(t_bool, predicate), Type::Bool};
2259 }
2260
2261 Expression ThreadId(Operation) {
2262 return {OpLoad(t_uint, thread_id), Type::Uint};
2263 }
2264
2265 template <std::size_t index>
2266 Expression ThreadMask(Operation) {
2267 // TODO(Rodrigo): Handle devices with different warp sizes
2268 const Id mask = thread_masks[index];
2269 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2270 }
2271
2272 Expression ShuffleIndexed(Operation operation) {
2273 const Id value = AsFloat(Visit(operation[0]));
2274 const Id index = AsUint(Visit(operation[1]));
2275 return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
2276 }
2277
2278 Expression Barrier(Operation) {
2279 if (!ir.IsDecompiled()) {
2280 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
2281 return {};
2282 }
2283
2284 const auto scope = spv::Scope::Workgroup;
2285 const auto memory = spv::Scope::Workgroup;
2286 const auto semantics =
2287 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
2288 OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
2289 Constant(t_uint, static_cast<u32>(memory)),
2290 Constant(t_uint, static_cast<u32>(semantics)));
2291 return {};
2292 }
2293
2294 template <spv::Scope scope>
2295 Expression MemoryBarrier(Operation) {
2296 const auto semantics =
2297 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2298 spv::MemorySemanticsMask::WorkgroupMemory |
2299 spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory;
2300
2301 OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)),
2302 Constant(t_uint, static_cast<u32>(semantics)));
2303 return {};
2304 }
2305
2306 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) {
2307 const Id id = OpVariable(type, storage);
2308 Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin));
2309 AddGlobalVariable(Name(id, std::move(name)));
2310 interfaces.push_back(id);
2311 return id;
2312 }
2313
2314 Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) {
2315 return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name));
2316 }
2317
2318 template <typename... Args>
2319 Id AccessElement(Id pointer_type, Id composite, Args... elements_) {
2320 std::vector<Id> members;
2321 auto elements = {elements_...};
2322 for (const auto element : elements) {
2323 members.push_back(Constant(t_uint, element));
2324 }
2325
2326 return OpAccessChain(pointer_type, composite, members);
2327 }
2328
2329 Id As(Expression expr, Type wanted_type) {
2330 switch (wanted_type) {
2331 case Type::Bool:
2332 return AsBool(expr);
2333 case Type::Bool2:
2334 return AsBool2(expr);
2335 case Type::Float:
2336 return AsFloat(expr);
2337 case Type::Int:
2338 return AsInt(expr);
2339 case Type::Uint:
2340 return AsUint(expr);
2341 case Type::HalfFloat:
2342 return AsHalfFloat(expr);
2343 default:
2344 UNREACHABLE();
2345 return expr.id;
2346 }
2347 }
2348
2349 Id AsBool(Expression expr) {
2350 ASSERT(expr.type == Type::Bool);
2351 return expr.id;
2352 }
2353
2354 Id AsBool2(Expression expr) {
2355 ASSERT(expr.type == Type::Bool2);
2356 return expr.id;
2357 }
2358
2359 Id AsFloat(Expression expr) {
2360 switch (expr.type) {
2361 case Type::Float:
2362 return expr.id;
2363 case Type::Int:
2364 case Type::Uint:
2365 return OpBitcast(t_float, expr.id);
2366 case Type::HalfFloat:
2367 if (device.IsFloat16Supported()) {
2368 return OpBitcast(t_float, expr.id);
2369 }
2370 return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id));
2371 default:
2372 UNREACHABLE();
2373 return expr.id;
2374 }
2375 }
2376
2377 Id AsInt(Expression expr) {
2378 switch (expr.type) {
2379 case Type::Int:
2380 return expr.id;
2381 case Type::Float:
2382 case Type::Uint:
2383 return OpBitcast(t_int, expr.id);
2384 case Type::HalfFloat:
2385 if (device.IsFloat16Supported()) {
2386 return OpBitcast(t_int, expr.id);
2387 }
2388 return OpPackHalf2x16(t_int, expr.id);
2389 default:
2390 UNREACHABLE();
2391 return expr.id;
2392 }
2393 }
2394
2395 Id AsUint(Expression expr) {
2396 switch (expr.type) {
2397 case Type::Uint:
2398 return expr.id;
2399 case Type::Float:
2400 case Type::Int:
2401 return OpBitcast(t_uint, expr.id);
2402 case Type::HalfFloat:
2403 if (device.IsFloat16Supported()) {
2404 return OpBitcast(t_uint, expr.id);
2405 }
2406 return OpPackHalf2x16(t_uint, expr.id);
2407 default:
2408 UNREACHABLE();
2409 return expr.id;
2410 }
2411 }
2412
2413 Id AsHalfFloat(Expression expr) {
2414 switch (expr.type) {
2415 case Type::HalfFloat:
2416 return expr.id;
2417 case Type::Float:
2418 case Type::Int:
2419 case Type::Uint:
2420 if (device.IsFloat16Supported()) {
2421 return OpBitcast(t_half, expr.id);
2422 }
2423 return OpUnpackHalf2x16(t_half, AsUint(expr));
2424 default:
2425 UNREACHABLE();
2426 return expr.id;
2427 }
2428 }
2429
2430 Id GetHalfScalarFromFloat(Id value) {
2431 if (device.IsFloat16Supported()) {
2432 return OpFConvert(t_scalar_half, value);
2433 }
2434 return value;
2435 }
2436
2437 Id GetFloatFromHalfScalar(Id value) {
2438 if (device.IsFloat16Supported()) {
2439 return OpFConvert(t_float, value);
2440 }
2441 return value;
2442 }
2443
2444 AttributeType GetAttributeType(u32 location) const {
2445 if (stage != ShaderType::Vertex) {
2446 return {Type::Float, t_in_float, t_in_float4};
2447 }
2448 switch (specialization.attribute_types.at(location)) {
2449 case Maxwell::VertexAttribute::Type::SignedNorm:
2450 case Maxwell::VertexAttribute::Type::UnsignedNorm:
2451 case Maxwell::VertexAttribute::Type::UnsignedScaled:
2452 case Maxwell::VertexAttribute::Type::SignedScaled:
2453 case Maxwell::VertexAttribute::Type::Float:
2454 return {Type::Float, t_in_float, t_in_float4};
2455 case Maxwell::VertexAttribute::Type::SignedInt:
2456 return {Type::Int, t_in_int, t_in_int4};
2457 case Maxwell::VertexAttribute::Type::UnsignedInt:
2458 return {Type::Uint, t_in_uint, t_in_uint4};
2459 default:
2460 UNREACHABLE();
2461 return {Type::Float, t_in_float, t_in_float4};
2462 }
2463 }
2464
2465 Id GetTypeDefinition(Type type) const {
2466 switch (type) {
2467 case Type::Bool:
2468 return t_bool;
2469 case Type::Bool2:
2470 return t_bool2;
2471 case Type::Float:
2472 return t_float;
2473 case Type::Int:
2474 return t_int;
2475 case Type::Uint:
2476 return t_uint;
2477 case Type::HalfFloat:
2478 return t_half;
2479 default:
2480 UNREACHABLE();
2481 return {};
2482 }
2483 }
2484
2485 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2486 switch (type) {
2487 case Type::Float:
2488 return {t_float, t_float2, t_float3, t_float4};
2489 case Type::Int:
2490 return {t_int, t_int2, t_int3, t_int4};
2491 case Type::Uint:
2492 return {t_uint, t_uint2, t_uint3, t_uint4};
2493 default:
2494 UNIMPLEMENTED();
2495 return {};
2496 }
2497 }
2498
2499 std::tuple<Id, Id> CreateFlowStack() {
2500 // TODO(Rodrigo): Figure out the actual depth of the flow stack; for now it seems unlikely
2501 // that shaders will use 20 nested SSYs and PBKs.
2502 constexpr u32 FLOW_STACK_SIZE = 20;
2503 constexpr auto storage_class = spv::StorageClass::Function;
2504
2505 const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE));
2506 const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class,
2507 ConstantNull(flow_stack_type));
2508 const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0));
2509 AddLocalVariable(stack);
2510 AddLocalVariable(top);
2511 return std::tie(stack, top);
2512 }
2513
2514 std::pair<Id, Id> GetFlowStack(Operation operation) {
2515 const auto stack_class = std::get<MetaStackClass>(operation.GetMeta());
2516 switch (stack_class) {
2517 case MetaStackClass::Ssy:
2518 return {ssy_flow_stack, ssy_flow_stack_top};
2519 case MetaStackClass::Pbk:
2520 return {pbk_flow_stack, pbk_flow_stack_top};
2521 }
2522 UNREACHABLE();
2523 return {};
2524 }
2525
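    // Global memory is exposed as an SSBO holding a runtime array of uints, so the byte
    // distance (real - base) is turned into a uint index with a right shift by 2 before
    // building the access chain.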
2526 Id GetGlobalMemoryPointer(const GmemNode& gmem) {
2527 const Id real = AsUint(Visit(gmem.GetRealAddress()));
2528 const Id base = AsUint(Visit(gmem.GetBaseAddress()));
2529 const Id diff = OpISub(t_uint, real, base);
2530 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
2531 const Id buffer = global_buffers.at(gmem.GetDescriptor());
2532 return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
2533 }
2534
2535 Id GetSharedMemoryPointer(const SmemNode& smem) {
2536 ASSERT(stage == ShaderType::Compute);
2537 Id address = AsUint(Visit(smem.GetAddress()));
2538 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
2539 return OpAccessChain(t_smem_uint, shared_memory, address);
2540 }
2541
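    // Dispatch table indexed by OperationCode. The entry order must match the enum exactly;
    // the static_assert below only checks that the element count matches.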
2542 static constexpr std::array operation_decompilers = {
2543 &SPIRVDecompiler::Assign,
2544
2545 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
2546 Type::Float>,
2547
2548 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>,
2549 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>,
2550 &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>,
2551 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>,
2552 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
2553 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
2554 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
2555 &SPIRVDecompiler::FCastHalf<0>,
2556 &SPIRVDecompiler::FCastHalf<1>,
2557 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
2558 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
2559 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
2560 &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>,
2561 &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>,
2562 &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>,
2563 &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>,
2564 &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>,
2565 &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>,
2566 &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>,
2567 &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>,
2568 &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
2569 &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
2570 &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
2571 &SPIRVDecompiler::FSwizzleAdd,
2572
2573 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
2574 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
2575 &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>,
2576 &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>,
2577 &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>,
2578 &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>,
2579 &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>,
2580
2581 &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>,
2582 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>,
2583 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>,
2584 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>,
2585 &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>,
2586 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>,
2587 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>,
2588 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>,
2589 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>,
2590 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>,
2591 &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>,
2592 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>,
2593 &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>,
2594
2595 &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>,
2596 &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>,
2597 &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>,
2598 &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>,
2599 &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>,
2600 &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>,
2601 &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>,
2602 &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>,
2603 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2604 &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>,
2605 &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>,
2606 &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>,
2607 &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>,
2608 &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>,
2609 &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>,
2610 &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>,
2611 &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>,
2612 &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>,
2613
2614 &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>,
2615 &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>,
2616 &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>,
2617 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
2618 &SPIRVDecompiler::HNegate,
2619 &SPIRVDecompiler::HClamp,
2620 &SPIRVDecompiler::HCastFloat,
2621 &SPIRVDecompiler::HUnpack,
2622 &SPIRVDecompiler::HMergeF32,
2623 &SPIRVDecompiler::HMergeHN<0>,
2624 &SPIRVDecompiler::HMergeHN<1>,
2625 &SPIRVDecompiler::HPack2,
2626
2627 &SPIRVDecompiler::LogicalAssign,
2628 &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>,
2629 &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>,
2630 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
2631 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
2632 &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2,
2633 Type::Uint>,
2634 &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>,
2635
2636 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
2637 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
2638 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>,
2639 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
2640 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
2641 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
2642 &SPIRVDecompiler::LogicalFOrdered,
2643 &SPIRVDecompiler::LogicalFUnordered,
2644 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
2645 &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
2646 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
2647 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
2648 &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
2649 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
2650
2651 &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
2652 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
2653 &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>,
2654 &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>,
2655 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>,
2656 &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>,
2657
2658 &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>,
2659 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>,
2660 &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>,
2661 &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>,
2662 &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>,
2663 &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>,
2664
2665 &SPIRVDecompiler::LogicalAddCarry,
2666
2667 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2668 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2669 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2670 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2671 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2672 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2673 // TODO(Rodrigo): Should these use the OpFUnord* variants?
2674 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>,
2675 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>,
2676 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>,
2677 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>,
2678 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>,
2679 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>,
2680
2681 &SPIRVDecompiler::Texture,
2682 &SPIRVDecompiler::TextureLod,
2683 &SPIRVDecompiler::TextureGather,
2684 &SPIRVDecompiler::TextureQueryDimensions,
2685 &SPIRVDecompiler::TextureQueryLod,
2686 &SPIRVDecompiler::TexelFetch,
2687 &SPIRVDecompiler::TextureGradient,
2688
2689 &SPIRVDecompiler::ImageLoad,
2690 &SPIRVDecompiler::ImageStore,
2691 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
2692 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
2693 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
2694 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
2695 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
2696
2697 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2698 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2699 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
2700 &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
2701 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2702 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2703 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2704
2705 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2706 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
2707 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
2708 &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
2709 &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
2710 &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
2711 &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
2712
2713 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2714 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
2715 &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
2716 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2717 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2718 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2719
2720 &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
2721 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
2722 &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
2723 &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
2724 &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
2725 &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
2726
2727 &SPIRVDecompiler::Branch,
2728 &SPIRVDecompiler::BranchIndirect,
2729 &SPIRVDecompiler::PushFlowStack,
2730 &SPIRVDecompiler::PopFlowStack,
2731 &SPIRVDecompiler::Exit,
2732 &SPIRVDecompiler::Discard,
2733
2734 &SPIRVDecompiler::EmitVertex,
2735 &SPIRVDecompiler::EndPrimitive,
2736
2737 &SPIRVDecompiler::InvocationId,
2738 &SPIRVDecompiler::YNegate,
2739 &SPIRVDecompiler::LocalInvocationId<0>,
2740 &SPIRVDecompiler::LocalInvocationId<1>,
2741 &SPIRVDecompiler::LocalInvocationId<2>,
2742 &SPIRVDecompiler::WorkGroupId<0>,
2743 &SPIRVDecompiler::WorkGroupId<1>,
2744 &SPIRVDecompiler::WorkGroupId<2>,
2745
2746 &SPIRVDecompiler::BallotThread,
2747 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>,
2748 &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>,
2749 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2750
2751 &SPIRVDecompiler::ThreadId,
2752 &SPIRVDecompiler::ThreadMask<0>, // Eq
2753 &SPIRVDecompiler::ThreadMask<1>, // Ge
2754 &SPIRVDecompiler::ThreadMask<2>, // Gt
2755 &SPIRVDecompiler::ThreadMask<3>, // Le
2756 &SPIRVDecompiler::ThreadMask<4>, // Lt
2757 &SPIRVDecompiler::ShuffleIndexed,
2758
2759 &SPIRVDecompiler::Barrier,
2760 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2761 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2762 };
2763 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2764
2765 const Device& device;
2766 const ShaderIR& ir;
2767 const ShaderType stage;
2768 const Tegra::Shader::Header header;
2769 const Registry& registry;
2770 const Specialization& specialization;
2771 std::unordered_map<u8, VaryingTFB> transform_feedback;
2772
2773 const Id t_void = Name(TypeVoid(), "void");
2774
2775 const Id t_bool = Name(TypeBool(), "bool");
2776 const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2");
2777
2778 const Id t_int = Name(TypeInt(32, true), "int");
2779 const Id t_int2 = Name(TypeVector(t_int, 2), "int2");
2780 const Id t_int3 = Name(TypeVector(t_int, 3), "int3");
2781 const Id t_int4 = Name(TypeVector(t_int, 4), "int4");
2782
2783 const Id t_uint = Name(TypeInt(32, false), "uint");
2784 const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2");
2785 const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3");
2786 const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4");
2787
2788 const Id t_float = Name(TypeFloat(32), "float");
2789 const Id t_float2 = Name(TypeVector(t_float, 2), "float2");
2790 const Id t_float3 = Name(TypeVector(t_float, 3), "float3");
2791 const Id t_float4 = Name(TypeVector(t_float, 4), "float4");
2792
2793 const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool");
2794 const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float");
2795
2796 const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint");
2797
2798 const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool");
2799 const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int");
2800 const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4");
2801 const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint");
2802 const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3");
2803 const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4");
2804 const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float");
2805 const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2");
2806 const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3");
2807 const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4");
2808
2809 const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int");
2810
2811 const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float");
2812 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
2813
2814 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
2815 const Id t_cbuf_std140 = Decorate(
2816 Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"),
2817 spv::Decoration::ArrayStride, 16U);
2818 const Id t_cbuf_scalar = Decorate(
2819 Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"),
2820 spv::Decoration::ArrayStride, 4U);
2821 const Id t_cbuf_std140_struct = MemberDecorate(
2822 Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2823 const Id t_cbuf_scalar_struct = MemberDecorate(
2824 Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2825 const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
2826 const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
2827
2828 Id t_smem_uint{};
2829
2830 const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
2831 const Id t_gmem_array =
2832 Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
2833 const Id t_gmem_struct = MemberDecorate(
2834 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2835 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
2836
2837 const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
2838
2839 const Id v_float_zero = Constant(t_float, 0.0f);
2840 const Id v_float_one = Constant(t_float, 1.0f);
2841 const Id v_uint_zero = Constant(t_uint, 0);
2842
2843 // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
2844 const Id v_varying_default =
2845 ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one);
2846
2847 const Id v_true = ConstantTrue(t_bool);
2848 const Id v_false = ConstantFalse(t_bool);
2849
2850 Id t_scalar_half{};
2851 Id t_half{};
2852
2853 Id out_vertex{};
2854 Id in_vertex{};
2855 std::map<u32, Id> registers;
2856 std::map<u32, Id> custom_variables;
2857 std::map<Tegra::Shader::Pred, Id> predicates;
2858 std::map<u32, Id> flow_variables;
2859 Id local_memory{};
2860 Id shared_memory{};
2861 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2862 std::map<Attribute::Index, Id> input_attributes;
2863 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2864 std::map<u32, Id> constant_buffers;
2865 std::map<GlobalMemoryBase, Id> global_buffers;
2866 std::map<u32, TexelBuffer> uniform_texels;
2867 std::map<u32, SampledImage> sampled_images;
2868 std::map<u32, StorageImage> images;
2869
2870 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2871 Id instance_index{};
2872 Id vertex_index{};
2873 Id base_instance{};
2874 Id base_vertex{};
2875 Id frag_depth{};
2876 Id frag_coord{};
2877 Id front_facing{};
2878 Id point_coord{};
2879 Id tess_level_outer{};
2880 Id tess_level_inner{};
2881 Id tess_coord{};
2882 Id invocation_id{};
2883 Id workgroup_id{};
2884 Id local_invocation_id{};
2885 Id thread_id{};
2886 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2887
2888 VertexIndices in_indices;
2889 VertexIndices out_indices;
2890
2891 std::vector<Id> interfaces;
2892
2893 Id jmp_to{};
2894 Id ssy_flow_stack_top{};
2895 Id pbk_flow_stack_top{};
2896 Id ssy_flow_stack{};
2897 Id pbk_flow_stack{};
2898 Id continue_label{};
2899 std::map<u32, Id> labels;
2900
2901 bool conditional_branch_set{};
2902 bool inside_branch{};
2903};
2904
2905class ExprDecompiler {
2906public:
2907 explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2908
2909 Id operator()(const ExprAnd& expr) {
2910 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2911 const Id op1 = Visit(expr.operand1);
2912 const Id op2 = Visit(expr.operand2);
2913 return decomp.OpLogicalAnd(type_def, op1, op2);
2914 }
2915
2916 Id operator()(const ExprOr& expr) {
2917 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2918 const Id op1 = Visit(expr.operand1);
2919 const Id op2 = Visit(expr.operand2);
2920 return decomp.OpLogicalOr(type_def, op1, op2);
2921 }
2922
2923 Id operator()(const ExprNot& expr) {
2924 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
2925 const Id op1 = Visit(expr.operand1);
2926 return decomp.OpLogicalNot(type_def, op1);
2927 }
2928
2929 Id operator()(const ExprPredicate& expr) {
2930 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2931 return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred));
2932 }
2933
2934 Id operator()(const ExprCondCode& expr) {
2935 return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc)));
2936 }
2937
2938 Id operator()(const ExprVar& expr) {
2939 return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index));
2940 }
2941
2942 Id operator()(const ExprBoolean& expr) {
2943 return expr.value ? decomp.v_true : decomp.v_false;
2944 }
2945
2946 Id operator()(const ExprGprEqual& expr) {
2947 const Id target = decomp.Constant(decomp.t_uint, expr.value);
2948 Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr));
2949 gpr = decomp.OpBitcast(decomp.t_uint, gpr);
2950 return decomp.OpIEqual(decomp.t_bool, gpr, target);
2951 }
2952
2953 Id Visit(const Expr& node) {
2954 return std::visit(*this, *node);
2955 }
2956
2957private:
2958 SPIRVDecompiler& decomp;
2959};
2960
2961class ASTDecompiler {
2962public:
2963 explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {}
2964
2965 void operator()(const ASTProgram& ast) {
2966 ASTNode current = ast.nodes.GetFirst();
2967 while (current) {
2968 Visit(current);
2969 current = current->GetNext();
2970 }
2971 }
2972
2973 void operator()(const ASTIfThen& ast) {
2974 ExprDecompiler expr_parser{decomp};
2975 const Id condition = expr_parser.Visit(ast.condition);
2976 const Id then_label = decomp.OpLabel();
2977 const Id endif_label = decomp.OpLabel();
2978 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
2979 decomp.OpBranchConditional(condition, then_label, endif_label);
2980 decomp.AddLabel(then_label);
2981 ASTNode current = ast.nodes.GetFirst();
2982 while (current) {
2983 Visit(current);
2984 current = current->GetNext();
2985 }
2986 decomp.OpBranch(endif_label);
2987 decomp.AddLabel(endif_label);
2988 }
2989
2990 void operator()([[maybe_unused]] const ASTIfElse& ast) {
2991 UNREACHABLE();
2992 }
2993
2994 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2995 UNREACHABLE();
2996 }
2997
2998 void operator()(const ASTBlockDecoded& ast) {
2999 decomp.VisitBasicBlock(ast.nodes);
3000 }
3001
3002 void operator()(const ASTVarSet& ast) {
3003 ExprDecompiler expr_parser{decomp};
3004 const Id condition = expr_parser.Visit(ast.condition);
3005 decomp.OpStore(decomp.flow_variables.at(ast.index), condition);
3006 }
3007
3008 void operator()([[maybe_unused]] const ASTLabel& ast) {
3009 // Do nothing
3010 }
3011
3012 void operator()([[maybe_unused]] const ASTGoto& ast) {
3013 UNREACHABLE();
3014 }
3015
3016 void operator()(const ASTDoWhile& ast) {
3017 const Id loop_label = decomp.OpLabel();
3018 const Id endloop_label = decomp.OpLabel();
3019 const Id loop_start_block = decomp.OpLabel();
3020 const Id loop_continue_block = decomp.OpLabel();
3021 current_loop_exit = endloop_label;
3022 decomp.OpBranch(loop_label);
3023 decomp.AddLabel(loop_label);
3024 decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone);
3025 decomp.OpBranch(loop_start_block);
3026 decomp.AddLabel(loop_start_block);
3027 ASTNode current = ast.nodes.GetFirst();
3028 while (current) {
3029 Visit(current);
3030 current = current->GetNext();
3031 }
3032 decomp.OpBranch(loop_continue_block);
3033 decomp.AddLabel(loop_continue_block);
3034 ExprDecompiler expr_parser{decomp};
3035 const Id condition = expr_parser.Visit(ast.condition);
3036 decomp.OpBranchConditional(condition, loop_label, endloop_label);
3037 decomp.AddLabel(endloop_label);
3038 }
3039
3040 void operator()(const ASTReturn& ast) {
3041 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3042 ExprDecompiler expr_parser{decomp};
3043 const Id condition = expr_parser.Visit(ast.condition);
3044 const Id then_label = decomp.OpLabel();
3045 const Id endif_label = decomp.OpLabel();
3046 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3047 decomp.OpBranchConditional(condition, then_label, endif_label);
3048 decomp.AddLabel(then_label);
3049 if (ast.kills) {
3050 decomp.OpKill();
3051 } else {
3052 decomp.PreExit();
3053 decomp.OpReturn();
3054 }
3055 decomp.AddLabel(endif_label);
3056 } else {
3057 const Id next_block = decomp.OpLabel();
3058 decomp.OpBranch(next_block);
3059 decomp.AddLabel(next_block);
3060 if (ast.kills) {
3061 decomp.OpKill();
3062 } else {
3063 decomp.PreExit();
3064 decomp.OpReturn();
3065 }
3066 decomp.AddLabel(decomp.OpLabel());
3067 }
3068 }
3069
3070 void operator()(const ASTBreak& ast) {
3071 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
3072 ExprDecompiler expr_parser{decomp};
3073 const Id condition = expr_parser.Visit(ast.condition);
3074 const Id then_label = decomp.OpLabel();
3075 const Id endif_label = decomp.OpLabel();
3076 decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone);
3077 decomp.OpBranchConditional(condition, then_label, endif_label);
3078 decomp.AddLabel(then_label);
3079 decomp.OpBranch(current_loop_exit);
3080 decomp.AddLabel(endif_label);
3081 } else {
3082 const Id next_block = decomp.OpLabel();
3083 decomp.OpBranch(next_block);
3084 decomp.AddLabel(next_block);
3085 decomp.OpBranch(current_loop_exit);
3086 decomp.AddLabel(decomp.OpLabel());
3087 }
3088 }
3089
3090 void Visit(const ASTNode& node) {
3091 std::visit(*this, *node->GetInnerData());
3092 }
3093
3094private:
3095 SPIRVDecompiler& decomp;
3096 Id current_loop_exit{};
3097};
3098
3099void SPIRVDecompiler::DecompileAST() {
3100 const u32 num_flow_variables = ir.GetASTNumVariables();
3101 for (u32 i = 0; i < num_flow_variables; i++) {
3102 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
3103 Name(id, fmt::format("flow_var_{}", i));
3104 flow_variables.emplace(i, AddGlobalVariable(id));
3105 }
3106
3107 DefinePrologue();
3108
3109 const ASTNode program = ir.GetASTProgram();
3110 ASTDecompiler decompiler{*this};
3111 decompiler.Visit(program);
3112
3113 const Id next_block = OpLabel();
3114 OpBranch(next_block);
3115 AddLabel(next_block);
3116}
3117
3118} // Anonymous namespace
3119
3120ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3121 ShaderEntries entries;
3122 for (const auto& cbuf : ir.GetConstantBuffers()) {
3123 entries.const_buffers.emplace_back(cbuf.second, cbuf.first);
3124 }
3125 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
3126 entries.global_buffers.emplace_back(GlobalBufferEntry{
3127 .cbuf_index = base.cbuf_index,
3128 .cbuf_offset = base.cbuf_offset,
3129 .is_written = usage.is_written,
3130 });
3131 }
3132 for (const auto& sampler : ir.GetSamplers()) {
3133 if (sampler.is_buffer) {
3134 entries.uniform_texels.emplace_back(sampler);
3135 } else {
3136 entries.samplers.emplace_back(sampler);
3137 }
3138 }
3139 for (const auto& image : ir.GetImages()) {
3140 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
3141 entries.storage_texels.emplace_back(image);
3142 } else {
3143 entries.images.emplace_back(image);
3144 }
3145 }
3146 for (const auto& attribute : ir.GetInputAttributes()) {
3147 if (IsGenericAttribute(attribute)) {
3148 entries.attributes.insert(GetGenericAttributeLocation(attribute));
3149 }
3150 }
3151 for (const auto& buffer : entries.const_buffers) {
3152 entries.enabled_uniform_buffers |= 1U << buffer.GetIndex();
3153 }
3154 entries.clip_distances = ir.GetClipDistances();
3155 entries.shader_length = ir.GetLength();
3156 entries.uses_warps = ir.UsesWarps();
3157 return entries;
3158}
3159
3160std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
3161 ShaderType stage, const VideoCommon::Shader::Registry& registry,
3162 const Specialization& specialization) {
3163 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
3164}
3165
3166} // namespace Vulkan
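
The deleted decompiler walks its control-flow expression tree with std::visit and one operator() overload per node type (ExprDecompiler/ASTDecompiler above). Below is a minimal standalone sketch of that visitor pattern; the node types are made up for illustration and are not the yuzu IR.

#include <cstdio>
#include <memory>
#include <variant>

struct Expr; // node wrapper holding the variant, defined below
using ExprPtr = std::shared_ptr<Expr>;

struct ExprBool { bool value; };
struct ExprNot  { ExprPtr operand; };
struct ExprAnd  { ExprPtr lhs, rhs; };

struct Expr { std::variant<ExprAnd, ExprNot, ExprBool> data; };

// One operator() per node type, dispatched through std::visit, as in ExprDecompiler.
struct Evaluator {
    bool operator()(const ExprBool& e) const { return e.value; }
    bool operator()(const ExprNot& e) const { return !Visit(e.operand); }
    bool operator()(const ExprAnd& e) const { return Visit(e.lhs) && Visit(e.rhs); }
    bool Visit(const ExprPtr& node) const { return std::visit(*this, node->data); }
};

int main() {
    const auto lit = [](bool v) { return std::make_shared<Expr>(Expr{ExprBool{v}}); };
    const ExprPtr tree = std::make_shared<Expr>(
        Expr{ExprAnd{lit(true), std::make_shared<Expr>(Expr{ExprNot{lit(false)}})}});
    std::printf("%d\n", Evaluator{}.Visit(tree) ? 1 : 0); // prints 1
}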
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
deleted file mode 100644
index 5d94132a5..000000000
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <set>
9#include <vector>
10
11#include "common/common_types.h"
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/engines/shader_type.h"
14#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace Vulkan {
18
19class Device;
20
21using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22using UniformTexelEntry = VideoCommon::Shader::SamplerEntry;
23using SamplerEntry = VideoCommon::Shader::SamplerEntry;
24using StorageTexelEntry = VideoCommon::Shader::ImageEntry;
25using ImageEntry = VideoCommon::Shader::ImageEntry;
26
27constexpr u32 DESCRIPTOR_SET = 0;
28
29class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
30public:
31 explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_)
32 : ConstBuffer{entry_}, index{index_} {}
33
34 constexpr u32 GetIndex() const {
35 return index;
36 }
37
38private:
39 u32 index{};
40};
41
42struct GlobalBufferEntry {
43 u32 cbuf_index{};
44 u32 cbuf_offset{};
45 bool is_written{};
46};
47
48struct ShaderEntries {
49 u32 NumBindings() const {
50 return static_cast<u32>(const_buffers.size() + global_buffers.size() +
51 uniform_texels.size() + samplers.size() + storage_texels.size() +
52 images.size());
53 }
54
55 std::vector<ConstBufferEntry> const_buffers;
56 std::vector<GlobalBufferEntry> global_buffers;
57 std::vector<UniformTexelEntry> uniform_texels;
58 std::vector<SamplerEntry> samplers;
59 std::vector<StorageTexelEntry> storage_texels;
60 std::vector<ImageEntry> images;
61 std::set<u32> attributes;
62 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
63 std::size_t shader_length{};
64 u32 enabled_uniform_buffers{};
65 bool uses_warps{};
66};
67
68struct Specialization final {
69 u32 base_binding{};
70
71 // Compute specific
72 std::array<u32, 3> workgroup_size{};
73 u32 shared_memory_size{};
74
75 // Graphics specific
76 std::optional<float> point_size;
77 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
78 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
79 bool ndc_minus_one_to_one{};
80 bool early_fragment_tests{};
81 float alpha_test_ref{};
82 Maxwell::ComparisonOp alpha_test_func{};
83};
84// Old gcc versions don't consider this trivially copyable.
85// static_assert(std::is_trivially_copyable_v<Specialization>);
86
87struct SPIRVShader {
88 std::vector<u32> code;
89 ShaderEntries entries;
90};
91
92ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
93
94std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
95 Tegra::Engines::ShaderType stage,
96 const VideoCommon::Shader::Registry& registry,
97 const Specialization& specialization);
98
99} // namespace Vulkan
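
The removed header tracks which constant buffers a shader uses as a bitmask (enabled_uniform_buffers |= 1U << index in GenerateShaderEntries above). A small standalone sketch of building and walking such a mask; the indices here are hypothetical.

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
    std::uint32_t enabled_uniform_buffers = 0;
    for (const std::uint32_t index : {0u, 2u, 5u}) { // hypothetical cbuf indices
        enabled_uniform_buffers |= 1u << index;
    }
    // Walk the set bits from lowest to highest, clearing each one as it is visited.
    for (std::uint32_t mask = enabled_uniform_buffers; mask != 0; mask &= mask - 1) {
        std::printf("uniform buffer %d enabled\n", std::countr_zero(mask));
    }
}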
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0412b5234..555b12ed7 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
91 .flags = 0, 91 .flags = 0,
92 .size = STREAM_BUFFER_SIZE, 92 .size = STREAM_BUFFER_SIZE,
93 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | 93 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
94 VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 94 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
95 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 95 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
96 .queueFamilyIndexCount = 0, 96 .queueFamilyIndexCount = 0,
97 .pQueueFamilyIndices = nullptr, 97 .pQueueFamilyIndices = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 956f86845..e3b7dd61c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags;
29 29
30Flags MakeInvalidationFlags() { 30Flags MakeInvalidationFlags() {
31 static constexpr int INVALIDATION_FLAGS[]{ 31 static constexpr int INVALIDATION_FLAGS[]{
32 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, 32 Viewports, Scissors, DepthBias, BlendConstants, DepthBounds,
33 StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, 33 StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable,
34 DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, 34 DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable,
35 VertexBuffers, VertexInput,
35 }; 36 };
36 Flags flags{}; 37 Flags flags{};
37 for (const int flag : INVALIDATION_FLAGS) { 38 for (const int flag : INVALIDATION_FLAGS) {
@@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() {
40 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { 41 for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) {
41 flags[index] = true; 42 flags[index] = true;
42 } 43 }
44 for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) {
45 flags[index] = true;
46 }
47 for (int index = VertexBinding0; index <= VertexBinding31; ++index) {
48 flags[index] = true;
49 }
43 return flags; 50 return flags;
44} 51}
45 52
@@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) {
79 table[OFF(stencil_back_func_mask)] = StencilProperties; 86 table[OFF(stencil_back_func_mask)] = StencilProperties;
80} 87}
81 88
89void SetupDirtyLineWidth(Tables& tables) {
90 tables[0][OFF(line_width_smooth)] = LineWidth;
91 tables[0][OFF(line_width_aliased)] = LineWidth;
92}
93
82void SetupDirtyCullMode(Tables& tables) { 94void SetupDirtyCullMode(Tables& tables) {
83 auto& table = tables[0]; 95 auto& table = tables[0];
84 table[OFF(cull_face)] = CullMode; 96 table[OFF(cull_face)] = CullMode;
@@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) {
134 FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); 146 FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending);
135} 147}
136 148
137void SetupDirtyInstanceDivisors(Tables& tables) { 149void SetupDirtyViewportSwizzles(Tables& tables) {
138 static constexpr size_t divisor_offset = 3; 150 static constexpr size_t swizzle_offset = 6;
139 for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { 151 for (size_t index = 0; index < Regs::NumViewports; ++index) {
140 tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; 152 tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] =
141 tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = 153 ViewportSwizzles;
142 InstanceDivisors;
143 } 154 }
144} 155}
145 156
146void SetupDirtyVertexAttributes(Tables& tables) { 157void SetupDirtyVertexAttributes(Tables& tables) {
147 FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); 158 for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
159 const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
160 FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i);
161 }
162 FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput);
148} 163}
149 164
150void SetupDirtyViewportSwizzles(Tables& tables) { 165void SetupDirtyVertexBindings(Tables& tables) {
151 static constexpr size_t swizzle_offset = 6; 166 // Do NOT include stride here, it's implicit in VertexBuffer
152 for (size_t index = 0; index < Regs::NumViewports; ++index) { 167 static constexpr size_t divisor_offset = 3;
153 tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = 168 for (size_t i = 0; i < Regs::NumVertexArrays; ++i) {
154 ViewportSwizzles; 169 const u8 flag = static_cast<u8>(VertexBinding0 + i);
170 tables[0][OFF(instanced_arrays) + i] = VertexInput;
171 tables[1][OFF(instanced_arrays) + i] = flag;
172 tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput;
173 tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag;
155 } 174 }
156} 175}
157} // Anonymous namespace 176} // Anonymous namespace
158 177
159StateTracker::StateTracker(Tegra::GPU& gpu) 178StateTracker::StateTracker(Tegra::GPU& gpu)
160 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { 179 : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
161 auto& tables = gpu.Maxwell3D().dirty.tables; 180 auto& tables{gpu.Maxwell3D().dirty.tables};
162 SetupDirtyFlags(tables); 181 SetupDirtyFlags(tables);
163 SetupDirtyViewports(tables); 182 SetupDirtyViewports(tables);
164 SetupDirtyScissors(tables); 183 SetupDirtyScissors(tables);
@@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
166 SetupDirtyBlendConstants(tables); 185 SetupDirtyBlendConstants(tables);
167 SetupDirtyDepthBounds(tables); 186 SetupDirtyDepthBounds(tables);
168 SetupDirtyStencilProperties(tables); 187 SetupDirtyStencilProperties(tables);
188 SetupDirtyLineWidth(tables);
169 SetupDirtyCullMode(tables); 189 SetupDirtyCullMode(tables);
170 SetupDirtyDepthBoundsEnable(tables); 190 SetupDirtyDepthBoundsEnable(tables);
171 SetupDirtyDepthTestEnable(tables); 191 SetupDirtyDepthTestEnable(tables);
@@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
175 SetupDirtyStencilOp(tables); 195 SetupDirtyStencilOp(tables);
176 SetupDirtyStencilTestEnable(tables); 196 SetupDirtyStencilTestEnable(tables);
177 SetupDirtyBlending(tables); 197 SetupDirtyBlending(tables);
178 SetupDirtyInstanceDivisors(tables);
179 SetupDirtyVertexAttributes(tables);
180 SetupDirtyViewportSwizzles(tables); 198 SetupDirtyViewportSwizzles(tables);
199 SetupDirtyVertexAttributes(tables);
200 SetupDirtyVertexBindings(tables);
181} 201}
182 202
183} // namespace Vulkan 203} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 84e918a71..5f78f6950 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -19,12 +19,19 @@ namespace Dirty {
19enum : u8 { 19enum : u8 {
20 First = VideoCommon::Dirty::LastCommonEntry, 20 First = VideoCommon::Dirty::LastCommonEntry,
21 21
22 VertexInput,
23 VertexAttribute0,
24 VertexAttribute31 = VertexAttribute0 + 31,
25 VertexBinding0,
26 VertexBinding31 = VertexBinding0 + 31,
27
22 Viewports, 28 Viewports,
23 Scissors, 29 Scissors,
24 DepthBias, 30 DepthBias,
25 BlendConstants, 31 BlendConstants,
26 DepthBounds, 32 DepthBounds,
27 StencilProperties, 33 StencilProperties,
34 LineWidth,
28 35
29 CullMode, 36 CullMode,
30 DepthBoundsEnable, 37 DepthBoundsEnable,
@@ -36,11 +43,9 @@ enum : u8 {
36 StencilTestEnable, 43 StencilTestEnable,
37 44
38 Blending, 45 Blending,
39 InstanceDivisors,
40 VertexAttributes,
41 ViewportSwizzles, 46 ViewportSwizzles,
42 47
43 Last 48 Last,
44}; 49};
45static_assert(Last <= std::numeric_limits<u8>::max()); 50static_assert(Last <= std::numeric_limits<u8>::max());
46 51
@@ -89,6 +94,10 @@ public:
89 return Exchange(Dirty::StencilProperties, false); 94 return Exchange(Dirty::StencilProperties, false);
90 } 95 }
91 96
97 bool TouchLineWidth() const {
98 return Exchange(Dirty::LineWidth, false);
99 }
100
92 bool TouchCullMode() { 101 bool TouchCullMode() {
93 return Exchange(Dirty::CullMode, false); 102 return Exchange(Dirty::CullMode, false);
94 } 103 }
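
The Touch* helpers added here follow a test-and-clear pattern over a set of dirty flags. A simplified standalone sketch of that idea; this is not the actual Maxwell3D dirty-table machinery, just the shape of the Exchange/Touch pattern.

#include <bitset>
#include <cstdio>

enum Dirty : std::size_t { Viewports, LineWidth, CullMode, Last };

class StateTracker {
public:
    void Mark(std::size_t flag) { flags.set(flag); }

    // Returns the current value and overwrites it, like the Touch* helpers.
    bool Exchange(std::size_t flag, bool new_value) {
        const bool is_dirty = flags.test(flag);
        flags.set(flag, new_value);
        return is_dirty;
    }

    bool TouchLineWidth() { return Exchange(Dirty::LineWidth, false); }

private:
    std::bitset<Dirty::Last> flags;
};

int main() {
    StateTracker tracker;
    tracker.Mark(Dirty::LineWidth);
    std::printf("%d %d\n", tracker.TouchLineWidth() ? 1 : 0, tracker.TouchLineWidth() ? 1 : 0); // 1 0
}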
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index dfd5c65ba..d990eefba 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul
65VKSwapchain::~VKSwapchain() = default; 65VKSwapchain::~VKSwapchain() = default;
66 66
67void VKSwapchain::Create(u32 width, u32 height, bool srgb) { 67void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
68 is_outdated = false;
69 is_suboptimal = false;
70
68 const auto physical_device = device.GetPhysical(); 71 const auto physical_device = device.GetPhysical();
69 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; 72 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
70 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { 73 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
@@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
82 resource_ticks.resize(image_count); 85 resource_ticks.resize(image_count);
83} 86}
84 87
85bool VKSwapchain::AcquireNextImage() { 88void VKSwapchain::AcquireNextImage() {
86 const VkResult result = 89 const VkResult result = device.GetLogical().AcquireNextImageKHR(
87 device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), 90 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
88 *present_semaphores[frame_index], {}, &image_index); 91 VK_NULL_HANDLE, &image_index);
89 92 switch (result) {
93 case VK_SUCCESS:
94 break;
95 case VK_SUBOPTIMAL_KHR:
96 is_suboptimal = true;
97 break;
98 case VK_ERROR_OUT_OF_DATE_KHR:
99 is_outdated = true;
100 break;
101 default:
102 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
103 break;
104 }
90 scheduler.Wait(resource_ticks[image_index]); 105 scheduler.Wait(resource_ticks[image_index]);
91 return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; 106 resource_ticks[image_index] = scheduler.CurrentTick();
92} 107}
93 108
94bool VKSwapchain::Present(VkSemaphore render_semaphore) { 109void VKSwapchain::Present(VkSemaphore render_semaphore) {
95 const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; 110 const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
96 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; 111 const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
97 const auto present_queue{device.GetPresentQueue()}; 112 const auto present_queue{device.GetPresentQueue()};
98 bool recreated = false;
99
100 const VkPresentInfoKHR present_info{ 113 const VkPresentInfoKHR present_info{
101 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, 114 .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
102 .pNext = nullptr, 115 .pNext = nullptr,
@@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
107 .pImageIndices = &image_index, 120 .pImageIndices = &image_index,
108 .pResults = nullptr, 121 .pResults = nullptr,
109 }; 122 };
110
111 switch (const VkResult result = present_queue.Present(present_info)) { 123 switch (const VkResult result = present_queue.Present(present_info)) {
112 case VK_SUCCESS: 124 case VK_SUCCESS:
113 break; 125 break;
@@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) {
115 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); 127 LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
116 break; 128 break;
117 case VK_ERROR_OUT_OF_DATE_KHR: 129 case VK_ERROR_OUT_OF_DATE_KHR:
118 if (current_width > 0 && current_height > 0) { 130 is_outdated = true;
119 Create(current_width, current_height, current_srgb);
120 recreated = true;
121 }
122 break; 131 break;
123 default: 132 default:
124 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); 133 LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
125 break; 134 break;
126 } 135 }
127 136 ++frame_index;
128 resource_ticks[image_index] = scheduler.CurrentTick(); 137 if (frame_index >= image_count) {
129 frame_index = (frame_index + 1) % static_cast<u32>(image_count); 138 frame_index = 0;
130 return recreated; 139 }
131}
132
133bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const {
134 // TODO(Rodrigo): Handle framebuffer pixel format changes
135 return framebuffer.width != current_width || framebuffer.height != current_height;
136} 140}
137 141
138void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, 142void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
@@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
148 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 152 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
149 requested_image_count = capabilities.maxImageCount; 153 requested_image_count = capabilities.maxImageCount;
150 } 154 }
151
152 VkSwapchainCreateInfoKHR swapchain_ci{ 155 VkSwapchainCreateInfoKHR swapchain_ci{
153 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, 156 .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
154 .pNext = nullptr, 157 .pNext = nullptr,
@@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
169 .clipped = VK_FALSE, 172 .clipped = VK_FALSE,
170 .oldSwapchain = nullptr, 173 .oldSwapchain = nullptr,
171 }; 174 };
172
173 const u32 graphics_family{device.GetGraphicsFamily()}; 175 const u32 graphics_family{device.GetGraphicsFamily()};
174 const u32 present_family{device.GetPresentFamily()}; 176 const u32 present_family{device.GetPresentFamily()};
175 const std::array<u32, 2> queue_indices{graphics_family, present_family}; 177 const std::array<u32, 2> queue_indices{graphics_family, present_family};
@@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
178 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); 180 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
179 swapchain_ci.pQueueFamilyIndices = queue_indices.data(); 181 swapchain_ci.pQueueFamilyIndices = queue_indices.data();
180 } 182 }
181
182 // Request the size again to reduce the possibility of a TOCTOU race condition. 183 // Request the size again to reduce the possibility of a TOCTOU race condition.
183 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); 184 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
184 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); 185 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
@@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
186 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); 187 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
187 188
188 extent = swapchain_ci.imageExtent; 189 extent = swapchain_ci.imageExtent;
189 current_width = extent.width;
190 current_height = extent.height;
191 current_srgb = srgb; 190 current_srgb = srgb;
192 191
193 images = swapchain.GetImages(); 192 images = swapchain.GetImages();
@@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities,
197 196
198void VKSwapchain::CreateSemaphores() { 197void VKSwapchain::CreateSemaphores() {
199 present_semaphores.resize(image_count); 198 present_semaphores.resize(image_count);
200 std::generate(present_semaphores.begin(), present_semaphores.end(), 199 std::ranges::generate(present_semaphores,
201 [this] { return device.GetLogical().CreateSemaphore(); }); 200 [this] { return device.GetLogical().CreateSemaphore(); });
202} 201}
203 202
204void VKSwapchain::CreateImageViews() { 203void VKSwapchain::CreateImageViews() {
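
The reworked swapchain no longer recreates itself when it sees VK_ERROR_OUT_OF_DATE_KHR; it records the condition (is_outdated / is_suboptimal) and lets the caller decide when to recreate. A standalone control-flow sketch of that flag-and-recreate-in-caller flow, with a stubbed acquire result instead of real Vulkan calls.

#include <cstdio>

// Stand-ins for the VkResult values the new AcquireNextImage inspects.
enum class AcquireResult { Success, Suboptimal, OutOfDate };

struct Swapchain {
    bool is_outdated = false;
    bool is_suboptimal = false;

    void Create() { is_outdated = false; is_suboptimal = false; }

    // Stubbed acquire: record the state instead of returning a success flag.
    void AcquireNextImage(AcquireResult simulated) {
        switch (simulated) {
        case AcquireResult::Success:    break;
        case AcquireResult::Suboptimal: is_suboptimal = true; break;
        case AcquireResult::OutOfDate:  is_outdated = true;   break;
        }
    }
    bool IsOutDated() const { return is_outdated; }
    bool IsSubOptimal() const { return is_suboptimal; }
};

int main() {
    Swapchain swapchain;
    swapchain.Create();
    swapchain.AcquireNextImage(AcquireResult::OutOfDate);
    if (swapchain.IsOutDated()) { // the caller, not the swapchain, recreates
        swapchain.Create();
        swapchain.AcquireNextImage(AcquireResult::Success);
    }
    std::printf("outdated=%d suboptimal=%d\n", swapchain.IsOutDated(), swapchain.IsSubOptimal());
}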
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index adc8d27cf..35c2cdc14 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -28,14 +28,25 @@ public:
28 void Create(u32 width, u32 height, bool srgb); 28 void Create(u32 width, u32 height, bool srgb);
29 29
30 /// Acquires the next image in the swapchain, waits as needed. 30 /// Acquires the next image in the swapchain, waits as needed.
31 bool AcquireNextImage(); 31 void AcquireNextImage();
32 32
 33 /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be 33 /// Presents the rendered image to the swapchain.
 34 /// recreated. Takes responsibility for ownership of the fence. 34 void Present(VkSemaphore render_semaphore);
35 bool Present(VkSemaphore render_semaphore);
36 35
37 /// Returns true when the framebuffer layout has changed. 36 /// Returns true when the color space has changed.
38 bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; 37 bool HasColorSpaceChanged(bool is_srgb) const {
38 return current_srgb != is_srgb;
39 }
40
41 /// Returns true when the swapchain is outdated.
42 bool IsOutDated() const {
43 return is_outdated;
44 }
45
46 /// Returns true when the swapchain is suboptimal.
47 bool IsSubOptimal() const {
48 return is_suboptimal;
49 }
39 50
40 VkExtent2D GetSize() const { 51 VkExtent2D GetSize() const {
41 return extent; 52 return extent;
@@ -61,10 +72,6 @@ public:
61 return image_format; 72 return image_format;
62 } 73 }
63 74
64 bool GetSrgbState() const {
65 return current_srgb;
66 }
67
68private: 75private:
69 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, 76 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
70 bool srgb); 77 bool srgb);
@@ -92,9 +99,9 @@ private:
92 VkFormat image_format{}; 99 VkFormat image_format{};
93 VkExtent2D extent{}; 100 VkExtent2D extent{};
94 101
95 u32 current_width{};
96 u32 current_height{};
97 bool current_srgb{}; 102 bool current_srgb{};
103 bool is_outdated{};
104 bool is_suboptimal{};
98}; 105};
99 106
100} // namespace Vulkan 107} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 88ccf96f5..8e029bcb3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,6 +15,7 @@
15#include "video_core/renderer_vulkan/maxwell_to_vk.h" 15#include "video_core/renderer_vulkan/maxwell_to_vk.h"
16#include "video_core/renderer_vulkan/vk_compute_pass.h" 16#include "video_core/renderer_vulkan/vk_compute_pass.h"
17#include "video_core/renderer_vulkan/vk_rasterizer.h" 17#include "video_core/renderer_vulkan/vk_rasterizer.h"
18#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
18#include "video_core/renderer_vulkan/vk_scheduler.h" 19#include "video_core/renderer_vulkan/vk_scheduler.h"
19#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 20#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
20#include "video_core/renderer_vulkan/vk_texture_cache.h" 21#include "video_core/renderer_vulkan/vk_texture_cache.h"
@@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange;
34using VideoCore::Surface::IsPixelFormatASTC; 35using VideoCore::Surface::IsPixelFormatASTC;
35 36
36namespace { 37namespace {
37
38constexpr std::array ATTACHMENT_REFERENCES{
39 VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL},
40 VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL},
41 VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL},
42 VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL},
43 VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL},
44 VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL},
45 VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL},
46 VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL},
47 VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL},
48};
49
50constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { 38constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
51 if (color == std::array<float, 4>{0, 0, 0, 0}) { 39 if (color == std::array<float, 4>{0, 0, 0, 0}) {
52 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; 40 return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
@@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
174 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); 162 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
175} 163}
176 164
177[[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) {
178 if (info.type != ImageType::Buffer) {
179 return vk::Buffer{};
180 }
181 const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format);
182 return device.GetLogical().CreateBuffer(VkBufferCreateInfo{
183 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
184 .pNext = nullptr,
185 .flags = 0,
186 .size = info.size.width * bytes_per_block,
187 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
188 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT |
189 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
190 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
191 .queueFamilyIndexCount = 0,
192 .pQueueFamilyIndices = nullptr,
193 });
194}
195
196[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { 165[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
197 switch (VideoCore::Surface::GetFormatType(format)) { 166 switch (VideoCore::Surface::GetFormatType(format)) {
198 case VideoCore::Surface::SurfaceType::ColorTexture: 167 case VideoCore::Surface::SurfaceType::ColorTexture:
@@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
226 } 195 }
227} 196}
228 197
229[[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device,
230 const ImageView* image_view) {
231 using MaxwellToVK::SurfaceFormat;
232 const PixelFormat pixel_format = image_view->format;
233 return VkAttachmentDescription{
234 .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
235 .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format,
236 .samples = image_view->Samples(),
237 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
238 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
239 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
240 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
241 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
242 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
243 };
244}
245
246[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { 198[[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) {
247 switch (swizzle) { 199 switch (swizzle) {
248 case SwizzleSource::Zero: 200 case SwizzleSource::Zero:
@@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
263 return VK_COMPONENT_SWIZZLE_ZERO; 215 return VK_COMPONENT_SWIZZLE_ZERO;
264} 216}
265 217
218[[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) {
219 switch (type) {
220 case Shader::TextureType::Color1D:
221 return VK_IMAGE_VIEW_TYPE_1D;
222 case Shader::TextureType::Color2D:
223 return VK_IMAGE_VIEW_TYPE_2D;
224 case Shader::TextureType::ColorCube:
225 return VK_IMAGE_VIEW_TYPE_CUBE;
226 case Shader::TextureType::Color3D:
227 return VK_IMAGE_VIEW_TYPE_3D;
228 case Shader::TextureType::ColorArray1D:
229 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
230 case Shader::TextureType::ColorArray2D:
231 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
232 case Shader::TextureType::ColorArrayCube:
233 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
234 case Shader::TextureType::Buffer:
235 UNREACHABLE_MSG("Texture buffers can't be image views");
236 return VK_IMAGE_VIEW_TYPE_1D;
237 }
238 UNREACHABLE_MSG("Invalid image view type={}", type);
239 return VK_IMAGE_VIEW_TYPE_2D;
240}
241
266[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { 242[[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) {
267 switch (type) { 243 switch (type) {
268 case VideoCommon::ImageViewType::e1D: 244 case VideoCommon::ImageViewType::e1D:
@@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
280 case VideoCommon::ImageViewType::CubeArray: 256 case VideoCommon::ImageViewType::CubeArray:
281 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; 257 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
282 case VideoCommon::ImageViewType::Rect: 258 case VideoCommon::ImageViewType::Rect:
283 LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); 259 UNIMPLEMENTED_MSG("Rect image view");
284 return VK_IMAGE_VIEW_TYPE_2D; 260 return VK_IMAGE_VIEW_TYPE_2D;
285 case VideoCommon::ImageViewType::Buffer: 261 case VideoCommon::ImageViewType::Buffer:
286 UNREACHABLE_MSG("Texture buffers can't be image views"); 262 UNREACHABLE_MSG("Texture buffers can't be image views");
@@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
327 }; 303 };
328} 304}
329 305
330[[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 306[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies(
331 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 307 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
332 std::vector<VkBufferCopy> result(copies.size()); 308 std::vector<VkBufferCopy> result(copies.size());
333 std::ranges::transform( 309 std::ranges::transform(
@@ -587,6 +563,28 @@ struct RangedBarrierRange {
587 } 563 }
588}; 564};
589 565
566[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
567 switch (format) {
568 case Shader::ImageFormat::Typeless:
569 break;
570 case Shader::ImageFormat::R8_SINT:
571 return VK_FORMAT_R8_SINT;
572 case Shader::ImageFormat::R8_UINT:
573 return VK_FORMAT_R8_UINT;
574 case Shader::ImageFormat::R16_UINT:
575 return VK_FORMAT_R16_UINT;
576 case Shader::ImageFormat::R16_SINT:
577 return VK_FORMAT_R16_SINT;
578 case Shader::ImageFormat::R32_UINT:
579 return VK_FORMAT_R32_UINT;
580 case Shader::ImageFormat::R32G32_UINT:
581 return VK_FORMAT_R32G32_UINT;
582 case Shader::ImageFormat::R32G32B32A32_UINT:
583 return VK_FORMAT_R32G32B32A32_UINT;
584 }
585 UNREACHABLE_MSG("Invalid image format={}", format);
586 return VK_FORMAT_R32_UINT;
587}
590} // Anonymous namespace 588} // Anonymous namespace
591 589
592void TextureCacheRuntime::Finish() { 590void TextureCacheRuntime::Finish() {
@@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
625 return; 623 return;
626 } 624 }
627 } 625 }
628 ASSERT(src.ImageFormat() == dst.ImageFormat()); 626 ASSERT(src.format == dst.format);
629 ASSERT(!(is_dst_msaa && !is_src_msaa)); 627 ASSERT(!(is_dst_msaa && !is_src_msaa));
630 ASSERT(operation == Fermi2D::Operation::SrcCopy); 628 ASSERT(operation == Fermi2D::Operation::SrcCopy);
631 629
@@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
842Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, 840Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
843 VAddr cpu_addr_) 841 VAddr cpu_addr_)
844 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, 842 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
845 image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), 843 image(MakeImage(runtime.device, info)),
844 commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)),
846 aspect_mask(ImageAspectMask(info.format)) { 845 aspect_mask(ImageAspectMask(info.format)) {
847 if (image) {
848 commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
849 } else {
850 commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
851 }
852 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { 846 if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
853 if (Settings::values.accelerate_astc.GetValue()) { 847 if (Settings::values.accelerate_astc.GetValue()) {
854 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 848 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
@@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
857 } 851 }
858 } 852 }
859 if (runtime.device.HasDebuggingToolAttached()) { 853 if (runtime.device.HasDebuggingToolAttached()) {
860 if (image) { 854 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
861 image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
862 } else {
863 buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
864 }
865 } 855 }
866 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ 856 static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
867 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, 857 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
@@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag
913 }); 903 });
914} 904}
915 905
916void Image::UploadMemory(const StagingBufferRef& map,
917 std::span<const VideoCommon::BufferCopy> copies) {
918 // TODO: Move this to another API
919 scheduler->RequestOutsideRenderPassOperationContext();
920 std::vector vk_copies = TransformBufferCopies(copies, map.offset);
921 const VkBuffer src_buffer = map.buffer;
922 const VkBuffer dst_buffer = *buffer;
923 scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) {
924 // TODO: Barriers
925 cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies);
926 });
927}
928
929void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { 906void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
930 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); 907 std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask);
931 scheduler->RequestOutsideRenderPassOperationContext(); 908 scheduler->RequestOutsideRenderPassOperationContext();
@@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm
984ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, 961ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
985 ImageId image_id_, Image& image) 962 ImageId image_id_, Image& image)
986 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, 963 : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
987 image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( 964 image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} {
988 image.info.num_samples)} { 965 using Shader::TextureType;
966
989 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); 967 const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
990 std::array<SwizzleSource, 4> swizzle{ 968 std::array<SwizzleSource, 4> swizzle{
991 SwizzleSource::R, 969 SwizzleSource::R,
@@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1023 }, 1001 },
1024 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), 1002 .subresourceRange = MakeSubresourceRange(aspect_mask, info.range),
1025 }; 1003 };
1026 const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { 1004 const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) {
1027 VkImageViewCreateInfo ci{create_info}; 1005 VkImageViewCreateInfo ci{create_info};
1028 ci.viewType = ImageViewType(view_type); 1006 ci.viewType = ImageViewType(tex_type);
1029 if (num_layers) { 1007 if (num_layers) {
1030 ci.subresourceRange.layerCount = *num_layers; 1008 ci.subresourceRange.layerCount = *num_layers;
1031 } 1009 }
1032 vk::ImageView handle = device->GetLogical().CreateImageView(ci); 1010 vk::ImageView handle = device->GetLogical().CreateImageView(ci);
1033 if (device->HasDebuggingToolAttached()) { 1011 if (device->HasDebuggingToolAttached()) {
1034 handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); 1012 handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
1035 } 1013 }
1036 image_views[static_cast<size_t>(view_type)] = std::move(handle); 1014 image_views[static_cast<size_t>(tex_type)] = std::move(handle);
1037 }; 1015 };
1038 switch (info.type) { 1016 switch (info.type) {
1039 case VideoCommon::ImageViewType::e1D: 1017 case VideoCommon::ImageViewType::e1D:
1040 case VideoCommon::ImageViewType::e1DArray: 1018 case VideoCommon::ImageViewType::e1DArray:
1041 create(VideoCommon::ImageViewType::e1D, 1); 1019 create(TextureType::Color1D, 1);
1042 create(VideoCommon::ImageViewType::e1DArray, std::nullopt); 1020 create(TextureType::ColorArray1D, std::nullopt);
1043 render_target = Handle(VideoCommon::ImageViewType::e1DArray); 1021 render_target = Handle(TextureType::ColorArray1D);
1044 break; 1022 break;
1045 case VideoCommon::ImageViewType::e2D: 1023 case VideoCommon::ImageViewType::e2D:
1046 case VideoCommon::ImageViewType::e2DArray: 1024 case VideoCommon::ImageViewType::e2DArray:
1047 create(VideoCommon::ImageViewType::e2D, 1); 1025 create(TextureType::Color2D, 1);
1048 create(VideoCommon::ImageViewType::e2DArray, std::nullopt); 1026 create(TextureType::ColorArray2D, std::nullopt);
1049 render_target = Handle(VideoCommon::ImageViewType::e2DArray); 1027 render_target = Handle(Shader::TextureType::ColorArray2D);
1050 break; 1028 break;
1051 case VideoCommon::ImageViewType::e3D: 1029 case VideoCommon::ImageViewType::e3D:
1052 create(VideoCommon::ImageViewType::e3D, std::nullopt); 1030 create(TextureType::Color3D, std::nullopt);
1053 render_target = Handle(VideoCommon::ImageViewType::e3D); 1031 render_target = Handle(Shader::TextureType::Color3D);
1054 break; 1032 break;
1055 case VideoCommon::ImageViewType::Cube: 1033 case VideoCommon::ImageViewType::Cube:
1056 case VideoCommon::ImageViewType::CubeArray: 1034 case VideoCommon::ImageViewType::CubeArray:
1057 create(VideoCommon::ImageViewType::Cube, 6); 1035 create(TextureType::ColorCube, 6);
1058 create(VideoCommon::ImageViewType::CubeArray, std::nullopt); 1036 create(TextureType::ColorArrayCube, std::nullopt);
1059 break; 1037 break;
1060 case VideoCommon::ImageViewType::Rect: 1038 case VideoCommon::ImageViewType::Rect:
1061 UNIMPLEMENTED(); 1039 UNIMPLEMENTED();
1062 break; 1040 break;
1063 case VideoCommon::ImageViewType::Buffer: 1041 case VideoCommon::ImageViewType::Buffer:
1064 buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ 1042 UNREACHABLE();
1065 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1066 .pNext = nullptr,
1067 .flags = 0,
1068 .buffer = image.Buffer(),
1069 .format = format_info.format,
1070 .offset = 0, // TODO: Redesign buffer cache to support this
1071 .range = image.guest_size_bytes,
1072 });
1073 break; 1043 break;
1074 } 1044 }
1075} 1045}
1076 1046
1047ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1048 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
1049 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
1050 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
1051
1077ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) 1052ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params)
1078 : VideoCommon::ImageViewBase{params} {} 1053 : VideoCommon::ImageViewBase{params} {}
1079 1054
@@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() {
1081 if (depth_view) { 1056 if (depth_view) {
1082 return *depth_view; 1057 return *depth_view;
1083 } 1058 }
1084 depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); 1059 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1060 depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT);
1085 return *depth_view; 1061 return *depth_view;
1086} 1062}
1087 1063
@@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() {
1089 if (stencil_view) { 1065 if (stencil_view) {
1090 return *stencil_view; 1066 return *stencil_view;
1091 } 1067 }
1092 stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); 1068 const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
1069 stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT);
1093 return *stencil_view; 1070 return *stencil_view;
1094} 1071}
1095 1072
1096vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { 1073VkImageView ImageView::StorageView(Shader::TextureType texture_type,
1074 Shader::ImageFormat image_format) {
1075 if (image_format == Shader::ImageFormat::Typeless) {
1076 return Handle(texture_type);
1077 }
1078 const bool is_signed{image_format == Shader::ImageFormat::R8_SINT ||
1079 image_format == Shader::ImageFormat::R16_SINT};
1080 if (!storage_views) {
1081 storage_views = std::make_unique<StorageViews>();
1082 }
1083 auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds};
1084 auto& view{views[static_cast<size_t>(texture_type)]};
1085 if (view) {
1086 return *view;
1087 }
1088 view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT);
1089 return *view;
1090}
1091
1092vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
1097 return device->GetLogical().CreateImageView({ 1093 return device->GetLogical().CreateImageView({
1098 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 1094 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1099 .pNext = nullptr, 1095 .pNext = nullptr,
1100 .flags = 0, 1096 .flags = 0,
1101 .image = image_handle, 1097 .image = image_handle,
1102 .viewType = ImageViewType(type), 1098 .viewType = ImageViewType(type),
1103 .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, 1099 .format = vk_format,
1104 .components{ 1100 .components{
1105 .r = VK_COMPONENT_SWIZZLE_IDENTITY, 1101 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
1106 .g = VK_COMPONENT_SWIZZLE_IDENTITY, 1102 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
@@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
1164 1160
1165Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, 1161Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers,
1166 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { 1162 ImageView* depth_buffer, const VideoCommon::RenderTargets& key) {
-    std::vector<VkAttachmentDescription> descriptions;
     std::vector<VkImageView> attachments;
     RenderPassKey renderpass_key{};
     s32 num_layers = 1;
@@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
             renderpass_key.color_formats[index] = PixelFormat::Invalid;
             continue;
         }
-        descriptions.push_back(AttachmentDescription(runtime.device, color_buffer));
         attachments.push_back(color_buffer->RenderTarget());
         renderpass_key.color_formats[index] = color_buffer->format;
         num_layers = std::max(num_layers, color_buffer->range.extent.layers);
@@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
         ++num_images;
     }
     const size_t num_colors = attachments.size();
-    const VkAttachmentReference* depth_attachment =
-        depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr;
     if (depth_buffer) {
-        descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer));
         attachments.push_back(depth_buffer->RenderTarget());
         renderpass_key.depth_format = depth_buffer->format;
         num_layers = std::max(num_layers, depth_buffer->range.extent.layers);
@@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
     }
     renderpass_key.samples = samples;
 
-    const auto& device = runtime.device.GetLogical();
-    const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key);
-    if (is_new) {
-        const VkSubpassDescription subpass{
-            .flags = 0,
-            .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-            .inputAttachmentCount = 0,
-            .pInputAttachments = nullptr,
-            .colorAttachmentCount = static_cast<u32>(num_colors),
-            .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr,
-            .pResolveAttachments = nullptr,
-            .pDepthStencilAttachment = depth_attachment,
-            .preserveAttachmentCount = 0,
-            .pPreserveAttachments = nullptr,
-        };
-        cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{
-            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .attachmentCount = static_cast<u32>(descriptions.size()),
-            .pAttachments = descriptions.data(),
-            .subpassCount = 1,
-            .pSubpasses = &subpass,
-            .dependencyCount = 0,
-            .pDependencies = nullptr,
-        });
-    }
-    renderpass = *cache_pair->second;
+    renderpass = runtime.render_pass_cache.Get(renderpass_key);
+
     render_area = VkExtent2D{
         .width = key.size.width,
         .height = key.size.height,
     };
     num_color_buffers = static_cast<u32>(num_colors);
-    framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{
+    framebuffer = runtime.device.GetLogical().CreateFramebuffer({
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .pNext = nullptr,
         .flags = 0,
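Note on this hunk: Framebuffer no longer assembles VkAttachmentDescription/VkSubpassDescription or keeps a per-runtime renderpass_cache map; it only fills a RenderPassKey and asks the shared RenderPassCache for the VkRenderPass. Below is a minimal sketch of that caching pattern, assuming a Get(const RenderPassKey&) entry point and a std::hash<RenderPassKey> specialization like the one removed from vk_texture_cache.h further down; MakeRenderPass is a hypothetical helper, not the actual implementation in vk_render_pass_cache.cpp.

    // Sketch only: one VkRenderPass per unique {color formats, depth format, samples} key,
    // shared across framebuffers and pipelines instead of being created inline.
    // Requires <mutex>, <unordered_map> and a std::hash<RenderPassKey> specialization.
    class RenderPassCache {
    public:
        explicit RenderPassCache(const Device& device_) : device{&device_} {}

        VkRenderPass Get(const RenderPassKey& key) {
            std::scoped_lock lock{mutex};
            const auto [pair, is_new] = cache.try_emplace(key);
            if (is_new) {
                // MakeRenderPass (hypothetical) builds the attachment descriptions and the
                // single subpass from the key's formats and sample count.
                pair->second = MakeRenderPass(*device, key);
            }
            return *pair->second;
        }

    private:
        const Device* device{};
        std::mutex mutex;
        std::unordered_map<RenderPassKey, vk::RenderPass> cache;
    };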
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 172bcdf98..0b73d55f8 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -7,6 +7,7 @@
 #include <compare>
 #include <span>
 
+#include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/texture_cache/texture_cache.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
@@ -26,35 +27,10 @@ class Device;
 class Image;
 class ImageView;
 class Framebuffer;
+class RenderPassCache;
 class StagingBufferPool;
 class VKScheduler;
 
-struct RenderPassKey {
-    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;
-
-    std::array<PixelFormat, NUM_RT> color_formats;
-    PixelFormat depth_format;
-    VkSampleCountFlagBits samples;
-};
-
-} // namespace Vulkan
-
-namespace std {
-template <>
-struct hash<Vulkan::RenderPassKey> {
-    [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
-        size_t value = static_cast<size_t>(key.depth_format) << 48;
-        value ^= static_cast<size_t>(key.samples) << 52;
-        for (size_t i = 0; i < key.color_formats.size(); ++i) {
-            value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
-        }
-        return value;
-    }
-};
-} // namespace std
-
-namespace Vulkan {
-
 struct TextureCacheRuntime {
     const Device& device;
     VKScheduler& scheduler;
@@ -62,13 +38,13 @@ struct TextureCacheRuntime {
     StagingBufferPool& staging_buffer_pool;
     BlitImageHelper& blit_image_helper;
     ASTCDecoderPass& astc_decoder_pass;
-    std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
+    RenderPassCache& render_pass_cache;
 
     void Finish();
 
-    [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
+    StagingBufferRef UploadStagingBuffer(size_t size);
 
-    [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
+    StagingBufferRef DownloadStagingBuffer(size_t size);
 
     void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
                    const Region2D& dst_region, const Region2D& src_region,
@@ -79,7 +55,7 @@ struct TextureCacheRuntime {
 
     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view);
 
-    [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept {
+    bool CanAccelerateImageUpload(Image&) const noexcept {
         return false;
     }
 
@@ -117,8 +93,6 @@ public:
     void UploadMemory(const StagingBufferRef& map,
                       std::span<const VideoCommon::BufferImageCopy> copies);
 
-    void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies);
-
     void DownloadMemory(const StagingBufferRef& map,
                         std::span<const VideoCommon::BufferImageCopy> copies);
 
@@ -126,10 +100,6 @@ public:
     return *image;
     }
 
-    [[nodiscard]] VkBuffer Buffer() const noexcept {
-        return *buffer;
-    }
-
     [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
         return aspect_mask;
     }
@@ -146,7 +116,6 @@ public:
 private:
     VKScheduler* scheduler;
     vk::Image image;
-    vk::Buffer buffer;
     MemoryCommit commit;
     vk::ImageView image_view;
     std::vector<vk::ImageView> storage_image_views;
@@ -157,18 +126,19 @@ private:
 class ImageView : public VideoCommon::ImageViewBase {
 public:
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
+                       const VideoCommon::ImageViewInfo&, GPUVAddr);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
 
     [[nodiscard]] VkImageView DepthView();
 
     [[nodiscard]] VkImageView StencilView();
 
-    [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept {
-        return *image_views[static_cast<size_t>(query_type)];
-    }
+    [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type,
+                                          Shader::ImageFormat image_format);
 
-    [[nodiscard]] VkBufferView BufferView() const noexcept {
-        return *buffer_view;
+    [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept {
+        return *image_views[static_cast<size_t>(texture_type)];
     }
 
     [[nodiscard]] VkImage ImageHandle() const noexcept {
@@ -179,26 +149,36 @@ public:
     return render_target;
     }
 
-    [[nodiscard]] PixelFormat ImageFormat() const noexcept {
-        return image_format;
-    }
-
     [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept {
         return samples;
     }
 
+    [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
+        return gpu_addr;
+    }
+
+    [[nodiscard]] u32 BufferSize() const noexcept {
+        return buffer_size;
+    }
+
 private:
-    [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask);
+    struct StorageViews {
+        std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds;
+        std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds;
+    };
+
+    [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
 
     const Device* device = nullptr;
-    std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views;
+    std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
+    std::unique_ptr<StorageViews> storage_views;
     vk::ImageView depth_view;
     vk::ImageView stencil_view;
-    vk::BufferView buffer_view;
     VkImage image_handle = VK_NULL_HANDLE;
     VkImageView render_target = VK_NULL_HANDLE;
-    PixelFormat image_format = PixelFormat::Invalid;
     VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
+    GPUVAddr gpu_addr = 0;
+    u32 buffer_size = 0;
 };
 
 class ImageAlloc : public VideoCommon::ImageAllocBase {};
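Note on this header: RenderPassKey and its std::hash specialization are gone from vk_texture_cache.h, and ImageView is now indexed by the shader recompiler's Shader::TextureType instead of VideoCommon::ImageViewType, with signed/unsigned storage views available via StorageView(). A rough usage sketch under those assumptions follows; the free function and the descriptor field names are illustrative, not code from this commit.

    // Sketch: binding code can pick view handles straight from the shader IR's metadata.
    void PushImageViewDescriptors(const Shader::Info& info, Vulkan::ImageView& image_view,
                                  VkSampler sampler,
                                  Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue) {
        for (const auto& desc : info.texture_descriptors) {
            // One view per Shader::TextureType (Color1D, Color2D, ColorCube, ...).
            update_descriptor_queue.AddSampledImage(image_view.Handle(desc.type), sampler);
        }
        for (const auto& desc : info.image_descriptors) {
            // Storage views reinterpret the format as signed or unsigned on demand.
            update_descriptor_queue.AddImage(image_view.StorageView(desc.type, desc.format));
        }
    }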
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index dc45fdcb1..0df3a7fe9 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -15,7 +15,9 @@
 namespace Vulkan {
 
 VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_)
-    : device{device_}, scheduler{scheduler_} {}
+    : device{device_}, scheduler{scheduler_} {
+    payload_cursor = payload.data();
+}
 
 VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default;
 
@@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() {
     upload_start = payload_cursor;
 }
 
-void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
-                                   VkDescriptorSet set) {
-    const void* const data = upload_start;
-    const vk::Device* const logical = &device.GetLogical();
-    scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
-        logical->UpdateDescriptorSet(set, update_template, data);
-    });
-}
-
 } // namespace Vulkan
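Note on this file: Send() is removed from VKUpdateDescriptorQueue; the queue now only stages the payload and exposes its start through UpdateData() (see the header hunk below), so whoever owns the descriptor update template records the update itself. A hedged sketch of what a caller might now do, mirroring the removed Send() body; the surrounding names (update_descriptor_queue, descriptor_set, descriptor_update_template) are assumed members of the caller.

    // Sketch: the caller, not the queue, records the deferred descriptor update.
    const void* const data = update_descriptor_queue.UpdateData();
    const vk::Device* const logical = &device.GetLogical();
    scheduler.Record([data, logical, set = descriptor_set,
                      tmpl = *descriptor_update_template](vk::CommandBuffer) {
        logical->UpdateDescriptorSet(set, tmpl, data);
    });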
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index d35e77c44..d7de4c490 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -39,7 +39,9 @@ public:
 
     void Acquire();
 
-    void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
+    const DescriptorUpdateEntry* UpdateData() const noexcept {
+        return upload_start;
+    }
 
     void AddSampledImage(VkImageView image_view, VkSampler sampler) {
         *(payload_cursor++) = VkDescriptorImageInfo{