| author    | 2021-07-25 11:39:04 -0700 |
|-----------|---------------------------|
| committer | 2021-07-25 11:39:04 -0700 |
| commit    | 98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch) |
| tree      | 816faa96c2c4d291825063433331a8ea4b3d08f1 /src/video_core/renderer_vulkan |
| parent    | Merge pull request #6699 from lat9nq/common-threads (diff) |
| parent    | shader: Support out of bound local memory reads and immediate writes (diff) |
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/video_core/renderer_vulkan')
42 files changed, 2988 insertions, 5326 deletions
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index b7f5b8bc2..6c1b2f063 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
| @@ -49,6 +49,16 @@ constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREA | |||
| 49 | .bindingCount = 1, | 49 | .bindingCount = 1, |
| 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, | 50 | .pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>, |
| 51 | }; | 51 | }; |
| 52 | template <u32 num_textures> | ||
| 53 | inline constexpr DescriptorBankInfo TEXTURE_DESCRIPTOR_BANK_INFO{ | ||
| 54 | .uniform_buffers = 0, | ||
| 55 | .storage_buffers = 0, | ||
| 56 | .texture_buffers = 0, | ||
| 57 | .image_buffers = 0, | ||
| 58 | .textures = num_textures, | ||
| 59 | .images = 0, | ||
| 60 | .score = 2, | ||
| 61 | }; | ||
| 52 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ | 62 | constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ |
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 54 | .pNext = nullptr, | 64 | .pNext = nullptr, |
| @@ -323,18 +333,19 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi | |||
| 323 | cmdbuf.SetScissor(0, scissor); | 333 | cmdbuf.SetScissor(0, scissor); |
| 324 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); | 334 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants); |
| 325 | } | 335 | } |
| 326 | |||
| 327 | } // Anonymous namespace | 336 | } // Anonymous namespace |
| 328 | 337 | ||
| 329 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, | 338 | BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_, |
| 330 | StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool) | 339 | StateTracker& state_tracker_, DescriptorPool& descriptor_pool) |
| 331 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, | 340 | : device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_}, |
| 332 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( | 341 | one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout( |
| 333 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | 342 | ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), |
| 334 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( | 343 | two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout( |
| 335 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), | 344 | TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)), |
| 336 | one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout), | 345 | one_texture_descriptor_allocator{ |
| 337 | two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout), | 346 | descriptor_pool.Allocator(*one_texture_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<1>)}, |
| 347 | two_textures_descriptor_allocator{ | ||
| 348 | descriptor_pool.Allocator(*two_textures_set_layout, TEXTURE_DESCRIPTOR_BANK_INFO<2>)}, | ||
| 338 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( | 349 | one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout( |
| 339 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), | 350 | PipelineLayoutCreateInfo(one_texture_set_layout.address()))), |
| 340 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( | 351 | two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout( |
| @@ -362,14 +373,14 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV | |||
| 362 | .operation = operation, | 373 | .operation = operation, |
| 363 | }; | 374 | }; |
| 364 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 375 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 365 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | 376 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); |
| 366 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; | 377 | const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler; |
| 367 | const VkPipeline pipeline = FindOrEmplacePipeline(key); | 378 | const VkPipeline pipeline = FindOrEmplacePipeline(key); |
| 368 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 369 | scheduler.RequestRenderpass(dst_framebuffer); | 379 | scheduler.RequestRenderpass(dst_framebuffer); |
| 370 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set, | 380 | scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler, |
| 371 | &device = device](vk::CommandBuffer cmdbuf) { | 381 | src_view](vk::CommandBuffer cmdbuf) { |
| 372 | // TODO: Barriers | 382 | // TODO: Barriers |
| 383 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 373 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | 384 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); |
| 374 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 385 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| 375 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, | 386 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, |
| @@ -391,12 +402,11 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 391 | const VkPipelineLayout layout = *two_textures_pipeline_layout; | 402 | const VkPipelineLayout layout = *two_textures_pipeline_layout; |
| 392 | const VkSampler sampler = *nearest_sampler; | 403 | const VkSampler sampler = *nearest_sampler; |
| 393 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); | 404 | const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass()); |
| 394 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 395 | scheduler.RequestRenderpass(dst_framebuffer); | 405 | scheduler.RequestRenderpass(dst_framebuffer); |
| 396 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, | 406 | scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view, |
| 397 | src_stencil_view, descriptor_set, | 407 | src_stencil_view, this](vk::CommandBuffer cmdbuf) { |
| 398 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 399 | // TODO: Barriers | 408 | // TODO: Barriers |
| 409 | const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit(); | ||
| 400 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, | 410 | UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view, |
| 401 | src_stencil_view); | 411 | src_stencil_view); |
| 402 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 412 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| @@ -416,7 +426,6 @@ void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer, | |||
| 416 | 426 | ||
| 417 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, | 427 | void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer, |
| 418 | const ImageView& src_image_view) { | 428 | const ImageView& src_image_view) { |
| 419 | |||
| 420 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); | 429 | ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass()); |
| 421 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); | 430 | Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view); |
| 422 | } | 431 | } |
| @@ -436,16 +445,14 @@ void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer, | |||
| 436 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, | 445 | void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, |
| 437 | const ImageView& src_image_view) { | 446 | const ImageView& src_image_view) { |
| 438 | const VkPipelineLayout layout = *one_texture_pipeline_layout; | 447 | const VkPipelineLayout layout = *one_texture_pipeline_layout; |
| 439 | const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D); | 448 | const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D); |
| 440 | const VkSampler sampler = *nearest_sampler; | 449 | const VkSampler sampler = *nearest_sampler; |
| 441 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 442 | const VkExtent2D extent{ | 450 | const VkExtent2D extent{ |
| 443 | .width = src_image_view.size.width, | 451 | .width = src_image_view.size.width, |
| 444 | .height = src_image_view.size.height, | 452 | .height = src_image_view.size.height, |
| 445 | }; | 453 | }; |
| 446 | scheduler.RequestRenderpass(dst_framebuffer); | 454 | scheduler.RequestRenderpass(dst_framebuffer); |
| 447 | scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent, | 455 | scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) { |
| 448 | &device = device](vk::CommandBuffer cmdbuf) { | ||
| 449 | const VkOffset2D offset{ | 456 | const VkOffset2D offset{ |
| 450 | .x = 0, | 457 | .x = 0, |
| 451 | .y = 0, | 458 | .y = 0, |
| @@ -466,6 +473,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb | |||
| 466 | .tex_scale = {viewport.width, viewport.height}, | 473 | .tex_scale = {viewport.width, viewport.height}, |
| 467 | .tex_offset = {0.0f, 0.0f}, | 474 | .tex_offset = {0.0f, 0.0f}, |
| 468 | }; | 475 | }; |
| 476 | const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit(); | ||
| 469 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); | 477 | UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view); |
| 470 | 478 | ||
| 471 | // TODO: Barriers | 479 | // TODO: Barriers |
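The hunks above move one_texture_descriptor_allocator.Commit() (and the two-texture equivalent) from record-request time into the lambda handed to scheduler.Record, so the descriptor set is allocated when the deferred work actually runs. A minimal, self-contained sketch of that deferral pattern, using stand-in types rather than yuzu's DescriptorAllocator and VKScheduler:

```cpp
// Stand-in sketch (not yuzu code): committing the descriptor set inside the
// recorded lambda defers the allocation until the scheduler executes the work.
#include <functional>
#include <queue>

struct DescriptorAllocatorStub {
    int next_set = 0;
    int Commit() { return next_set++; } // stands in for DescriptorAllocator::Commit()
};

struct SchedulerStub {
    std::queue<std::function<void()>> work;
    void Record(std::function<void()> f) { work.push(std::move(f)); }
    void Flush() {
        while (!work.empty()) {
            work.front()(); // "worker thread" drains the recorded work
            work.pop();
        }
    }
};

int main() {
    DescriptorAllocatorStub allocator;
    SchedulerStub scheduler;
    // Capturing the allocator (like capturing `this` in the patch) defers Commit()
    // until the scheduler runs the lambda.
    scheduler.Record([&allocator] {
        const int descriptor_set = allocator.Commit();
        (void)descriptor_set; // would be bound via vkCmdBindDescriptorSets here
    });
    scheduler.Flush();
}
```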
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 0d81a06ed..33ee095c1 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
| @@ -31,7 +31,7 @@ struct BlitImagePipelineKey { | |||
| 31 | class BlitImageHelper { | 31 | class BlitImageHelper { |
| 32 | public: | 32 | public: |
| 33 | explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, | 33 | explicit BlitImageHelper(const Device& device, VKScheduler& scheduler, |
| 34 | StateTracker& state_tracker, VKDescriptorPool& descriptor_pool); | 34 | StateTracker& state_tracker, DescriptorPool& descriptor_pool); |
| 35 | ~BlitImageHelper(); | 35 | ~BlitImageHelper(); |
| 36 | 36 | ||
| 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, | 37 | void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 362278f01..d70153df3 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
| @@ -15,9 +15,7 @@ | |||
| 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | |||
| 19 | namespace { | 18 | namespace { |
| 20 | |||
| 21 | constexpr size_t POINT = 0; | 19 | constexpr size_t POINT = 0; |
| 22 | constexpr size_t LINE = 1; | 20 | constexpr size_t LINE = 1; |
| 23 | constexpr size_t POLYGON = 2; | 21 | constexpr size_t POLYGON = 2; |
| @@ -39,10 +37,20 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { | |||
| 39 | POLYGON, // Patches | 37 | POLYGON, // Patches |
| 40 | }; | 38 | }; |
| 41 | 39 | ||
| 40 | void RefreshXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs) { | ||
| 41 | std::ranges::transform(regs.tfb_layouts, state.layouts.begin(), [](const auto& layout) { | ||
| 42 | return VideoCommon::TransformFeedbackState::Layout{ | ||
| 43 | .stream = layout.stream, | ||
| 44 | .varying_count = layout.varying_count, | ||
| 45 | .stride = layout.stride, | ||
| 46 | }; | ||
| 47 | }); | ||
| 48 | state.varyings = regs.tfb_varying_locs; | ||
| 49 | } | ||
| 42 | } // Anonymous namespace | 50 | } // Anonymous namespace |
| 43 | 51 | ||
| 44 | void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | 52 | void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, |
| 45 | bool has_extended_dynamic_state) { | 53 | bool has_extended_dynamic_state, bool has_dynamic_vertex_input) { |
| 46 | const Maxwell& regs = maxwell3d.regs; | 54 | const Maxwell& regs = maxwell3d.regs; |
| 47 | const std::array enabled_lut{ | 55 | const std::array enabled_lut{ |
| 48 | regs.polygon_offset_point_enable, | 56 | regs.polygon_offset_point_enable, |
| @@ -52,6 +60,9 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 52 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); | 60 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 53 | 61 | ||
| 54 | raw1 = 0; | 62 | raw1 = 0; |
| 63 | extended_dynamic_state.Assign(has_extended_dynamic_state ? 1 : 0); | ||
| 64 | dynamic_vertex_input.Assign(has_dynamic_vertex_input ? 1 : 0); | ||
| 65 | xfb_enabled.Assign(regs.tfb_enabled != 0); | ||
| 55 | primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); | 66 | primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); |
| 56 | depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); | 67 | depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); |
| 57 | depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); | 68 | depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value()); |
| @@ -63,37 +74,66 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 63 | tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); | 74 | tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); |
| 64 | logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); | 75 | logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); |
| 65 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); | 76 | logic_op.Assign(PackLogicOp(regs.logic_op.operation)); |
| 66 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | ||
| 67 | topology.Assign(regs.draw.topology); | 77 | topology.Assign(regs.draw.topology); |
| 68 | msaa_mode.Assign(regs.multisample_mode); | 78 | msaa_mode.Assign(regs.multisample_mode); |
| 69 | 79 | ||
| 70 | raw2 = 0; | 80 | raw2 = 0; |
| 81 | rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); | ||
| 71 | const auto test_func = | 82 | const auto test_func = |
| 72 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; | 83 | regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; |
| 73 | alpha_test_func.Assign(PackComparisonOp(test_func)); | 84 | alpha_test_func.Assign(PackComparisonOp(test_func)); |
| 74 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | 85 | early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); |
| 75 | 86 | depth_enabled.Assign(regs.zeta_enable != 0 ? 1 : 0); | |
| 87 | depth_format.Assign(static_cast<u32>(regs.zeta.format)); | ||
| 88 | y_negate.Assign(regs.screen_y_control.y_negate != 0 ? 1 : 0); | ||
| 89 | provoking_vertex_last.Assign(regs.provoking_vertex_last != 0 ? 1 : 0); | ||
| 90 | conservative_raster_enable.Assign(regs.conservative_raster_enable != 0 ? 1 : 0); | ||
| 91 | smooth_lines.Assign(regs.line_smooth_enable != 0 ? 1 : 0); | ||
| 92 | |||
| 93 | for (size_t i = 0; i < regs.rt.size(); ++i) { | ||
| 94 | color_formats[i] = static_cast<u8>(regs.rt[i].format); | ||
| 95 | } | ||
| 76 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); | 96 | alpha_test_ref = Common::BitCast<u32>(regs.alpha_test_ref); |
| 77 | point_size = Common::BitCast<u32>(regs.point_size); | 97 | point_size = Common::BitCast<u32>(regs.point_size); |
| 78 | 98 | ||
| 79 | if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { | 99 | if (maxwell3d.dirty.flags[Dirty::VertexInput]) { |
| 80 | maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; | 100 | if (has_dynamic_vertex_input) { |
| 81 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 101 | // Dirty flag will be reset by the command buffer update |
| 82 | const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); | 102 | static constexpr std::array LUT{ |
| 83 | binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; | 103 | 0u, // Invalid |
| 84 | } | 104 | 1u, // SignedNorm |
| 85 | } | 105 | 1u, // UnsignedNorm |
| 86 | if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { | 106 | 2u, // SignedInt |
| 87 | maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; | 107 | 3u, // UnsignedInt |
| 88 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 108 | 1u, // UnsignedScaled |
| 89 | const auto& input = regs.vertex_attrib_format[index]; | 109 | 1u, // SignedScaled |
| 90 | auto& attribute = attributes[index]; | 110 | 1u, // Float |
| 91 | attribute.raw = 0; | 111 | }; |
| 92 | attribute.enabled.Assign(input.IsConstant() ? 0 : 1); | 112 | const auto& attrs = regs.vertex_attrib_format; |
| 93 | attribute.buffer.Assign(input.buffer); | 113 | attribute_types = 0; |
| 94 | attribute.offset.Assign(input.offset); | 114 | for (size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 95 | attribute.type.Assign(static_cast<u32>(input.type.Value())); | 115 | const u32 mask = attrs[i].constant != 0 ? 0 : 3; |
| 96 | attribute.size.Assign(static_cast<u32>(input.size.Value())); | 116 | const u32 type = LUT[static_cast<size_t>(attrs[i].type.Value())]; |
| 117 | attribute_types |= static_cast<u64>(type & mask) << (i * 2); | ||
| 118 | } | ||
| 119 | } else { | ||
| 120 | maxwell3d.dirty.flags[Dirty::VertexInput] = false; | ||
| 121 | enabled_divisors = 0; | ||
| 122 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 123 | const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); | ||
| 124 | binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; | ||
| 125 | enabled_divisors |= (is_enabled ? u64{1} : 0) << index; | ||
| 126 | } | ||
| 127 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 128 | const auto& input = regs.vertex_attrib_format[index]; | ||
| 129 | auto& attribute = attributes[index]; | ||
| 130 | attribute.raw = 0; | ||
| 131 | attribute.enabled.Assign(input.constant ? 0 : 1); | ||
| 132 | attribute.buffer.Assign(input.buffer); | ||
| 133 | attribute.offset.Assign(input.offset); | ||
| 134 | attribute.type.Assign(static_cast<u32>(input.type.Value())); | ||
| 135 | attribute.size.Assign(static_cast<u32>(input.size.Value())); | ||
| 136 | } | ||
| 97 | } | 137 | } |
| 98 | } | 138 | } |
| 99 | if (maxwell3d.dirty.flags[Dirty::Blending]) { | 139 | if (maxwell3d.dirty.flags[Dirty::Blending]) { |
| @@ -109,10 +149,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | |||
| 109 | return static_cast<u16>(viewport.swizzle.raw); | 149 | return static_cast<u16>(viewport.swizzle.raw); |
| 110 | }); | 150 | }); |
| 111 | } | 151 | } |
| 112 | if (!has_extended_dynamic_state) { | 152 | if (!extended_dynamic_state) { |
| 113 | no_extended_dynamic_state.Assign(1); | ||
| 114 | dynamic_state.Refresh(regs); | 153 | dynamic_state.Refresh(regs); |
| 115 | } | 154 | } |
| 155 | if (xfb_enabled) { | ||
| 156 | RefreshXfbState(xfb_state, regs); | ||
| 157 | } | ||
| 116 | } | 158 | } |
| 117 | 159 | ||
| 118 | void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { | 160 | void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { |
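With VK_EXT_vertex_input_dynamic_state, the hunk above packs a 2-bit type class per vertex attribute into the single u64 attribute_types (32 attributes x 2 bits). A self-contained sketch of that packing and the matching unpack used by DynamicAttributeType; the class values below are illustrative, the real ones come from the LUT in the hunk:

```cpp
// Sketch of the 2-bit packing: attribute i stores a small type class (0-3)
// at bit position i * 2 inside one 64-bit word.
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t NUM_ATTRIBUTES = 32; // 32 attributes * 2 bits fills the u64

uint64_t Pack(const std::array<uint32_t, NUM_ATTRIBUTES>& types) {
    uint64_t packed = 0;
    for (size_t i = 0; i < NUM_ATTRIBUTES; ++i) {
        packed |= static_cast<uint64_t>(types[i] & 0b11) << (i * 2);
    }
    return packed;
}

uint32_t Unpack(uint64_t packed, size_t index) {
    // Mirrors FixedPipelineState::DynamicAttributeType
    return static_cast<uint32_t>((packed >> (index * 2)) & 0b11);
}

int main() {
    std::array<uint32_t, NUM_ATTRIBUTES> types{};
    types[0] = 1;  // e.g. a float/normalized/scaled class
    types[5] = 2;  // e.g. a signed integer class
    types[31] = 3; // e.g. an unsigned integer class
    const uint64_t packed = Pack(types);
    for (size_t i = 0; i < NUM_ATTRIBUTES; ++i) {
        assert(Unpack(packed, i) == types[i]); // round-trips exactly
    }
}
```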
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index a0eb83a68..c9be37935 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/surface.h" | 14 | #include "video_core/surface.h" |
| 15 | #include "video_core/transform_feedback.h" | ||
| 15 | 16 | ||
| 16 | namespace Vulkan { | 17 | namespace Vulkan { |
| 17 | 18 | ||
| @@ -60,7 +61,7 @@ struct FixedPipelineState { | |||
| 60 | 61 | ||
| 61 | void Refresh(const Maxwell& regs, size_t index); | 62 | void Refresh(const Maxwell& regs, size_t index); |
| 62 | 63 | ||
| 63 | constexpr std::array<bool, 4> Mask() const noexcept { | 64 | std::array<bool, 4> Mask() const noexcept { |
| 64 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; | 65 | return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; |
| 65 | } | 66 | } |
| 66 | 67 | ||
| @@ -97,11 +98,11 @@ struct FixedPipelineState { | |||
| 97 | BitField<20, 3, u32> type; | 98 | BitField<20, 3, u32> type; |
| 98 | BitField<23, 6, u32> size; | 99 | BitField<23, 6, u32> size; |
| 99 | 100 | ||
| 100 | constexpr Maxwell::VertexAttribute::Type Type() const noexcept { | 101 | Maxwell::VertexAttribute::Type Type() const noexcept { |
| 101 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); | 102 | return static_cast<Maxwell::VertexAttribute::Type>(type.Value()); |
| 102 | } | 103 | } |
| 103 | 104 | ||
| 104 | constexpr Maxwell::VertexAttribute::Size Size() const noexcept { | 105 | Maxwell::VertexAttribute::Size Size() const noexcept { |
| 105 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); | 106 | return static_cast<Maxwell::VertexAttribute::Size>(size.Value()); |
| 106 | } | 107 | } |
| 107 | }; | 108 | }; |
| @@ -167,37 +168,53 @@ struct FixedPipelineState { | |||
| 167 | 168 | ||
| 168 | union { | 169 | union { |
| 169 | u32 raw1; | 170 | u32 raw1; |
| 170 | BitField<0, 1, u32> no_extended_dynamic_state; | 171 | BitField<0, 1, u32> extended_dynamic_state; |
| 171 | BitField<2, 1, u32> primitive_restart_enable; | 172 | BitField<1, 1, u32> dynamic_vertex_input; |
| 172 | BitField<3, 1, u32> depth_bias_enable; | 173 | BitField<2, 1, u32> xfb_enabled; |
| 173 | BitField<4, 1, u32> depth_clamp_disabled; | 174 | BitField<3, 1, u32> primitive_restart_enable; |
| 174 | BitField<5, 1, u32> ndc_minus_one_to_one; | 175 | BitField<4, 1, u32> depth_bias_enable; |
| 175 | BitField<6, 2, u32> polygon_mode; | 176 | BitField<5, 1, u32> depth_clamp_disabled; |
| 176 | BitField<8, 5, u32> patch_control_points_minus_one; | 177 | BitField<6, 1, u32> ndc_minus_one_to_one; |
| 177 | BitField<13, 2, u32> tessellation_primitive; | 178 | BitField<7, 2, u32> polygon_mode; |
| 178 | BitField<15, 2, u32> tessellation_spacing; | 179 | BitField<9, 5, u32> patch_control_points_minus_one; |
| 179 | BitField<17, 1, u32> tessellation_clockwise; | 180 | BitField<14, 2, u32> tessellation_primitive; |
| 180 | BitField<18, 1, u32> logic_op_enable; | 181 | BitField<16, 2, u32> tessellation_spacing; |
| 181 | BitField<19, 4, u32> logic_op; | 182 | BitField<18, 1, u32> tessellation_clockwise; |
| 182 | BitField<23, 1, u32> rasterize_enable; | 183 | BitField<19, 1, u32> logic_op_enable; |
| 184 | BitField<20, 4, u32> logic_op; | ||
| 183 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; | 185 | BitField<24, 4, Maxwell::PrimitiveTopology> topology; |
| 184 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; | 186 | BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode; |
| 185 | }; | 187 | }; |
| 186 | union { | 188 | union { |
| 187 | u32 raw2; | 189 | u32 raw2; |
| 188 | BitField<0, 3, u32> alpha_test_func; | 190 | BitField<0, 1, u32> rasterize_enable; |
| 189 | BitField<3, 1, u32> early_z; | 191 | BitField<1, 3, u32> alpha_test_func; |
| 192 | BitField<4, 1, u32> early_z; | ||
| 193 | BitField<5, 1, u32> depth_enabled; | ||
| 194 | BitField<6, 5, u32> depth_format; | ||
| 195 | BitField<11, 1, u32> y_negate; | ||
| 196 | BitField<12, 1, u32> provoking_vertex_last; | ||
| 197 | BitField<13, 1, u32> conservative_raster_enable; | ||
| 198 | BitField<14, 1, u32> smooth_lines; | ||
| 190 | }; | 199 | }; |
| 200 | std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||
| 191 | 201 | ||
| 192 | u32 alpha_test_ref; | 202 | u32 alpha_test_ref; |
| 193 | u32 point_size; | 203 | u32 point_size; |
| 194 | std::array<u32, Maxwell::NumVertexArrays> binding_divisors; | ||
| 195 | std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; | ||
| 196 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; | 204 | std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments; |
| 197 | std::array<u16, Maxwell::NumViewports> viewport_swizzles; | 205 | std::array<u16, Maxwell::NumViewports> viewport_swizzles; |
| 206 | union { | ||
| 207 | u64 attribute_types; // Used with VK_EXT_vertex_input_dynamic_state | ||
| 208 | u64 enabled_divisors; | ||
| 209 | }; | ||
| 210 | std::array<VertexAttribute, Maxwell::NumVertexAttributes> attributes; | ||
| 211 | std::array<u32, Maxwell::NumVertexArrays> binding_divisors; | ||
| 212 | |||
| 198 | DynamicState dynamic_state; | 213 | DynamicState dynamic_state; |
| 214 | VideoCommon::TransformFeedbackState xfb_state; | ||
| 199 | 215 | ||
| 200 | void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); | 216 | void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state, |
| 217 | bool has_dynamic_vertex_input); | ||
| 201 | 218 | ||
| 202 | size_t Hash() const noexcept; | 219 | size_t Hash() const noexcept; |
| 203 | 220 | ||
| @@ -208,8 +225,24 @@ struct FixedPipelineState { | |||
| 208 | } | 225 | } |
| 209 | 226 | ||
| 210 | size_t Size() const noexcept { | 227 | size_t Size() const noexcept { |
| 211 | const size_t total_size = sizeof *this; | 228 | if (xfb_enabled) { |
| 212 | return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); | 229 | // When transform feedback is enabled, use the whole struct |
| 230 | return sizeof(*this); | ||
| 231 | } | ||
| 232 | if (dynamic_vertex_input) { | ||
| 233 | // Exclude dynamic state and attributes | ||
| 234 | return offsetof(FixedPipelineState, attributes); | ||
| 235 | } | ||
| 236 | if (extended_dynamic_state) { | ||
| 237 | // Exclude dynamic state | ||
| 238 | return offsetof(FixedPipelineState, dynamic_state); | ||
| 239 | } | ||
| 240 | // Default | ||
| 241 | return offsetof(FixedPipelineState, xfb_state); | ||
| 242 | } | ||
| 243 | |||
| 244 | u32 DynamicAttributeType(size_t index) const noexcept { | ||
| 245 | return (attribute_types >> (index * 2)) & 0b11; | ||
| 213 | } | 246 | } |
| 214 | }; | 247 | }; |
| 215 | static_assert(std::has_unique_object_representations_v<FixedPipelineState>); | 248 | static_assert(std::has_unique_object_representations_v<FixedPipelineState>); |
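Size() above returns the offsetof of the first member a given configuration does not need, which is why the reordered layout keeps attributes, binding_divisors, dynamic_state and xfb_state at the tail. A stand-alone sketch, with a stand-in struct instead of FixedPipelineState, of hashing only that used prefix:

```cpp
// Sketch: members that are skipped for the current feature set sit past Size(),
// so hashing/comparing the first Size() bytes ignores them entirely.
#include <cstddef>
#include <cstdint>

struct KeySketch {
    bool use_dynamic_state;  // like extended_dynamic_state in raw1
    bool use_xfb;            // like xfb_enabled in raw1
    uint16_t always_used;    // stands in for the always-hashed state
    uint32_t dynamic_only;   // skipped when extended dynamic state is available
    uint32_t xfb_only;       // skipped when transform feedback is disabled

    size_t Size() const noexcept {
        if (use_xfb) {
            return sizeof(*this); // transform feedback needs the whole struct
        }
        if (use_dynamic_state) {
            return offsetof(KeySketch, dynamic_only); // exclude dynamic-only state
        }
        return offsetof(KeySketch, xfb_only); // keep dynamic_only, drop xfb_only
    }
};

// FNV-1a over the first key.Size() bytes, analogous to hashing only the used prefix.
uint64_t HashPrefix(const KeySketch& key) {
    const auto* data = reinterpret_cast<const unsigned char*>(&key);
    uint64_t hash = 0xcbf29ce484222325ULL;
    for (size_t i = 0; i < key.Size(); ++i) {
        hash = (hash ^ data[i]) * 0x100000001b3ULL;
    }
    return hash;
}

int main() {
    KeySketch a{};
    a.use_dynamic_state = true;
    a.always_used = 1;
    a.dynamic_only = 7;
    a.xfb_only = 9;
    KeySketch b = a;
    b.dynamic_only = 42; // differs only in a member past Size()
    return HashPrefix(a) == HashPrefix(b) ? 0 : 1; // 0: the skipped bytes are ignored
}
```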
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f088447e9..68a23b602 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
| @@ -157,7 +157,7 @@ struct FormatTuple { | |||
| 157 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT | 157 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32_FLOAT |
| 158 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT | 158 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16_FLOAT |
| 159 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM | 159 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16_UNORM |
| 160 | {VK_FORMAT_UNDEFINED}, // R16_SNORM | 160 | {VK_FORMAT_R16_SNORM, Attachable | Storage}, // R16_SNORM |
| 161 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT | 161 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16_UINT |
| 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT | 162 | {VK_FORMAT_UNDEFINED}, // R16_SINT |
| 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM | 163 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM |
| @@ -266,19 +266,20 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with | |||
| 266 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; | 266 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; |
| 267 | } | 267 | } |
| 268 | 268 | ||
| 269 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | 269 | VkShaderStageFlagBits ShaderStage(Shader::Stage stage) { |
| 270 | switch (stage) { | 270 | switch (stage) { |
| 271 | case Tegra::Engines::ShaderType::Vertex: | 271 | case Shader::Stage::VertexA: |
| 272 | case Shader::Stage::VertexB: | ||
| 272 | return VK_SHADER_STAGE_VERTEX_BIT; | 273 | return VK_SHADER_STAGE_VERTEX_BIT; |
| 273 | case Tegra::Engines::ShaderType::TesselationControl: | 274 | case Shader::Stage::TessellationControl: |
| 274 | return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; | 275 | return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; |
| 275 | case Tegra::Engines::ShaderType::TesselationEval: | 276 | case Shader::Stage::TessellationEval: |
| 276 | return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; | 277 | return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; |
| 277 | case Tegra::Engines::ShaderType::Geometry: | 278 | case Shader::Stage::Geometry: |
| 278 | return VK_SHADER_STAGE_GEOMETRY_BIT; | 279 | return VK_SHADER_STAGE_GEOMETRY_BIT; |
| 279 | case Tegra::Engines::ShaderType::Fragment: | 280 | case Shader::Stage::Fragment: |
| 280 | return VK_SHADER_STAGE_FRAGMENT_BIT; | 281 | return VK_SHADER_STAGE_FRAGMENT_BIT; |
| 281 | case Tegra::Engines::ShaderType::Compute: | 282 | case Shader::Stage::Compute: |
| 282 | return VK_SHADER_STAGE_COMPUTE_BIT; | 283 | return VK_SHADER_STAGE_COMPUTE_BIT; |
| 283 | } | 284 | } |
| 284 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); | 285 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", stage); |
| @@ -685,6 +686,19 @@ VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face) { | |||
| 685 | return {}; | 686 | return {}; |
| 686 | } | 687 | } |
| 687 | 688 | ||
| 689 | VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode) { | ||
| 690 | switch (polygon_mode) { | ||
| 691 | case Maxwell::PolygonMode::Point: | ||
| 692 | return VK_POLYGON_MODE_POINT; | ||
| 693 | case Maxwell::PolygonMode::Line: | ||
| 694 | return VK_POLYGON_MODE_LINE; | ||
| 695 | case Maxwell::PolygonMode::Fill: | ||
| 696 | return VK_POLYGON_MODE_FILL; | ||
| 697 | } | ||
| 698 | UNIMPLEMENTED_MSG("Unimplemented polygon mode={}", polygon_mode); | ||
| 699 | return {}; | ||
| 700 | } | ||
| 701 | |||
| 688 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { | 702 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { |
| 689 | switch (swizzle) { | 703 | switch (swizzle) { |
| 690 | case Tegra::Texture::SwizzleSource::Zero: | 704 | case Tegra::Texture::SwizzleSource::Zero: |
| @@ -741,4 +755,28 @@ VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reducti | |||
| 741 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; | 755 | return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT; |
| 742 | } | 756 | } |
| 743 | 757 | ||
| 758 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | ||
| 759 | switch (msaa_mode) { | ||
| 760 | case Tegra::Texture::MsaaMode::Msaa1x1: | ||
| 761 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 762 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 763 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 764 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 765 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 766 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 767 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 768 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 769 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 770 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 771 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 772 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 773 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 774 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 775 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 776 | default: | ||
| 777 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 778 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 779 | } | ||
| 780 | } | ||
| 781 | |||
| 744 | } // namespace Vulkan::MaxwellToVK | 782 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index e3e06ba38..8a9616039 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "shader_recompiler/stage.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/surface.h" | 10 | #include "video_core/surface.h" |
| 10 | #include "video_core/textures/texture.h" | 11 | #include "video_core/textures/texture.h" |
| @@ -45,7 +46,7 @@ struct FormatInfo { | |||
| 45 | [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, | 46 | [[nodiscard]] FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with_srgb, |
| 46 | PixelFormat pixel_format); | 47 | PixelFormat pixel_format); |
| 47 | 48 | ||
| 48 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); | 49 | VkShaderStageFlagBits ShaderStage(Shader::Stage stage); |
| 49 | 50 | ||
| 50 | VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); | 51 | VkPrimitiveTopology PrimitiveTopology(const Device& device, Maxwell::PrimitiveTopology topology); |
| 51 | 52 | ||
| @@ -65,10 +66,14 @@ VkFrontFace FrontFace(Maxwell::FrontFace front_face); | |||
| 65 | 66 | ||
| 66 | VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); | 67 | VkCullModeFlagBits CullFace(Maxwell::CullFace cull_face); |
| 67 | 68 | ||
| 69 | VkPolygonMode PolygonMode(Maxwell::PolygonMode polygon_mode); | ||
| 70 | |||
| 68 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | 71 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); |
| 69 | 72 | ||
| 70 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); | 73 | VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle); |
| 71 | 74 | ||
| 72 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); | 75 | VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction); |
| 73 | 76 | ||
| 77 | VkSampleCountFlagBits MsaaMode(Tegra::Texture::MsaaMode msaa_mode); | ||
| 78 | |||
| 74 | } // namespace Vulkan::MaxwellToVK | 79 | } // namespace Vulkan::MaxwellToVK |
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
new file
index 000000000..4847db6b6
--- /dev/null
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
| @@ -0,0 +1,154 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include <boost/container/small_vector.hpp> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 16 | #include "video_core/texture_cache/texture_cache.h" | ||
| 17 | #include "video_core/texture_cache/types.h" | ||
| 18 | #include "video_core/textures/texture.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 20 | |||
| 21 | namespace Vulkan { | ||
| 22 | |||
| 23 | class DescriptorLayoutBuilder { | ||
| 24 | public: | ||
| 25 | DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} | ||
| 26 | |||
| 27 | bool CanUsePushDescriptor() const noexcept { | ||
| 28 | return device->IsKhrPushDescriptorSupported() && | ||
| 29 | num_descriptors <= device->MaxPushDescriptors(); | ||
| 30 | } | ||
| 31 | |||
| 32 | vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { | ||
| 33 | if (bindings.empty()) { | ||
| 34 | return nullptr; | ||
| 35 | } | ||
| 36 | const VkDescriptorSetLayoutCreateFlags flags = | ||
| 37 | use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; | ||
| 38 | return device->GetLogical().CreateDescriptorSetLayout({ | ||
| 39 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 40 | .pNext = nullptr, | ||
| 41 | .flags = flags, | ||
| 42 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 43 | .pBindings = bindings.data(), | ||
| 44 | }); | ||
| 45 | } | ||
| 46 | |||
| 47 | vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, | ||
| 48 | VkPipelineLayout pipeline_layout, | ||
| 49 | bool use_push_descriptor) const { | ||
| 50 | if (entries.empty()) { | ||
| 51 | return nullptr; | ||
| 52 | } | ||
| 53 | const VkDescriptorUpdateTemplateType type = | ||
| 54 | use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR | ||
| 55 | : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; | ||
| 56 | return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||
| 57 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||
| 58 | .pNext = nullptr, | ||
| 59 | .flags = 0, | ||
| 60 | .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), | ||
| 61 | .pDescriptorUpdateEntries = entries.data(), | ||
| 62 | .templateType = type, | ||
| 63 | .descriptorSetLayout = descriptor_set_layout, | ||
| 64 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 65 | .pipelineLayout = pipeline_layout, | ||
| 66 | .set = 0, | ||
| 67 | }); | ||
| 68 | } | ||
| 69 | |||
| 70 | vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { | ||
| 71 | return device->GetLogical().CreatePipelineLayout({ | ||
| 72 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 73 | .pNext = nullptr, | ||
| 74 | .flags = 0, | ||
| 75 | .setLayoutCount = descriptor_set_layout ? 1U : 0U, | ||
| 76 | .pSetLayouts = bindings.empty() ? nullptr : &descriptor_set_layout, | ||
| 77 | .pushConstantRangeCount = 0, | ||
| 78 | .pPushConstantRanges = nullptr, | ||
| 79 | }); | ||
| 80 | } | ||
| 81 | |||
| 82 | void Add(const Shader::Info& info, VkShaderStageFlags stage) { | ||
| 83 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, stage, info.constant_buffer_descriptors); | ||
| 84 | Add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stage, info.storage_buffers_descriptors); | ||
| 85 | Add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, stage, info.texture_buffer_descriptors); | ||
| 86 | Add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, stage, info.image_buffer_descriptors); | ||
| 87 | Add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, stage, info.texture_descriptors); | ||
| 88 | Add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, stage, info.image_descriptors); | ||
| 89 | } | ||
| 90 | |||
| 91 | private: | ||
| 92 | template <typename Descriptors> | ||
| 93 | void Add(VkDescriptorType type, VkShaderStageFlags stage, const Descriptors& descriptors) { | ||
| 94 | const size_t num{descriptors.size()}; | ||
| 95 | for (size_t i = 0; i < num; ++i) { | ||
| 96 | bindings.push_back({ | ||
| 97 | .binding = binding, | ||
| 98 | .descriptorType = type, | ||
| 99 | .descriptorCount = descriptors[i].count, | ||
| 100 | .stageFlags = stage, | ||
| 101 | .pImmutableSamplers = nullptr, | ||
| 102 | }); | ||
| 103 | entries.push_back({ | ||
| 104 | .dstBinding = binding, | ||
| 105 | .dstArrayElement = 0, | ||
| 106 | .descriptorCount = descriptors[i].count, | ||
| 107 | .descriptorType = type, | ||
| 108 | .offset = offset, | ||
| 109 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 110 | }); | ||
| 111 | ++binding; | ||
| 112 | num_descriptors += descriptors[i].count; | ||
| 113 | offset += sizeof(DescriptorUpdateEntry); | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | const Device* device{}; | ||
| 118 | boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; | ||
| 119 | boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; | ||
| 120 | u32 binding{}; | ||
| 121 | u32 num_descriptors{}; | ||
| 122 | size_t offset{}; | ||
| 123 | }; | ||
| 124 | |||
| 125 | inline void PushImageDescriptors(const Shader::Info& info, const VkSampler*& samplers, | ||
| 126 | const ImageId*& image_view_ids, TextureCache& texture_cache, | ||
| 127 | VKUpdateDescriptorQueue& update_descriptor_queue) { | ||
| 128 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 129 | image_view_ids += desc.count; | ||
| 130 | } | ||
| 131 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 132 | image_view_ids += desc.count; | ||
| 133 | } | ||
| 134 | for (const auto& desc : info.texture_descriptors) { | ||
| 135 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 136 | const VkSampler sampler{*(samplers++)}; | ||
| 137 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | ||
| 138 | const VkImageView vk_image_view{image_view.Handle(desc.type)}; | ||
| 139 | update_descriptor_queue.AddSampledImage(vk_image_view, sampler); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | for (const auto& desc : info.image_descriptors) { | ||
| 143 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 144 | ImageView& image_view{texture_cache.GetImageView(*(image_view_ids++))}; | ||
| 145 | if (desc.is_written) { | ||
| 146 | texture_cache.MarkModification(image_view.image_id); | ||
| 147 | } | ||
| 148 | const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; | ||
| 149 | update_descriptor_queue.AddImage(vk_image_view); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | } // namespace Vulkan | ||
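DescriptorLayoutBuilder::Add in the new header hands out consecutive binding indices and advances a flat payload offset by sizeof(DescriptorUpdateEntry), so a single descriptor update template covers every descriptor in declaration order. A stand-in sketch of that bookkeeping (the types, counts, and 8-byte stride are illustrative, not the real Vulkan structures):

```cpp
// Sketch: each added descriptor gets the next binding and an offset that grows by
// a fixed stride, so offsets line up with a flat array of update-payload entries.
#include <cstddef>
#include <cstdint>
#include <vector>

struct EntrySketch {
    uint32_t binding;
    size_t offset; // byte offset into the flat payload written by the update queue
};

struct BuilderSketch {
    static constexpr size_t stride = 8; // plays the role of sizeof(DescriptorUpdateEntry)
    std::vector<EntrySketch> entries;
    uint32_t binding = 0;
    size_t offset = 0;

    void Add(size_t count) { // descriptors of one type for one shader stage
        for (size_t i = 0; i < count; ++i) {
            entries.push_back({binding, offset});
            ++binding;
            offset += stride;
        }
    }
};

int main() {
    BuilderSketch builder;
    builder.Add(2); // e.g. two uniform buffers
    builder.Add(1); // e.g. one storage buffer
    builder.Add(3); // e.g. three combined image samplers
    // Bindings 0..5 now map to offsets 0, 8, 16, ... exactly one payload entry each.
    return builder.entries.size() == 6 ? 0 : 1;
}
```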
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index bec3a81d9..a8d04dc61 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
| @@ -130,35 +130,45 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 130 | if (!framebuffer) { | 130 | if (!framebuffer) { |
| 131 | return; | 131 | return; |
| 132 | } | 132 | } |
| 133 | const auto& layout = render_window.GetFramebufferLayout(); | 133 | SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); |
| 134 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 134 | if (!render_window.IsShown()) { |
| 135 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 135 | return; |
| 136 | const bool use_accelerated = | 136 | } |
| 137 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 137 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 138 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 138 | const bool use_accelerated = |
| 139 | if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { | 139 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 140 | swapchain.Create(layout.width, layout.height, is_srgb); | 140 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 141 | blit_screen.Recreate(); | 141 | |
| 142 | } | 142 | bool has_been_recreated = false; |
| 143 | 143 | const auto recreate_swapchain = [&] { | |
| 144 | scheduler.WaitWorker(); | 144 | if (!has_been_recreated) { |
| 145 | 145 | has_been_recreated = true; | |
| 146 | while (!swapchain.AcquireNextImage()) { | 146 | scheduler.WaitWorker(); |
| 147 | swapchain.Create(layout.width, layout.height, is_srgb); | ||
| 148 | blit_screen.Recreate(); | ||
| 149 | } | 147 | } |
| 150 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); | 148 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); |
| 151 | 149 | swapchain.Create(layout.width, layout.height, is_srgb); | |
| 152 | scheduler.Flush(render_semaphore); | 150 | }; |
| 153 | 151 | if (swapchain.IsSubOptimal() || swapchain.HasColorSpaceChanged(is_srgb)) { | |
| 154 | if (swapchain.Present(render_semaphore)) { | 152 | recreate_swapchain(); |
| 155 | blit_screen.Recreate(); | 153 | } |
| 154 | bool is_outdated; | ||
| 155 | do { | ||
| 156 | swapchain.AcquireNextImage(); | ||
| 157 | is_outdated = swapchain.IsOutDated(); | ||
| 158 | if (is_outdated) { | ||
| 159 | recreate_swapchain(); | ||
| 156 | } | 160 | } |
| 157 | gpu.RendererFrameEndNotify(); | 161 | } while (is_outdated); |
| 158 | rasterizer.TickFrame(); | 162 | if (has_been_recreated) { |
| 163 | blit_screen.Recreate(); | ||
| 159 | } | 164 | } |
| 165 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); | ||
| 166 | scheduler.Flush(render_semaphore); | ||
| 167 | scheduler.WaitWorker(); | ||
| 168 | swapchain.Present(render_semaphore); | ||
| 160 | 169 | ||
| 161 | render_window.OnFrameDisplayed(); | 170 | gpu.RendererFrameEndNotify(); |
| 171 | rasterizer.TickFrame(); | ||
| 162 | } | 172 | } |
| 163 | 173 | ||
| 164 | void RendererVulkan::Report() const { | 174 | void RendererVulkan::Report() const { |
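The rewritten SwapBuffers above retries image acquisition while the swapchain reports itself out of date and rebuilds dependent resources at most once. A compilable sketch of that loop with stub types (not the real Swapchain or VKBlitScreen API):

```cpp
// Sketch of the acquire/recreate retry loop: recreate on out-of-date acquisitions,
// and rebuild the blit-screen resources a single time if any recreation happened.
#include <cstdio>

struct SwapchainStub {
    int outdated_acquires = 2; // pretend the first two acquisitions are out of date
    void AcquireNextImage() {}
    bool IsOutDated() { return outdated_acquires-- > 0; }
    void Create() { std::puts("swapchain recreated"); }
};

int main() {
    SwapchainStub swapchain;
    bool has_been_recreated = false;
    const auto recreate_swapchain = [&] {
        has_been_recreated = true;
        swapchain.Create();
    };
    bool is_outdated;
    do {
        swapchain.AcquireNextImage();
        is_outdated = swapchain.IsOutDated();
        if (is_outdated) {
            recreate_swapchain();
        }
    } while (is_outdated);
    if (has_been_recreated) {
        std::puts("blit screen resources recreated once"); // like blit_screen.Recreate()
    }
}
```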
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 363134129..516f428e7 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
| @@ -184,47 +184,43 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 184 | .depth = 1, | 184 | .depth = 1, |
| 185 | }, | 185 | }, |
| 186 | }; | 186 | }; |
| 187 | scheduler.Record( | 187 | scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { |
| 188 | [buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) { | 188 | const VkImage image = *raw_images[image_index]; |
| 189 | const VkImageMemoryBarrier base_barrier{ | 189 | const VkImageMemoryBarrier base_barrier{ |
| 190 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 190 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 191 | .pNext = nullptr, | 191 | .pNext = nullptr, |
| 192 | .srcAccessMask = 0, | 192 | .srcAccessMask = 0, |
| 193 | .dstAccessMask = 0, | 193 | .dstAccessMask = 0, |
| 194 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 194 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 195 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 195 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 196 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 196 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 197 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 197 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 198 | .image = image, | 198 | .image = image, |
| 199 | .subresourceRange = | 199 | .subresourceRange{ |
| 200 | { | 200 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
| 201 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | 201 | .baseMipLevel = 0, |
| 202 | .baseMipLevel = 0, | 202 | .levelCount = 1, |
| 203 | .levelCount = 1, | 203 | .baseArrayLayer = 0, |
| 204 | .baseArrayLayer = 0, | 204 | .layerCount = 1, |
| 205 | .layerCount = 1, | 205 | }, |
| 206 | }, | 206 | }; |
| 207 | }; | 207 | VkImageMemoryBarrier read_barrier = base_barrier; |
| 208 | VkImageMemoryBarrier read_barrier = base_barrier; | 208 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; |
| 209 | read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; | 209 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| 210 | read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 210 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; |
| 211 | read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; | 211 | |
| 212 | 212 | VkImageMemoryBarrier write_barrier = base_barrier; | |
| 213 | VkImageMemoryBarrier write_barrier = base_barrier; | 213 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| 214 | write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | 214 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; |
| 215 | write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; | 215 | |
| 216 | 216 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | |
| 217 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 217 | read_barrier); |
| 218 | 0, read_barrier); | 218 | cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); |
| 219 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); | 219 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 220 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | 220 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); |
| 221 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); | 221 | }); |
| 222 | }); | ||
| 223 | } | 222 | } |
| 224 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], | 223 | scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { |
| 225 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, | ||
| 226 | size = swapchain.GetSize(), pipeline = *pipeline, | ||
| 227 | layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { | ||
| 228 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | 224 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; |
| 229 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; | 225 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; |
| 230 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; | 226 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; |
| @@ -234,8 +230,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 234 | const VkRenderPassBeginInfo renderpass_bi{ | 230 | const VkRenderPassBeginInfo renderpass_bi{ |
| 235 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 231 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 236 | .pNext = nullptr, | 232 | .pNext = nullptr, |
| 237 | .renderPass = renderpass, | 233 | .renderPass = *renderpass, |
| 238 | .framebuffer = framebuffer, | 234 | .framebuffer = *framebuffers[image_index], |
| 239 | .renderArea = | 235 | .renderArea = |
| 240 | { | 236 | { |
| 241 | .offset = {0, 0}, | 237 | .offset = {0, 0}, |
| @@ -257,12 +253,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 257 | .extent = size, | 253 | .extent = size, |
| 258 | }; | 254 | }; |
| 259 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 255 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 260 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | 256 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); |
| 261 | cmdbuf.SetViewport(0, viewport); | 257 | cmdbuf.SetViewport(0, viewport); |
| 262 | cmdbuf.SetScissor(0, scissor); | 258 | cmdbuf.SetScissor(0, scissor); |
| 263 | 259 | ||
| 264 | cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); | 260 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 265 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); | 261 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, |
| 262 | descriptor_sets[image_index], {}); | ||
| 266 | cmdbuf.Draw(4, 1, 0, 0); | 263 | cmdbuf.Draw(4, 1, 0, 0); |
| 267 | cmdbuf.EndRenderPass(); | 264 | cmdbuf.EndRenderPass(); |
| 268 | }); | 265 | }); |
| @@ -304,8 +301,7 @@ void VKBlitScreen::CreateShaders() { | |||
| 304 | 301 | ||
| 305 | void VKBlitScreen::CreateSemaphores() { | 302 | void VKBlitScreen::CreateSemaphores() { |
| 306 | semaphores.resize(image_count); | 303 | semaphores.resize(image_count); |
| 307 | std::generate(semaphores.begin(), semaphores.end(), | 304 | std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); |
| 308 | [this] { return device.GetLogical().CreateSemaphore(); }); | ||
| 309 | } | 305 | } |
| 310 | 306 | ||
| 311 | void VKBlitScreen::CreateDescriptorPool() { | 307 | void VKBlitScreen::CreateDescriptorPool() { |
| @@ -633,8 +629,8 @@ void VKBlitScreen::CreateFramebuffers() { | |||
| 633 | } | 629 | } |
| 634 | 630 | ||
| 635 | void VKBlitScreen::ReleaseRawImages() { | 631 | void VKBlitScreen::ReleaseRawImages() { |
| 636 | for (std::size_t i = 0; i < raw_images.size(); ++i) { | 632 | for (const u64 tick : resource_ticks) { |
| 637 | scheduler.Wait(resource_ticks.at(i)); | 633 | scheduler.Wait(tick); |
| 638 | } | 634 | } |
| 639 | raw_images.clear(); | 635 | raw_images.clear(); |
| 640 | raw_buffer_commits.clear(); | 636 | raw_buffer_commits.clear(); |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 0def1e769..f4b3ee95c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
| @@ -60,38 +60,74 @@ std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | |||
| 60 | } | 60 | } |
| 61 | return indices; | 61 | return indices; |
| 62 | } | 62 | } |
| 63 | } // Anonymous namespace | ||
| 64 | |||
| 65 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 66 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 67 | 63 | ||
| 68 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 64 | vk::Buffer CreateBuffer(const Device& device, u64 size) { |
| 69 | VAddr cpu_addr_, u64 size_bytes_) | 65 | VkBufferUsageFlags flags = |
| 70 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | 66 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 71 | buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 67 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| 68 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | ||
| 69 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; | ||
| 70 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 71 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||
| 72 | } | ||
| 73 | return device.GetLogical().CreateBuffer({ | ||
| 72 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 74 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 73 | .pNext = nullptr, | 75 | .pNext = nullptr, |
| 74 | .flags = 0, | 76 | .flags = 0, |
| 75 | .size = SizeBytes(), | 77 | .size = size, |
| 76 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 78 | .usage = flags, |
| 77 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 78 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 79 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 80 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 81 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 79 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 82 | .queueFamilyIndexCount = 0, | 80 | .queueFamilyIndexCount = 0, |
| 83 | .pQueueFamilyIndices = nullptr, | 81 | .pQueueFamilyIndices = nullptr, |
| 84 | }); | 82 | }); |
| 83 | } | ||
| 84 | } // Anonymous namespace | ||
| 85 | |||
| 86 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 87 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 88 | |||
| 89 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||
| 90 | VAddr cpu_addr_, u64 size_bytes_) | ||
| 91 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | ||
| 92 | device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, | ||
| 93 | commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { | ||
| 85 | if (runtime.device.HasDebuggingToolAttached()) { | 94 | if (runtime.device.HasDebuggingToolAttached()) { |
| 86 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 95 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 87 | } | 96 | } |
| 88 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 97 | } |
| 98 | |||
| 99 | VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) { | ||
| 100 | if (!device) { | ||
| 101 | // Null buffer, return a null descriptor | ||
| 102 | return VK_NULL_HANDLE; | ||
| 103 | } | ||
| 104 | const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { | ||
| 105 | return offset == view.offset && size == view.size && format == view.format; | ||
| 106 | })}; | ||
| 107 | if (it != views.end()) { | ||
| 108 | return *it->handle; | ||
| 109 | } | ||
| 110 | views.push_back({ | ||
| 111 | .offset = offset, | ||
| 112 | .size = size, | ||
| 113 | .format = format, | ||
| 114 | .handle = device->GetLogical().CreateBufferView({ | ||
| 115 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 116 | .pNext = nullptr, | ||
| 117 | .flags = 0, | ||
| 118 | .buffer = *buffer, | ||
| 119 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Buffer, false, format).format, | ||
| 120 | .offset = offset, | ||
| 121 | .range = size, | ||
| 122 | }), | ||
| 123 | }); | ||
| 124 | return *views.back().handle; | ||
| 89 | } | 125 | } |
| 90 | 126 | ||
| 91 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | 127 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, |
| 92 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | 128 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 93 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 129 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 94 | VKDescriptorPool& descriptor_pool) | 130 | DescriptorPool& descriptor_pool) |
| 95 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | 131 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, |
| 96 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | 132 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, |
| 97 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 133 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
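Buffer::View above adds a small per-buffer cache of texel-buffer views keyed on (offset, size, format), creating a Vulkan view only on the first request for a given key. A simplified sketch of that find-or-create pattern; Handle, Format and MakeView are placeholders for the real vk::BufferView machinery:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    using Handle = std::uint64_t;              // stand-in for VkBufferView
    enum class Format { R32_UINT, R8_UNORM };  // stand-in for PixelFormat

    class ViewCache {
    public:
        Handle Get(std::uint32_t offset, std::uint32_t size, Format format) {
            // Linear search is acceptable: a buffer rarely owns more than a handful of views.
            const auto it = std::ranges::find_if(entries, [&](const Entry& entry) {
                return entry.offset == offset && entry.size == size && entry.format == format;
            });
            if (it != entries.end()) {
                return it->handle;
            }
            entries.push_back({offset, size, format, MakeView(offset, size)});
            return entries.back().handle;
        }

    private:
        struct Entry {
            std::uint32_t offset;
            std::uint32_t size;
            Format format;
            Handle handle;
        };

        static Handle MakeView(std::uint32_t offset, std::uint32_t size) {
            // Placeholder for vkCreateBufferView.
            return (static_cast<Handle>(offset) << 32) | size;
        }

        std::vector<Entry> entries;
    };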
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3bb81d5b3..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -9,13 +9,14 @@ | |||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 11 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 12 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 15 | ||
| 15 | namespace Vulkan { | 16 | namespace Vulkan { |
| 16 | 17 | ||
| 17 | class Device; | 18 | class Device; |
| 18 | class VKDescriptorPool; | 19 | class DescriptorPool; |
| 19 | class VKScheduler; | 20 | class VKScheduler; |
| 20 | 21 | ||
| 21 | class BufferCacheRuntime; | 22 | class BufferCacheRuntime; |
| @@ -26,6 +27,8 @@ public: | |||
| 26 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 27 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 27 | VAddr cpu_addr_, u64 size_bytes_); | 28 | VAddr cpu_addr_, u64 size_bytes_); |
| 28 | 29 | ||
| 30 | [[nodiscard]] VkBufferView View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||
| 31 | |||
| 29 | [[nodiscard]] VkBuffer Handle() const noexcept { | 32 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 30 | return *buffer; | 33 | return *buffer; |
| 31 | } | 34 | } |
| @@ -35,8 +38,17 @@ public: | |||
| 35 | } | 38 | } |
| 36 | 39 | ||
| 37 | private: | 40 | private: |
| 41 | struct BufferView { | ||
| 42 | u32 offset; | ||
| 43 | u32 size; | ||
| 44 | VideoCore::Surface::PixelFormat format; | ||
| 45 | vk::BufferView handle; | ||
| 46 | }; | ||
| 47 | |||
| 48 | const Device* device{}; | ||
| 38 | vk::Buffer buffer; | 49 | vk::Buffer buffer; |
| 39 | MemoryCommit commit; | 50 | MemoryCommit commit; |
| 51 | std::vector<BufferView> views; | ||
| 40 | }; | 52 | }; |
| 41 | 53 | ||
| 42 | class BufferCacheRuntime { | 54 | class BufferCacheRuntime { |
| @@ -49,7 +61,7 @@ public: | |||
| 49 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, | 61 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, |
| 50 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | 62 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 51 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 63 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 52 | VKDescriptorPool& descriptor_pool); | 64 | DescriptorPool& descriptor_pool); |
| 53 | 65 | ||
| 54 | void Finish(); | 66 | void Finish(); |
| 55 | 67 | ||
| @@ -87,6 +99,11 @@ public: | |||
| 87 | BindBuffer(buffer, offset, size); | 99 | BindBuffer(buffer, offset, size); |
| 88 | } | 100 | } |
| 89 | 101 | ||
| 102 | void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 103 | VideoCore::Surface::PixelFormat format) { | ||
| 104 | update_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format)); | ||
| 105 | } | ||
| 106 | |||
| 90 | private: | 107 | private: |
| 91 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { | 108 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size) { |
| 92 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 109 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| @@ -124,6 +141,7 @@ struct BufferCacheParams { | |||
| 124 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | 141 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; |
| 125 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 142 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 126 | static constexpr bool USE_MEMORY_MAPS = true; | 143 | static constexpr bool USE_MEMORY_MAPS = true; |
| 144 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | ||
| 127 | }; | 145 | }; |
| 128 | 146 | ||
| 129 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 147 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
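BufferCacheParams is the compile-time policy block the Vulkan backend feeds into the shared VideoCommon::BufferCache template; a policy of this shape is typically consumed with if constexpr so each flag selects a code path at compile time. A rough illustration of the technique with made-up policy and cache names:

    #include <iostream>

    struct MappedParams {     // illustrative only, not the real BufferCacheParams
        static constexpr bool USE_MEMORY_MAPS = true;
    };
    struct ImmediateParams {
        static constexpr bool USE_MEMORY_MAPS = false;
    };

    template <class P>
    struct GenericBufferCache {
        void Upload() {
            if constexpr (P::USE_MEMORY_MAPS) {
                std::cout << "copy through a persistently mapped staging buffer\n";
            } else {
                std::cout << "issue an immediate upload call per range\n";
            }
        }
    };

    int main() {
        GenericBufferCache<MappedParams>{}.Upload();
        GenericBufferCache<ImmediateParams>{}.Upload();
    }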
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 4181d83ee..8e426ce2c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -41,80 +41,92 @@ constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; | |||
| 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; | 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; |
| 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; | 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; |
| 43 | 43 | ||
| 44 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 44 | template <size_t size> |
| 45 | return { | 45 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ |
| 46 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 46 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 47 | .offset = 0, | 47 | .offset = 0, |
| 48 | .size = static_cast<u32>(size), | 48 | .size = static_cast<u32>(size), |
| 49 | }; | 49 | }; |
| 50 | } | ||
| 51 | |||
| 52 | std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | ||
| 53 | return {{ | ||
| 54 | { | ||
| 55 | .binding = 0, | ||
| 56 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 57 | .descriptorCount = 1, | ||
| 58 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 59 | .pImmutableSamplers = nullptr, | ||
| 60 | }, | ||
| 61 | { | ||
| 62 | .binding = 1, | ||
| 63 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 64 | .descriptorCount = 1, | ||
| 65 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 66 | .pImmutableSamplers = nullptr, | ||
| 67 | }, | ||
| 68 | }}; | ||
| 69 | } | ||
| 70 | 50 | ||
| 71 | std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() { | 51 | constexpr std::array<VkDescriptorSetLayoutBinding, 2> INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS{{ |
| 72 | return {{ | 52 | { |
| 73 | { | 53 | .binding = 0, |
| 74 | .binding = ASTC_BINDING_INPUT_BUFFER, | 54 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 75 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 55 | .descriptorCount = 1, |
| 76 | .descriptorCount = 1, | 56 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 77 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 57 | .pImmutableSamplers = nullptr, |
| 78 | .pImmutableSamplers = nullptr, | 58 | }, |
| 79 | }, | 59 | { |
| 80 | { | 60 | .binding = 1, |
| 81 | .binding = ASTC_BINDING_ENC_BUFFER, | 61 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 82 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 62 | .descriptorCount = 1, |
| 83 | .descriptorCount = 1, | 63 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 84 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 64 | .pImmutableSamplers = nullptr, |
| 85 | .pImmutableSamplers = nullptr, | 65 | }, |
| 86 | }, | 66 | }}; |
| 87 | { | 67 | |
| 88 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | 68 | constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ |
| 89 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 69 | .uniform_buffers = 0, |
| 90 | .descriptorCount = 1, | 70 | .storage_buffers = 2, |
| 91 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 71 | .texture_buffers = 0, |
| 92 | .pImmutableSamplers = nullptr, | 72 | .image_buffers = 0, |
| 93 | }, | 73 | .textures = 0, |
| 94 | { | 74 | .images = 0, |
| 95 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | 75 | .score = 2, |
| 96 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 76 | }; |
| 97 | .descriptorCount = 1, | ||
| 98 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 99 | .pImmutableSamplers = nullptr, | ||
| 100 | }, | ||
| 101 | }}; | ||
| 102 | } | ||
| 103 | 77 | ||
| 104 | VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | 78 | constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{ |
| 105 | return { | 79 | { |
| 106 | .dstBinding = 0, | 80 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| 107 | .dstArrayElement = 0, | ||
| 108 | .descriptorCount = 2, | ||
| 109 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 81 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 110 | .offset = 0, | 82 | .descriptorCount = 1, |
| 111 | .stride = sizeof(DescriptorUpdateEntry), | 83 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 112 | }; | 84 | .pImmutableSamplers = nullptr, |
| 113 | } | 85 | }, |
| 86 | { | ||
| 87 | .binding = ASTC_BINDING_ENC_BUFFER, | ||
| 88 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 89 | .descriptorCount = 1, | ||
| 90 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 91 | .pImmutableSamplers = nullptr, | ||
| 92 | }, | ||
| 93 | { | ||
| 94 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 95 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 96 | .descriptorCount = 1, | ||
| 97 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 98 | .pImmutableSamplers = nullptr, | ||
| 99 | }, | ||
| 100 | { | ||
| 101 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | ||
| 102 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | ||
| 103 | .descriptorCount = 1, | ||
| 104 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 105 | .pImmutableSamplers = nullptr, | ||
| 106 | }, | ||
| 107 | }}; | ||
| 108 | |||
| 109 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ | ||
| 110 | .uniform_buffers = 0, | ||
| 111 | .storage_buffers = 3, | ||
| 112 | .texture_buffers = 0, | ||
| 113 | .image_buffers = 0, | ||
| 114 | .textures = 0, | ||
| 115 | .images = 1, | ||
| 116 | .score = 4, | ||
| 117 | }; | ||
| 114 | 118 | ||
| 115 | std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | 119 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ |
| 116 | BuildASTCPassDescriptorUpdateTemplateEntry() { | 120 | .dstBinding = 0, |
| 117 | return {{ | 121 | .dstArrayElement = 0, |
| 122 | .descriptorCount = 2, | ||
| 123 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 124 | .offset = 0, | ||
| 125 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 126 | }; | ||
| 127 | |||
| 128 | constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | ||
| 129 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY{{ | ||
| 118 | { | 130 | { |
| 119 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | 131 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, |
| 120 | .dstArrayElement = 0, | 132 | .dstArrayElement = 0, |
| @@ -148,7 +160,6 @@ BuildASTCPassDescriptorUpdateTemplateEntry() { | |||
| 148 | .stride = sizeof(DescriptorUpdateEntry), | 160 | .stride = sizeof(DescriptorUpdateEntry), |
| 149 | }, | 161 | }, |
| 150 | }}; | 162 | }}; |
| 151 | } | ||
| 152 | 163 | ||
| 153 | struct AstcPushConstants { | 164 | struct AstcPushConstants { |
| 154 | std::array<u32, 2> blocks_dims; | 165 | std::array<u32, 2> blocks_dims; |
| @@ -159,14 +170,14 @@ struct AstcPushConstants { | |||
| 159 | u32 block_height; | 170 | u32 block_height; |
| 160 | u32 block_height_mask; | 171 | u32 block_height_mask; |
| 161 | }; | 172 | }; |
| 162 | |||
| 163 | } // Anonymous namespace | 173 | } // Anonymous namespace |
| 164 | 174 | ||
| 165 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 175 | ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, |
| 166 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 176 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 167 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 177 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 168 | vk::Span<VkPushConstantRange> push_constants, | 178 | const DescriptorBankInfo& bank_info, |
| 169 | std::span<const u32> code) { | 179 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code) |
| 180 | : device{device_} { | ||
| 170 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ | 181 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ |
| 171 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 182 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, |
| 172 | .pNext = nullptr, | 183 | .pNext = nullptr, |
| @@ -196,8 +207,7 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ | |||
| 196 | .pipelineLayout = *layout, | 207 | .pipelineLayout = *layout, |
| 197 | .set = 0, | 208 | .set = 0, |
| 198 | }); | 209 | }); |
| 199 | 210 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, bank_info); | |
| 200 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | ||
| 201 | } | 211 | } |
| 202 | module = device.GetLogical().CreateShaderModule({ | 212 | module = device.GetLogical().CreateShaderModule({ |
| 203 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 213 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, |
| @@ -206,43 +216,34 @@ VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_ | |||
| 206 | .codeSize = static_cast<u32>(code.size_bytes()), | 216 | .codeSize = static_cast<u32>(code.size_bytes()), |
| 207 | .pCode = code.data(), | 217 | .pCode = code.data(), |
| 208 | }); | 218 | }); |
| 219 | device.SaveShader(code); | ||
| 209 | pipeline = device.GetLogical().CreateComputePipeline({ | 220 | pipeline = device.GetLogical().CreateComputePipeline({ |
| 210 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 221 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 211 | .pNext = nullptr, | 222 | .pNext = nullptr, |
| 212 | .flags = 0, | 223 | .flags = 0, |
| 213 | .stage = | 224 | .stage{ |
| 214 | { | 225 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 215 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 226 | .pNext = nullptr, |
| 216 | .pNext = nullptr, | 227 | .flags = 0, |
| 217 | .flags = 0, | 228 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 218 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 229 | .module = *module, |
| 219 | .module = *module, | 230 | .pName = "main", |
| 220 | .pName = "main", | 231 | .pSpecializationInfo = nullptr, |
| 221 | .pSpecializationInfo = nullptr, | 232 | }, |
| 222 | }, | ||
| 223 | .layout = *layout, | 233 | .layout = *layout, |
| 224 | .basePipelineHandle = nullptr, | 234 | .basePipelineHandle = nullptr, |
| 225 | .basePipelineIndex = 0, | 235 | .basePipelineIndex = 0, |
| 226 | }); | 236 | }); |
| 227 | } | 237 | } |
| 228 | 238 | ||
| 229 | VKComputePass::~VKComputePass() = default; | 239 | ComputePass::~ComputePass() = default; |
| 230 | 240 | ||
| 231 | VkDescriptorSet VKComputePass::CommitDescriptorSet( | 241 | Uint8Pass::Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| 232 | VKUpdateDescriptorQueue& update_descriptor_queue) { | 242 | DescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, |
| 233 | if (!descriptor_template) { | ||
| 234 | return nullptr; | ||
| 235 | } | ||
| 236 | const VkDescriptorSet set = descriptor_allocator->Commit(); | ||
| 237 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 238 | return set; | ||
| 239 | } | ||
| 240 | |||
| 241 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | ||
| 242 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | ||
| 243 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 243 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 244 | : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), | 244 | : ComputePass(device_, descriptor_pool, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |
| 245 | BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV), | 245 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, {}, |
| 246 | VULKAN_UINT8_COMP_SPV), | ||
| 246 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 247 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 247 | update_descriptor_queue{update_descriptor_queue_} {} | 248 | update_descriptor_queue{update_descriptor_queue_} {} |
| 248 | 249 | ||
| @@ -256,11 +257,11 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 256 | update_descriptor_queue.Acquire(); | 257 | update_descriptor_queue.Acquire(); |
| 257 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 258 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 258 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | 259 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 259 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 260 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 261 | const VkBuffer buffer{staging.buffer}; | ||
| 260 | 262 | ||
| 261 | scheduler.RequestOutsideRenderPassOperationContext(); | 263 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 262 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 264 | scheduler.Record([this, buffer, descriptor_data, num_vertices](vk::CommandBuffer cmdbuf) { |
| 263 | num_vertices](vk::CommandBuffer cmdbuf) { | ||
| 264 | static constexpr u32 DISPATCH_SIZE = 1024; | 265 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 265 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | 266 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 266 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 267 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| @@ -268,8 +269,10 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 268 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | 269 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| 269 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | 270 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, |
| 270 | }; | 271 | }; |
| 271 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 272 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 272 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 273 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 274 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 275 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 273 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); | 276 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); |
| 274 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 277 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 275 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); | 278 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| @@ -278,12 +281,12 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer | |||
| 278 | } | 281 | } |
| 279 | 282 | ||
| 280 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 283 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 281 | VKDescriptorPool& descriptor_pool_, | 284 | DescriptorPool& descriptor_pool_, |
| 282 | StagingBufferPool& staging_buffer_pool_, | 285 | StagingBufferPool& staging_buffer_pool_, |
| 283 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 286 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| 284 | : VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(), | 287 | : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, |
| 285 | BuildInputOutputDescriptorUpdateTemplate(), | 288 | INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, |
| 286 | BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV), | 289 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), |
| 287 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | 290 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 288 | update_descriptor_queue{update_descriptor_queue_} {} | 291 | update_descriptor_queue{update_descriptor_queue_} {} |
| 289 | 292 | ||
| @@ -313,11 +316,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 313 | update_descriptor_queue.Acquire(); | 316 | update_descriptor_queue.Acquire(); |
| 314 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 317 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 315 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); | 318 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 316 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 319 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 317 | 320 | ||
| 318 | scheduler.RequestOutsideRenderPassOperationContext(); | 321 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 319 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, | 322 | scheduler.Record([this, buffer = staging.buffer, descriptor_data, num_tri_vertices, base_vertex, |
| 320 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 323 | index_shift](vk::CommandBuffer cmdbuf) { |
| 321 | static constexpr u32 DISPATCH_SIZE = 1024; | 324 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 322 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | 325 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 323 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 326 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| @@ -325,10 +328,12 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 325 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | 328 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| 326 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | 329 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, |
| 327 | }; | 330 | }; |
| 328 | const std::array push_constants = {base_vertex, index_shift}; | 331 | const std::array push_constants{base_vertex, index_shift}; |
| 329 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 332 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 330 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 333 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 331 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | 334 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); |
| 335 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 336 | cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | ||
| 332 | &push_constants); | 337 | &push_constants); |
| 333 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); | 338 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); |
| 334 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 339 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| @@ -338,15 +343,14 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 338 | } | 343 | } |
| 339 | 344 | ||
| 340 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 345 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 341 | VKDescriptorPool& descriptor_pool_, | 346 | DescriptorPool& descriptor_pool_, |
| 342 | StagingBufferPool& staging_buffer_pool_, | 347 | StagingBufferPool& staging_buffer_pool_, |
| 343 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 348 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 344 | MemoryAllocator& memory_allocator_) | 349 | MemoryAllocator& memory_allocator_) |
| 345 | : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(), | 350 | : ComputePass(device_, descriptor_pool_, ASTC_DESCRIPTOR_SET_BINDINGS, |
| 346 | BuildASTCPassDescriptorUpdateTemplateEntry(), | 351 | ASTC_PASS_DESCRIPTOR_UPDATE_TEMPLATE_ENTRY, ASTC_BANK_INFO, |
| 347 | BuildComputePushConstantRange(sizeof(AstcPushConstants)), | 352 | COMPUTE_PUSH_CONSTANT_RANGE<sizeof(AstcPushConstants)>, ASTC_DECODER_COMP_SPV), |
| 348 | ASTC_DECODER_COMP_SPV), | 353 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, |
| 349 | device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 350 | update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} | 354 | update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {} |
| 351 | 355 | ||
| 352 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 356 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| @@ -444,16 +448,14 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 444 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), | 448 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), |
| 445 | sizeof(SWIZZLE_TABLE)); | 449 | sizeof(SWIZZLE_TABLE)); |
| 446 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 450 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 447 | 451 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | |
| 448 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 449 | const VkPipelineLayout vk_layout = *layout; | ||
| 450 | 452 | ||
| 451 | // To unswizzle the ASTC data | 453 | // To unswizzle the ASTC data |
| 452 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 454 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 453 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 455 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 454 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 456 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 455 | scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, | 457 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, |
| 456 | block_dims, params, set](vk::CommandBuffer cmdbuf) { | 458 | params, descriptor_data](vk::CommandBuffer cmdbuf) { |
| 457 | const AstcPushConstants uniforms{ | 459 | const AstcPushConstants uniforms{ |
| 458 | .blocks_dims = block_dims, | 460 | .blocks_dims = block_dims, |
| 459 | .bytes_per_block_log2 = params.bytes_per_block_log2, | 461 | .bytes_per_block_log2 = params.bytes_per_block_log2, |
| @@ -463,8 +465,10 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 463 | .block_height = params.block_height, | 465 | .block_height = params.block_height, |
| 464 | .block_height_mask = params.block_height_mask, | 466 | .block_height_mask = params.block_height_mask, |
| 465 | }; | 467 | }; |
| 466 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); | 468 | const VkDescriptorSet set = descriptor_allocator.Commit(); |
| 467 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | 469 | device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); |
| 470 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {}); | ||
| 471 | cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 468 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); | 472 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| 469 | }); | 473 | }); |
| 470 | } | 474 | } |
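A recurring change in the compute passes above: instead of committing a descriptor set up front, Assemble() now snapshots the queued writes with UpdateData() and lets the recorded lambda commit and fill the set when the scheduler replays it. A condensed sketch of that ordering with placeholder Queue, Allocator and Scheduler types (none of these are the real yuzu classes):

    #include <functional>
    #include <vector>

    struct Queue {                               // stand-in for VKUpdateDescriptorQueue
        const void* UpdateData() const { return payload.data(); }
        std::vector<unsigned char> payload = std::vector<unsigned char>(64);
    };
    struct Allocator {                           // stand-in for DescriptorAllocator
        int Commit() { return next_set++; }
        int next_set = 0;
    };
    struct Scheduler {                           // stand-in for VKScheduler
        void Record(std::function<void()> func) { deferred.push_back(std::move(func)); }
        void Flush() {
            for (auto& func : deferred) {
                func();
            }
            deferred.clear();
        }
        std::vector<std::function<void()>> deferred;
    };

    void AssembleLike(Queue& queue, Allocator& allocator, Scheduler& scheduler) {
        // 1. Capture the raw descriptor payload on the submitting thread.
        const void* const descriptor_data = queue.UpdateData();
        // 2. Allocate and write the set only when the command buffer is actually recorded.
        scheduler.Record([&allocator, descriptor_data] {
            const int set = allocator.Commit();
            // vkUpdateDescriptorSetWithTemplate(set, descriptor_data) and the bind would go here.
            (void)set;
            (void)descriptor_data;
        });
    }

    int main() {
        Queue queue;
        Allocator allocator;
        Scheduler scheduler;
        AssembleLike(queue, allocator, scheduler);
        scheduler.Flush();
    }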
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 5ea187c30..114aef2bd 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 8 | #include <span> | 7 | #include <span> |
| 9 | #include <utility> | 8 | #include <utility> |
| 10 | 9 | ||
| @@ -27,31 +26,31 @@ class VKUpdateDescriptorQueue; | |||
| 27 | class Image; | 26 | class Image; |
| 28 | struct StagingBufferRef; | 27 | struct StagingBufferRef; |
| 29 | 28 | ||
| 30 | class VKComputePass { | 29 | class ComputePass { |
| 31 | public: | 30 | public: |
| 32 | explicit VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 31 | explicit ComputePass(const Device& device, DescriptorPool& descriptor_pool, |
| 33 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 32 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 34 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, | 33 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 35 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); | 34 | const DescriptorBankInfo& bank_info, |
| 36 | ~VKComputePass(); | 35 | vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code); |
| 36 | ~ComputePass(); | ||
| 37 | 37 | ||
| 38 | protected: | 38 | protected: |
| 39 | VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); | 39 | const Device& device; |
| 40 | |||
| 41 | vk::DescriptorUpdateTemplateKHR descriptor_template; | 40 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 42 | vk::PipelineLayout layout; | 41 | vk::PipelineLayout layout; |
| 43 | vk::Pipeline pipeline; | 42 | vk::Pipeline pipeline; |
| 43 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 44 | DescriptorAllocator descriptor_allocator; | ||
| 44 | 45 | ||
| 45 | private: | 46 | private: |
| 46 | vk::DescriptorSetLayout descriptor_set_layout; | ||
| 47 | std::optional<DescriptorAllocator> descriptor_allocator; | ||
| 48 | vk::ShaderModule module; | 47 | vk::ShaderModule module; |
| 49 | }; | 48 | }; |
| 50 | 49 | ||
| 51 | class Uint8Pass final : public VKComputePass { | 50 | class Uint8Pass final : public ComputePass { |
| 52 | public: | 51 | public: |
| 53 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | 52 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| 54 | VKDescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, | 53 | DescriptorPool& descriptor_pool_, StagingBufferPool& staging_buffer_pool_, |
| 55 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 54 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 56 | ~Uint8Pass(); | 55 | ~Uint8Pass(); |
| 57 | 56 | ||
| @@ -66,10 +65,10 @@ private: | |||
| 66 | VKUpdateDescriptorQueue& update_descriptor_queue; | 65 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 67 | }; | 66 | }; |
| 68 | 67 | ||
| 69 | class QuadIndexedPass final : public VKComputePass { | 68 | class QuadIndexedPass final : public ComputePass { |
| 70 | public: | 69 | public: |
| 71 | explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 70 | explicit QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| 72 | VKDescriptorPool& descriptor_pool_, | 71 | DescriptorPool& descriptor_pool_, |
| 73 | StagingBufferPool& staging_buffer_pool_, | 72 | StagingBufferPool& staging_buffer_pool_, |
| 74 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 73 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 75 | ~QuadIndexedPass(); | 74 | ~QuadIndexedPass(); |
| @@ -84,10 +83,10 @@ private: | |||
| 84 | VKUpdateDescriptorQueue& update_descriptor_queue; | 83 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 85 | }; | 84 | }; |
| 86 | 85 | ||
| 87 | class ASTCDecoderPass final : public VKComputePass { | 86 | class ASTCDecoderPass final : public ComputePass { |
| 88 | public: | 87 | public: |
| 89 | explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 88 | explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 90 | VKDescriptorPool& descriptor_pool_, | 89 | DescriptorPool& descriptor_pool_, |
| 91 | StagingBufferPool& staging_buffer_pool_, | 90 | StagingBufferPool& staging_buffer_pool_, |
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 91 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 93 | MemoryAllocator& memory_allocator_); | 92 | MemoryAllocator& memory_allocator_); |
| @@ -99,7 +98,6 @@ public: | |||
| 99 | private: | 98 | private: |
| 100 | void MakeDataBuffer(); | 99 | void MakeDataBuffer(); |
| 101 | 100 | ||
| 102 | const Device& device; | ||
| 103 | VKScheduler& scheduler; | 101 | VKScheduler& scheduler; |
| 104 | StagingBufferPool& staging_buffer_pool; | 102 | StagingBufferPool& staging_buffer_pool; |
| 105 | VKUpdateDescriptorQueue& update_descriptor_queue; | 103 | VKUpdateDescriptorQueue& update_descriptor_queue; |
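The pass constructors now reference compile-time constants such as COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2> instead of calling a runtime builder function. A minimal sketch of that variable-template idiom, with the Vulkan struct trimmed to two fields for illustration:

    #include <cstddef>
    #include <cstdint>

    struct PushConstantRange {  // trimmed stand-in for VkPushConstantRange
        std::uint32_t offset;
        std::uint32_t size;
    };

    // One constant is instantiated per distinct size, fully evaluated at compile time.
    template <std::size_t N>
    inline constexpr PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
        .offset = 0,
        .size = static_cast<std::uint32_t>(N),
    };

    static_assert(COMPUTE_PUSH_CONSTANT_RANGE<sizeof(std::uint32_t) * 2>.size == 8);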
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 3a48219b7..70b84c7a6 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -2,152 +2,198 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <vector> | 6 | #include <vector> |
| 6 | 7 | ||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | |||
| 10 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 7 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 12 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 10 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 17 | #include "video_core/shader_notify.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_device.h" | 18 | #include "video_core/vulkan_common/vulkan_device.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 20 | ||
| 16 | namespace Vulkan { | 21 | namespace Vulkan { |
| 17 | 22 | ||
| 18 | VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 23 | using Shader::ImageBufferDescriptor; |
| 19 | VKDescriptorPool& descriptor_pool_, | 24 | using Tegra::Texture::TexturePair; |
| 20 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 25 | |
| 21 | const SPIRVShader& shader_) | 26 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, |
| 22 | : device{device_}, scheduler{scheduler_}, entries{shader_.entries}, | 27 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 23 | descriptor_set_layout{CreateDescriptorSetLayout()}, | 28 | Common::ThreadWorker* thread_worker, |
| 24 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 29 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, |
| 25 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | 30 | vk::ShaderModule spv_module_) |
| 26 | descriptor_template{CreateDescriptorUpdateTemplate()}, | 31 | : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, |
| 27 | shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {} | 32 | spv_module(std::move(spv_module_)) { |
| 28 | 33 | if (shader_notify) { | |
| 29 | VKComputePipeline::~VKComputePipeline() = default; | 34 | shader_notify->MarkShaderBuilding(); |
| 30 | |||
| 31 | VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { | ||
| 32 | if (!descriptor_template) { | ||
| 33 | return {}; | ||
| 34 | } | ||
| 35 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 36 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 37 | return set; | ||
| 38 | } | ||
| 39 | |||
| 40 | vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | ||
| 41 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 42 | u32 binding = 0; | ||
| 43 | const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { | ||
| 44 | // TODO(Rodrigo): Maybe make individual bindings here? | ||
| 45 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | ||
| 46 | bindings.push_back({ | ||
| 47 | .binding = binding++, | ||
| 48 | .descriptorType = descriptor_type, | ||
| 49 | .descriptorCount = 1, | ||
| 50 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 51 | .pImmutableSamplers = nullptr, | ||
| 52 | }); | ||
| 53 | } | ||
| 54 | }; | ||
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | ||
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | ||
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | ||
| 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 60 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | ||
| 61 | |||
| 62 | return device.GetLogical().CreateDescriptorSetLayout({ | ||
| 63 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 64 | .pNext = nullptr, | ||
| 65 | .flags = 0, | ||
| 66 | .bindingCount = static_cast<u32>(bindings.size()), | ||
| 67 | .pBindings = bindings.data(), | ||
| 68 | }); | ||
| 69 | } | ||
| 70 | |||
| 71 | vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { | ||
| 72 | return device.GetLogical().CreatePipelineLayout({ | ||
| 73 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 74 | .pNext = nullptr, | ||
| 75 | .flags = 0, | ||
| 76 | .setLayoutCount = 1, | ||
| 77 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 78 | .pushConstantRangeCount = 0, | ||
| 79 | .pPushConstantRanges = nullptr, | ||
| 80 | }); | ||
| 81 | } | ||
| 82 | |||
| 83 | vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { | ||
| 84 | std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; | ||
| 85 | u32 binding = 0; | ||
| 86 | u32 offset = 0; | ||
| 87 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); | ||
| 88 | if (template_entries.empty()) { | ||
| 89 | // If the shader doesn't use descriptor sets, skip template creation. | ||
| 90 | return {}; | ||
| 91 | } | 35 | } |
| 92 | 36 | std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), | |
| 93 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR({ | 37 | uniform_buffer_sizes.begin()); |
| 94 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 38 | |
| 95 | .pNext = nullptr, | 39 | auto func{[this, &descriptor_pool, shader_notify] { |
| 96 | .flags = 0, | 40 | DescriptorLayoutBuilder builder{device}; |
| 97 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | 41 | builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); |
| 98 | .pDescriptorUpdateEntries = template_entries.data(), | 42 | |
| 99 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | 43 | descriptor_set_layout = builder.CreateDescriptorSetLayout(false); |
| 100 | .descriptorSetLayout = *descriptor_set_layout, | 44 | pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); |
| 101 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | 45 | descriptor_update_template = |
| 102 | .pipelineLayout = *layout, | 46 | builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); |
| 103 | .set = DESCRIPTOR_SET, | 47 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); |
| 104 | }); | 48 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ |
| 105 | } | 49 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |
| 106 | 50 | .pNext = nullptr, | |
| 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | 51 | .requiredSubgroupSize = GuestWarpSize, |
| 108 | device.SaveShader(code); | 52 | }; |
| 109 | 53 | pipeline = device.GetLogical().CreateComputePipeline({ | |
| 110 | return device.GetLogical().CreateShaderModule({ | 54 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 111 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 55 | .pNext = nullptr, |
| 112 | .pNext = nullptr, | 56 | .flags = 0, |
| 113 | .flags = 0, | 57 | .stage{ |
| 114 | .codeSize = code.size() * sizeof(u32), | ||
| 115 | .pCode = code.data(), | ||
| 116 | }); | ||
| 117 | } | ||
| 118 | |||
| 119 | vk::Pipeline VKComputePipeline::CreatePipeline() const { | ||
| 120 | |||
| 121 | VkComputePipelineCreateInfo ci{ | ||
| 122 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 123 | .pNext = nullptr, | ||
| 124 | .flags = 0, | ||
| 125 | .stage = | ||
| 126 | { | ||
| 127 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 58 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 128 | .pNext = nullptr, | 59 | .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, |
| 129 | .flags = 0, | 60 | .flags = 0, |
| 130 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 61 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 131 | .module = *shader_module, | 62 | .module = *spv_module, |
| 132 | .pName = "main", | 63 | .pName = "main", |
| 133 | .pSpecializationInfo = nullptr, | 64 | .pSpecializationInfo = nullptr, |
| 134 | }, | 65 | }, |
| 135 | .layout = *layout, | 66 | .layout = *pipeline_layout, |
| 136 | .basePipelineHandle = nullptr, | 67 | .basePipelineHandle = 0, |
| 137 | .basePipelineIndex = 0, | 68 | .basePipelineIndex = 0, |
| 138 | }; | 69 | }); |
| 139 | 70 | std::lock_guard lock{build_mutex}; | |
| 140 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | 71 | is_built = true; |
| 141 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | 72 | build_condvar.notify_one(); |
| 142 | .pNext = nullptr, | 73 | if (shader_notify) { |
| 143 | .requiredSubgroupSize = GuestWarpSize, | 74 | shader_notify->MarkShaderComplete(); |
| 144 | }; | 75 | } |
| 145 | 76 | }}; | |
| 146 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { | 77 | if (thread_worker) { |
| 147 | ci.stage.pNext = &subgroup_size_ci; | 78 | thread_worker->QueueWork(std::move(func)); |
| 79 | } else { | ||
| 80 | func(); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | ||
| 85 | Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, | ||
| 86 | BufferCache& buffer_cache, TextureCache& texture_cache) { | ||
| 87 | update_descriptor_queue.Acquire(); | ||
| 88 | |||
| 89 | buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes); | ||
| 90 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 91 | size_t ssbo_index{}; | ||
| 92 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 93 | ASSERT(desc.count == 1); | ||
| 94 | buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, | ||
| 95 | desc.is_written); | ||
| 96 | ++ssbo_index; | ||
| 148 | } | 97 | } |
| 149 | 98 | ||
| 150 | return device.GetLogical().CreateComputePipeline(ci); | 99 | texture_cache.SynchronizeComputeDescriptors(); |
| 100 | |||
| 101 | static constexpr size_t max_elements = 64; | ||
| 102 | std::array<ImageId, max_elements> image_view_ids; | ||
| 103 | boost::container::static_vector<u32, max_elements> image_view_indices; | ||
| 104 | boost::container::static_vector<VkSampler, max_elements> samplers; | ||
| 105 | |||
| 106 | const auto& qmd{kepler_compute.launch_description}; | ||
| 107 | const auto& cbufs{qmd.const_buffer_config}; | ||
| 108 | const bool via_header_index{qmd.linked_tsc != 0}; | ||
| 109 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 110 | ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); | ||
| 111 | const u32 index_offset{index << desc.size_shift}; | ||
| 112 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 113 | const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; | ||
| 114 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 115 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 116 | if (desc.has_secondary) { | ||
| 117 | ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); | ||
| 118 | const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 119 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + | ||
| 120 | secondary_offset}; | ||
| 121 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 122 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 123 | return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 127 | }}; | ||
| 128 | const auto add_image{[&](const auto& desc) { | ||
| 129 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 130 | const auto handle{read_handle(desc, index)}; | ||
| 131 | image_view_indices.push_back(handle.first); | ||
| 132 | } | ||
| 133 | }}; | ||
| 134 | std::ranges::for_each(info.texture_buffer_descriptors, add_image); | ||
| 135 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||
| 136 | for (const auto& desc : info.texture_descriptors) { | ||
| 137 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 138 | const auto handle{read_handle(desc, index)}; | ||
| 139 | image_view_indices.push_back(handle.first); | ||
| 140 | |||
| 141 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||
| 142 | samplers.push_back(sampler->Handle()); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | std::ranges::for_each(info.image_descriptors, add_image); | ||
| 146 | |||
| 147 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 148 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 149 | |||
| 150 | buffer_cache.UnbindComputeTextureBuffers(); | ||
| 151 | ImageId* texture_buffer_ids{image_view_ids.data()}; | ||
| 152 | size_t index{}; | ||
| 153 | const auto add_buffer{[&](const auto& desc) { | ||
| 154 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 155 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 156 | bool is_written{false}; | ||
| 157 | if constexpr (is_image) { | ||
| 158 | is_written = desc.is_written; | ||
| 159 | } | ||
| 160 | ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); | ||
| 161 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), | ||
| 162 | image_view.BufferSize(), image_view.format, | ||
| 163 | is_written, is_image); | ||
| 164 | ++texture_buffer_ids; | ||
| 165 | ++index; | ||
| 166 | } | ||
| 167 | }}; | ||
| 168 | std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); | ||
| 169 | std::ranges::for_each(info.image_buffer_descriptors, add_buffer); | ||
| 170 | |||
| 171 | buffer_cache.UpdateComputeBuffers(); | ||
| 172 | buffer_cache.BindHostComputeBuffers(); | ||
| 173 | |||
| 174 | const VkSampler* samplers_it{samplers.data()}; | ||
| 175 | const ImageId* views_it{image_view_ids.data()}; | ||
| 176 | PushImageDescriptors(info, samplers_it, views_it, texture_cache, update_descriptor_queue); | ||
| 177 | |||
| 178 | if (!is_built.load(std::memory_order::relaxed)) { | ||
| 179 | // Wait for the pipeline to be built | ||
| 180 | scheduler.Record([this](vk::CommandBuffer) { | ||
| 181 | std::unique_lock lock{build_mutex}; | ||
| 182 | build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); | ||
| 183 | }); | ||
| 184 | } | ||
| 185 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||
| 186 | scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||
| 187 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 188 | if (!descriptor_set_layout) { | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||
| 192 | const vk::Device& dev{device.GetLogical()}; | ||
| 193 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||
| 194 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, | ||
| 195 | descriptor_set, nullptr); | ||
| 196 | }); | ||
| 151 | } | 197 | } |
| 152 | 198 | ||
| 153 | } // namespace Vulkan | 199 | } // namespace Vulkan |
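ComputePipeline above compiles its Vulkan objects on an optional worker thread; Configure() only blocks, through the mutex/condition-variable pair, if a dispatch arrives before the build has finished. A stripped-down model of that handshake; AsyncBuilt and BuildExpensiveObject are illustrative names, not yuzu classes:

    #include <atomic>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    class AsyncBuilt {
    public:
        AsyncBuilt() {
            // Off-thread "build"; in the real code this is the pipeline-compile lambda.
            worker = std::thread([this] {
                BuildExpensiveObject();
                std::scoped_lock lock{mutex};
                built = true;
                condvar.notify_one();
            });
        }
        ~AsyncBuilt() { worker.join(); }

        void Use() {
            if (!built.load(std::memory_order_relaxed)) {
                // Only users arriving before the build completes pay for the wait.
                std::unique_lock lock{mutex};
                condvar.wait(lock, [this] { return built.load(std::memory_order_relaxed); });
            }
            // ... bind and dispatch would go here
        }

    private:
        void BuildExpensiveObject() {}

        std::thread worker;
        std::mutex mutex;
        std::condition_variable condvar;
        std::atomic_bool built{false};
    };

    int main() {
        AsyncBuilt pipeline;
        pipeline.Use();
    }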
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 7e16575ac..52fec04d3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -4,61 +4,63 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <atomic> | ||
| 8 | #include <condition_variable> | ||
| 9 | #include <mutex> | ||
| 10 | |||
| 7 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/thread_worker.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 16 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 9 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 17 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 20 | ||
| 21 | namespace VideoCore { | ||
| 22 | class ShaderNotify; | ||
| 23 | } | ||
| 24 | |||
| 12 | namespace Vulkan { | 25 | namespace Vulkan { |
| 13 | 26 | ||
| 14 | class Device; | 27 | class Device; |
| 15 | class VKScheduler; | 28 | class VKScheduler; |
| 16 | class VKUpdateDescriptorQueue; | ||
| 17 | 29 | ||
| 18 | class VKComputePipeline final { | 30 | class ComputePipeline { |
| 19 | public: | 31 | public: |
| 20 | explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_, | 32 | explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, |
| 21 | VKDescriptorPool& descriptor_pool_, | 33 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 34 | Common::ThreadWorker* thread_worker, |
| 23 | const SPIRVShader& shader_); | 35 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info, |
| 24 | ~VKComputePipeline(); | 36 | vk::ShaderModule spv_module); |
| 25 | |||
| 26 | VkDescriptorSet CommitDescriptorSet(); | ||
| 27 | 37 | ||
| 28 | VkPipeline GetHandle() const { | 38 | ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; |
| 29 | return *pipeline; | 39 | ComputePipeline(ComputePipeline&&) noexcept = delete; |
| 30 | } | ||
| 31 | 40 | ||
| 32 | VkPipelineLayout GetLayout() const { | 41 | ComputePipeline& operator=(const ComputePipeline&) = delete; |
| 33 | return *layout; | 42 | ComputePipeline(const ComputePipeline&) = delete; |
| 34 | } | ||
| 35 | 43 | ||
| 36 | const ShaderEntries& GetEntries() const { | 44 | void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, |
| 37 | return entries; | 45 | VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); |
| 38 | } | ||
| 39 | 46 | ||
| 40 | private: | 47 | private: |
| 41 | vk::DescriptorSetLayout CreateDescriptorSetLayout() const; | ||
| 42 | |||
| 43 | vk::PipelineLayout CreatePipelineLayout() const; | ||
| 44 | |||
| 45 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; | ||
| 46 | |||
| 47 | vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; | ||
| 48 | |||
| 49 | vk::Pipeline CreatePipeline() const; | ||
| 50 | |||
| 51 | const Device& device; | 48 | const Device& device; |
| 52 | VKScheduler& scheduler; | 49 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 53 | ShaderEntries entries; | 50 | Shader::Info info; |
| 54 | 51 | ||
| 52 | VideoCommon::ComputeUniformBufferSizes uniform_buffer_sizes{}; | ||
| 53 | |||
| 54 | vk::ShaderModule spv_module; | ||
| 55 | vk::DescriptorSetLayout descriptor_set_layout; | 55 | vk::DescriptorSetLayout descriptor_set_layout; |
| 56 | DescriptorAllocator descriptor_allocator; | 56 | DescriptorAllocator descriptor_allocator; |
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | 57 | vk::PipelineLayout pipeline_layout; |
| 58 | vk::PipelineLayout layout; | 58 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; |
| 59 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 60 | vk::ShaderModule shader_module; | ||
| 61 | vk::Pipeline pipeline; | 59 | vk::Pipeline pipeline; |
| 60 | |||
| 61 | std::condition_variable build_condvar; | ||
| 62 | std::mutex build_mutex; | ||
| 63 | std::atomic_bool is_built{false}; | ||
| 62 | }; | 64 | }; |
| 63 | 65 | ||
| 64 | } // namespace Vulkan | 66 | } // namespace Vulkan |
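The header above deletes both the copy and the move operations of ComputePipeline. A plausible reading, consistent with the constructor taking Common::ThreadWorker*, is that the asynchronous build job captures this, so the object must keep a stable address until is_built flips; callers therefore hold pipelines behind pointers. The sketch below illustrates that ownership pattern with hypothetical Worker and Pipeline stand-ins; it is not code from the diff.

    #include <functional>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct Worker { // stand-in for Common::ThreadWorker
        std::vector<std::function<void()>> jobs;
        void QueueWork(std::function<void()> job) { jobs.push_back(std::move(job)); }
        void Drain() { for (auto& job : jobs) { job(); } jobs.clear(); }
    };

    class Pipeline {
    public:
        explicit Pipeline(Worker* worker) {
            auto build = [this] { built = true; }; // captures this: address must stay stable
            if (worker) {
                worker->QueueWork(std::move(build)); // asynchronous build
            } else {
                build(); // synchronous fallback when no worker is provided
            }
        }
        Pipeline(const Pipeline&) = delete;
        Pipeline& operator=(const Pipeline&) = delete;
        Pipeline(Pipeline&&) = delete;
        Pipeline& operator=(Pipeline&&) = delete;

        bool built{false};
    };

    int main() {
        Worker worker;
        std::unordered_map<unsigned, std::unique_ptr<Pipeline>> cache; // stable addresses
        cache.emplace(0u, std::make_unique<Pipeline>(&worker));
        cache.emplace(1u, std::make_unique<Pipeline>(nullptr)); // built immediately
        worker.Drain(); // completes the asynchronous build of pipeline 0
    }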
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ef9fb5910..8e77e4796 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -2,6 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | ||
| 6 | #include <span> | ||
| 5 | #include <vector> | 7 | #include <vector> |
| 6 | 8 | ||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| @@ -13,79 +15,149 @@ | |||
| 13 | 15 | ||
| 14 | namespace Vulkan { | 16 | namespace Vulkan { |
| 15 | 17 | ||
| 16 | // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. | 18 | // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines |
| 17 | constexpr std::size_t SETS_GROW_RATE = 0x20; | 19 | constexpr size_t SETS_GROW_RATE = 16; |
| 20 | constexpr s32 SCORE_THRESHOLD = 3; | ||
| 21 | constexpr u32 SETS_PER_POOL = 64; | ||
| 18 | 22 | ||
| 19 | DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, | 23 | struct DescriptorBank { |
| 20 | VkDescriptorSetLayout layout_) | 24 | DescriptorBankInfo info; |
| 21 | : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), | 25 | std::vector<vk::DescriptorPool> pools; |
| 22 | descriptor_pool{descriptor_pool_}, layout{layout_} {} | 26 | }; |
| 23 | 27 | ||
| 24 | DescriptorAllocator::~DescriptorAllocator() = default; | 28 | bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { |
| 29 | return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && | ||
| 30 | texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && | ||
| 31 | textures >= subset.textures && images >= subset.images; | ||
| 32 | } | ||
| 25 | 33 | ||
| 26 | VkDescriptorSet DescriptorAllocator::Commit() { | 34 | template <typename Descriptors> |
| 27 | const std::size_t index = CommitResource(); | 35 | static u32 Accumulate(const Descriptors& descriptors) { |
| 28 | return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; | 36 | u32 count = 0; |
| 37 | for (const auto& descriptor : descriptors) { | ||
| 38 | count += descriptor.count; | ||
| 39 | } | ||
| 40 | return count; | ||
| 29 | } | 41 | } |
| 30 | 42 | ||
| 31 | void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { | 43 | static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) { |
| 32 | descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); | 44 | DescriptorBankInfo bank; |
| 45 | for (const Shader::Info& info : infos) { | ||
| 46 | bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); | ||
| 47 | bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); | ||
| 48 | bank.texture_buffers += Accumulate(info.texture_buffer_descriptors); | ||
| 49 | bank.image_buffers += Accumulate(info.image_buffer_descriptors); | ||
| 50 | bank.textures += Accumulate(info.texture_descriptors); | ||
| 51 | bank.images += Accumulate(info.image_descriptors); | ||
| 52 | } | ||
| 53 | bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + | ||
| 54 | bank.image_buffers + bank.textures + bank.images; | ||
| 55 | return bank; | ||
| 33 | } | 56 | } |
| 34 | 57 | ||
| 35 | VKDescriptorPool::VKDescriptorPool(const Device& device_, VKScheduler& scheduler) | 58 | static void AllocatePool(const Device& device, DescriptorBank& bank) { |
| 36 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ | 59 | std::array<VkDescriptorPoolSize, 6> pool_sizes; |
| 37 | AllocateNewPool()} {} | 60 | size_t pool_cursor{}; |
| 38 | 61 | const auto add = [&](VkDescriptorType type, u32 count) { | |
| 39 | VKDescriptorPool::~VKDescriptorPool() = default; | 62 | if (count > 0) { |
| 40 | 63 | pool_sizes[pool_cursor++] = { | |
| 41 | vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { | 64 | .type = type, |
| 42 | static constexpr u32 num_sets = 0x20000; | 65 | .descriptorCount = count * SETS_PER_POOL, |
| 43 | static constexpr VkDescriptorPoolSize pool_sizes[] = { | 66 | }; |
| 44 | {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, | 67 | } |
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, | ||
| 46 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, | ||
| 47 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, | ||
| 48 | {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, | ||
| 49 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}, | ||
| 50 | }; | 68 | }; |
| 51 | 69 | const auto& info{bank.info}; | |
| 52 | const VkDescriptorPoolCreateInfo ci{ | 70 | add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, info.uniform_buffers); |
| 71 | add(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, info.storage_buffers); | ||
| 72 | add(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, info.texture_buffers); | ||
| 73 | add(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, info.image_buffers); | ||
| 74 | add(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, info.textures); | ||
| 75 | add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, info.images); | ||
| 76 | bank.pools.push_back(device.GetLogical().CreateDescriptorPool({ | ||
| 53 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | 77 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, |
| 54 | .pNext = nullptr, | 78 | .pNext = nullptr, |
| 55 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, | 79 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, |
| 56 | .maxSets = num_sets, | 80 | .maxSets = SETS_PER_POOL, |
| 57 | .poolSizeCount = static_cast<u32>(std::size(pool_sizes)), | 81 | .poolSizeCount = static_cast<u32>(pool_cursor), |
| 58 | .pPoolSizes = std::data(pool_sizes), | 82 | .pPoolSizes = std::data(pool_sizes), |
| 59 | }; | 83 | })); |
| 60 | return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); | 84 | } |
| 85 | |||
| 86 | DescriptorAllocator::DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, | ||
| 87 | DescriptorBank& bank_, VkDescriptorSetLayout layout_) | ||
| 88 | : ResourcePool(master_semaphore_, SETS_GROW_RATE), device{&device_}, bank{&bank_}, | ||
| 89 | layout{layout_} {} | ||
| 90 | |||
| 91 | VkDescriptorSet DescriptorAllocator::Commit() { | ||
| 92 | const size_t index = CommitResource(); | ||
| 93 | return sets[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; | ||
| 61 | } | 94 | } |
| 62 | 95 | ||
| 63 | vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, | 96 | void DescriptorAllocator::Allocate(size_t begin, size_t end) { |
| 64 | std::size_t count) { | 97 | sets.push_back(AllocateDescriptors(end - begin)); |
| 65 | const std::vector layout_copies(count, layout); | 98 | } |
| 66 | VkDescriptorSetAllocateInfo ai{ | 99 | |
| 100 | vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { | ||
| 101 | const std::vector<VkDescriptorSetLayout> layouts(count, layout); | ||
| 102 | VkDescriptorSetAllocateInfo allocate_info{ | ||
| 67 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | 103 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
| 68 | .pNext = nullptr, | 104 | .pNext = nullptr, |
| 69 | .descriptorPool = **active_pool, | 105 | .descriptorPool = *bank->pools.back(), |
| 70 | .descriptorSetCount = static_cast<u32>(count), | 106 | .descriptorSetCount = static_cast<u32>(count), |
| 71 | .pSetLayouts = layout_copies.data(), | 107 | .pSetLayouts = layouts.data(), |
| 72 | }; | 108 | }; |
| 73 | 109 | vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); | |
| 74 | vk::DescriptorSets sets = active_pool->Allocate(ai); | 110 | if (!new_sets.IsOutOfPoolMemory()) { |
| 75 | if (!sets.IsOutOfPoolMemory()) { | 111 | return new_sets; |
| 76 | return sets; | ||
| 77 | } | 112 | } |
| 78 | |||
| 79 | // Our current pool is out of memory. Allocate a new one and retry | 113 | // Our current pool is out of memory. Allocate a new one and retry |
| 80 | active_pool = AllocateNewPool(); | 114 | AllocatePool(*device, *bank); |
| 81 | ai.descriptorPool = **active_pool; | 115 | allocate_info.descriptorPool = *bank->pools.back(); |
| 82 | sets = active_pool->Allocate(ai); | 116 | new_sets = bank->pools.back().Allocate(allocate_info); |
| 83 | if (!sets.IsOutOfPoolMemory()) { | 117 | if (!new_sets.IsOutOfPoolMemory()) { |
| 84 | return sets; | 118 | return new_sets; |
| 85 | } | 119 | } |
| 86 | |||
| 87 | // After allocating a new pool, we are out of memory again. We can't handle this from here. | 120 | // After allocating a new pool, we are out of memory again. We can't handle this from here. |
| 88 | throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); | 121 | throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); |
| 89 | } | 122 | } |
| 90 | 123 | ||
| 124 | DescriptorPool::DescriptorPool(const Device& device_, VKScheduler& scheduler) | ||
| 125 | : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()} {} | ||
| 126 | |||
| 127 | DescriptorPool::~DescriptorPool() = default; | ||
| 128 | |||
| 129 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 130 | std::span<const Shader::Info> infos) { | ||
| 131 | return Allocator(layout, MakeBankInfo(infos)); | ||
| 132 | } | ||
| 133 | |||
| 134 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 135 | const Shader::Info& info) { | ||
| 136 | return Allocator(layout, MakeBankInfo(std::array{info})); | ||
| 137 | } | ||
| 138 | |||
| 139 | DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, | ||
| 140 | const DescriptorBankInfo& info) { | ||
| 141 | return DescriptorAllocator(device, master_semaphore, Bank(info), layout); | ||
| 142 | } | ||
| 143 | |||
| 144 | DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { | ||
| 145 | std::shared_lock read_lock{banks_mutex}; | ||
| 146 | const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { | ||
| 147 | return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); | ||
| 148 | }); | ||
| 149 | if (it != bank_infos.end()) { | ||
| 150 | return *banks[std::distance(bank_infos.begin(), it)].get(); | ||
| 151 | } | ||
| 152 | read_lock.unlock(); | ||
| 153 | |||
| 154 | std::unique_lock write_lock{banks_mutex}; | ||
| 155 | bank_infos.push_back(reqs); | ||
| 156 | |||
| 157 | auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>()); | ||
| 158 | bank.info = reqs; | ||
| 159 | AllocatePool(device, bank); | ||
| 160 | return bank; | ||
| 161 | } | ||
| 162 | |||
| 91 | } // namespace Vulkan | 163 | } // namespace Vulkan |
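The new allocation path above grows in two tiers: DescriptorAllocator hands out sets in SETS_GROW_RATE chunks from the most recent pool of its bank, and AllocatePool() appends a fresh pool sized for SETS_PER_POOL sets whenever the current one reports out-of-pool memory; a second failure is treated as unrecoverable. Here is a minimal standalone model of that strategy, with pools reduced to plain counters instead of Vulkan handles (Pool, Bank and AllocateSets are illustrative names):

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    constexpr std::size_t SETS_PER_POOL = 64;

    struct Pool {
        std::size_t remaining = SETS_PER_POOL;
        bool Allocate(std::size_t count) {
            if (count > remaining) {
                return false; // maps to IsOutOfPoolMemory()
            }
            remaining -= count;
            return true;
        }
    };

    struct Bank {
        std::vector<Pool> pools;
        void AllocatePool() { pools.emplace_back(); } // maps to AllocatePool(device, bank)

        void AllocateSets(std::size_t count) {
            if (pools.empty()) {
                AllocatePool();
            }
            if (pools.back().Allocate(count)) {
                return;
            }
            AllocatePool(); // current pool exhausted: grow and retry once
            if (pools.back().Allocate(count)) {
                return;
            }
            throw std::runtime_error("out of pool memory"); // maps to vk::Exception
        }
    };

    int main() {
        Bank bank;
        for (int i = 0; i < 10; ++i) {
            bank.AllocateSets(16); // SETS_GROW_RATE-sized chunks, as in DescriptorAllocator
        }
        // 160 sets at 64 per pool end up spread across 3 pools.
        return bank.pools.size() == 3 ? 0 : 1;
    }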
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index f892be7be..59466aac5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h | |||
| @@ -4,57 +4,85 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <shared_mutex> | ||
| 8 | #include <span> | ||
| 7 | #include <vector> | 9 | #include <vector> |
| 8 | 10 | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_resource_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_resource_pool.h" |
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 14 | ||
| 12 | namespace Vulkan { | 15 | namespace Vulkan { |
| 13 | 16 | ||
| 14 | class Device; | 17 | class Device; |
| 15 | class VKDescriptorPool; | ||
| 16 | class VKScheduler; | 18 | class VKScheduler; |
| 17 | 19 | ||
| 20 | struct DescriptorBank; | ||
| 21 | |||
| 22 | struct DescriptorBankInfo { | ||
| 23 | [[nodiscard]] bool IsSuperset(const DescriptorBankInfo& subset) const noexcept; | ||
| 24 | |||
| 25 | u32 uniform_buffers{}; ///< Number of uniform buffer descriptors | ||
| 26 | u32 storage_buffers{}; ///< Number of storage buffer descriptors | ||
| 27 | u32 texture_buffers{}; ///< Number of texture buffer descriptors | ||
| 28 | u32 image_buffers{}; ///< Number of image buffer descriptors | ||
| 29 | u32 textures{}; ///< Number of texture descriptors | ||
| 30 | u32 images{}; ///< Number of image descriptors | ||
| 31 | s32 score{}; ///< Number of descriptors in total | ||
| 32 | }; | ||
| 33 | |||
| 18 | class DescriptorAllocator final : public ResourcePool { | 34 | class DescriptorAllocator final : public ResourcePool { |
| 35 | friend class DescriptorPool; | ||
| 36 | |||
| 19 | public: | 37 | public: |
| 20 | explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); | 38 | explicit DescriptorAllocator() = default; |
| 21 | ~DescriptorAllocator() override; | 39 | ~DescriptorAllocator() override = default; |
| 40 | |||
| 41 | DescriptorAllocator& operator=(DescriptorAllocator&&) noexcept = default; | ||
| 42 | DescriptorAllocator(DescriptorAllocator&&) noexcept = default; | ||
| 22 | 43 | ||
| 23 | DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; | 44 | DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; |
| 24 | DescriptorAllocator(const DescriptorAllocator&) = delete; | 45 | DescriptorAllocator(const DescriptorAllocator&) = delete; |
| 25 | 46 | ||
| 26 | VkDescriptorSet Commit(); | 47 | VkDescriptorSet Commit(); |
| 27 | 48 | ||
| 28 | protected: | ||
| 29 | void Allocate(std::size_t begin, std::size_t end) override; | ||
| 30 | |||
| 31 | private: | 49 | private: |
| 32 | VKDescriptorPool& descriptor_pool; | 50 | explicit DescriptorAllocator(const Device& device_, MasterSemaphore& master_semaphore_, |
| 33 | const VkDescriptorSetLayout layout; | 51 | DescriptorBank& bank_, VkDescriptorSetLayout layout_); |
| 34 | 52 | ||
| 35 | std::vector<vk::DescriptorSets> descriptors_allocations; | 53 | void Allocate(size_t begin, size_t end) override; |
| 36 | }; | 54 | |
| 55 | vk::DescriptorSets AllocateDescriptors(size_t count); | ||
| 56 | |||
| 57 | const Device* device{}; | ||
| 58 | DescriptorBank* bank{}; | ||
| 59 | VkDescriptorSetLayout layout{}; | ||
| 37 | 60 | ||
| 38 | class VKDescriptorPool final { | 61 | std::vector<vk::DescriptorSets> sets; |
| 39 | friend DescriptorAllocator; | 62 | }; |
| 40 | 63 | ||
| 64 | class DescriptorPool { | ||
| 41 | public: | 65 | public: |
| 42 | explicit VKDescriptorPool(const Device& device, VKScheduler& scheduler); | 66 | explicit DescriptorPool(const Device& device, VKScheduler& scheduler); |
| 43 | ~VKDescriptorPool(); | 67 | ~DescriptorPool(); |
| 44 | 68 | ||
| 45 | VKDescriptorPool(const VKDescriptorPool&) = delete; | 69 | DescriptorPool& operator=(const DescriptorPool&) = delete; |
| 46 | VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; | 70 | DescriptorPool(const DescriptorPool&) = delete; |
| 47 | 71 | ||
| 48 | private: | 72 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, |
| 49 | vk::DescriptorPool* AllocateNewPool(); | 73 | std::span<const Shader::Info> infos); |
| 74 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const Shader::Info& info); | ||
| 75 | DescriptorAllocator Allocator(VkDescriptorSetLayout layout, const DescriptorBankInfo& info); | ||
| 50 | 76 | ||
| 51 | vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); | 77 | private: |
| 78 | DescriptorBank& Bank(const DescriptorBankInfo& reqs); | ||
| 52 | 79 | ||
| 53 | const Device& device; | 80 | const Device& device; |
| 54 | MasterSemaphore& master_semaphore; | 81 | MasterSemaphore& master_semaphore; |
| 55 | 82 | ||
| 56 | std::vector<vk::DescriptorPool> pools; | 83 | std::shared_mutex banks_mutex; |
| 57 | vk::DescriptorPool* active_pool; | 84 | std::vector<DescriptorBankInfo> bank_infos; |
| 85 | std::vector<std::unique_ptr<DescriptorBank>> banks; | ||
| 58 | }; | 86 | }; |
| 59 | 87 | ||
| 60 | } // namespace Vulkan \ No newline at end of file | 88 | } // namespace Vulkan \ No newline at end of file |
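DescriptorBankInfo::score, declared above, is the total descriptor count and drives bank sharing: DescriptorPool::Bank() reuses an existing bank only when its score is within SCORE_THRESHOLD of the request and it is a superset in every descriptor category. A small self-contained sketch of that predicate follows; BankInfo and Matches are illustrative stand-ins for the types above.

    #include <cstdint>
    #include <cstdlib>

    struct BankInfo {
        std::uint32_t uniform_buffers{}, storage_buffers{}, texture_buffers{},
            image_buffers{}, textures{}, images{};
        std::int32_t score{};

        bool IsSuperset(const BankInfo& subset) const {
            return uniform_buffers >= subset.uniform_buffers &&
                   storage_buffers >= subset.storage_buffers &&
                   texture_buffers >= subset.texture_buffers &&
                   image_buffers >= subset.image_buffers &&
                   textures >= subset.textures && images >= subset.images;
        }
    };

    constexpr std::int32_t SCORE_THRESHOLD = 3;

    bool Matches(const BankInfo& bank, const BankInfo& reqs) {
        return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
    }

    int main() {
        const BankInfo existing{.uniform_buffers = 4, .textures = 3, .score = 7};
        const BankInfo close_fit{.uniform_buffers = 3, .textures = 3, .score = 6}; // reused
        const BankInfo too_big{.uniform_buffers = 5, .textures = 3, .score = 8};   // needs a new bank
        return Matches(existing, close_fit) && !Matches(existing, too_big) ? 0 : 1;
    }

The intent, presumably, is to keep each pool sized close to what its pipelines actually commit, in line with the small-grow-rates comment in the .cpp diff.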
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index fc6dd83eb..18482e1d0 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -1,29 +1,58 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <span> |
| 7 | #include <cstring> | ||
| 8 | #include <vector> | ||
| 9 | 7 | ||
| 10 | #include "common/common_types.h" | 8 | #include <boost/container/small_vector.hpp> |
| 11 | #include "common/microprofile.h" | 9 | #include <boost/container/static_vector.hpp> |
| 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 10 | |
| 11 | #include "common/bit_field.h" | ||
| 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 13 | #include "video_core/renderer_vulkan/pipeline_helper.h" |
| 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 15 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 16 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 20 | #include "video_core/shader_notify.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_device.h" | 21 | #include "video_core/vulkan_common/vulkan_device.h" |
| 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 21 | |||
| 22 | namespace Vulkan { | ||
| 23 | 22 | ||
| 24 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 23 | #if defined(_MSC_VER) && defined(NDEBUG) |
| 24 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | ||
| 25 | #else | ||
| 26 | #define LAMBDA_FORCEINLINE | ||
| 27 | #endif | ||
| 25 | 28 | ||
| 29 | namespace Vulkan { | ||
| 26 | namespace { | 30 | namespace { |
| 31 | using boost::container::small_vector; | ||
| 32 | using boost::container::static_vector; | ||
| 33 | using Shader::ImageBufferDescriptor; | ||
| 34 | using Tegra::Texture::TexturePair; | ||
| 35 | using VideoCore::Surface::PixelFormat; | ||
| 36 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 37 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 38 | |||
| 39 | constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; | ||
| 40 | constexpr size_t MAX_IMAGE_ELEMENTS = 64; | ||
| 41 | |||
| 42 | DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) { | ||
| 43 | DescriptorLayoutBuilder builder{device}; | ||
| 44 | for (size_t index = 0; index < infos.size(); ++index) { | ||
| 45 | static constexpr std::array stages{ | ||
| 46 | VK_SHADER_STAGE_VERTEX_BIT, | ||
| 47 | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, | ||
| 48 | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, | ||
| 49 | VK_SHADER_STAGE_GEOMETRY_BIT, | ||
| 50 | VK_SHADER_STAGE_FRAGMENT_BIT, | ||
| 51 | }; | ||
| 52 | builder.Add(infos[index], stages.at(index)); | ||
| 53 | } | ||
| 54 | return builder; | ||
| 55 | } | ||
| 27 | 56 | ||
| 28 | template <class StencilFace> | 57 | template <class StencilFace> |
| 29 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { | 58 | VkStencilOpState GetStencilFaceState(const StencilFace& face) { |
| @@ -39,15 +68,24 @@ VkStencilOpState GetStencilFaceState(const StencilFace& face) { | |||
| 39 | } | 68 | } |
| 40 | 69 | ||
| 41 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { | 70 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { |
| 42 | static constexpr std::array unsupported_topologies = { | 71 | static constexpr std::array unsupported_topologies{ |
| 43 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, | 72 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, |
| 44 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, | 73 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, |
| 45 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, | 74 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, |
| 46 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, | 75 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, |
| 47 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, | 76 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, |
| 48 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; | 77 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, |
| 49 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | 78 | // VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT, |
| 50 | topology) == std::end(unsupported_topologies); | 79 | }; |
| 80 | return std::ranges::find(unsupported_topologies, topology) == unsupported_topologies.end(); | ||
| 81 | } | ||
| 82 | |||
| 83 | bool IsLine(VkPrimitiveTopology topology) { | ||
| 84 | static constexpr std::array line_topologies{ | ||
| 85 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, VK_PRIMITIVE_TOPOLOGY_LINE_STRIP, | ||
| 86 | // VK_PRIMITIVE_TOPOLOGY_LINE_LOOP_EXT, | ||
| 87 | }; | ||
| 88 | return std::ranges::find(line_topologies, topology) != line_topologies.end(); | ||
| 51 | } | 89 | } |
| 52 | 90 | ||
| 53 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | 91 | VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { |
| @@ -59,8 +97,7 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 59 | BitField<12, 3, Maxwell::ViewportSwizzle> w; | 97 | BitField<12, 3, Maxwell::ViewportSwizzle> w; |
| 60 | }; | 98 | }; |
| 61 | const Swizzle unpacked{swizzle}; | 99 | const Swizzle unpacked{swizzle}; |
| 62 | 100 | return VkViewportSwizzleNV{ | |
| 63 | return { | ||
| 64 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), | 101 | .x = MaxwellToVK::ViewportSwizzle(unpacked.x), |
| 65 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), | 102 | .y = MaxwellToVK::ViewportSwizzle(unpacked.y), |
| 66 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), | 103 | .z = MaxwellToVK::ViewportSwizzle(unpacked.z), |
| @@ -68,193 +105,446 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) { | |||
| 68 | }; | 105 | }; |
| 69 | } | 106 | } |
| 70 | 107 | ||
| 71 | VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) { | 108 | PixelFormat DecodeFormat(u8 encoded_format) { |
| 72 | switch (msaa_mode) { | 109 | const auto format{static_cast<Tegra::RenderTargetFormat>(encoded_format)}; |
| 73 | case Tegra::Texture::MsaaMode::Msaa1x1: | 110 | if (format == Tegra::RenderTargetFormat::NONE) { |
| 74 | return VK_SAMPLE_COUNT_1_BIT; | 111 | return PixelFormat::Invalid; |
| 75 | case Tegra::Texture::MsaaMode::Msaa2x1: | ||
| 76 | case Tegra::Texture::MsaaMode::Msaa2x1_D3D: | ||
| 77 | return VK_SAMPLE_COUNT_2_BIT; | ||
| 78 | case Tegra::Texture::MsaaMode::Msaa2x2: | ||
| 79 | case Tegra::Texture::MsaaMode::Msaa2x2_VC4: | ||
| 80 | case Tegra::Texture::MsaaMode::Msaa2x2_VC12: | ||
| 81 | return VK_SAMPLE_COUNT_4_BIT; | ||
| 82 | case Tegra::Texture::MsaaMode::Msaa4x2: | ||
| 83 | case Tegra::Texture::MsaaMode::Msaa4x2_D3D: | ||
| 84 | case Tegra::Texture::MsaaMode::Msaa4x2_VC8: | ||
| 85 | case Tegra::Texture::MsaaMode::Msaa4x2_VC24: | ||
| 86 | return VK_SAMPLE_COUNT_8_BIT; | ||
| 87 | case Tegra::Texture::MsaaMode::Msaa4x4: | ||
| 88 | return VK_SAMPLE_COUNT_16_BIT; | ||
| 89 | default: | ||
| 90 | UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode)); | ||
| 91 | return VK_SAMPLE_COUNT_1_BIT; | ||
| 92 | } | 112 | } |
| 113 | return PixelFormatFromRenderTargetFormat(format); | ||
| 93 | } | 114 | } |
| 94 | 115 | ||
| 95 | } // Anonymous namespace | 116 | RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { |
| 117 | RenderPassKey key; | ||
| 118 | std::ranges::transform(state.color_formats, key.color_formats.begin(), DecodeFormat); | ||
| 119 | if (state.depth_enabled != 0) { | ||
| 120 | const auto depth_format{static_cast<Tegra::DepthFormat>(state.depth_format.Value())}; | ||
| 121 | key.depth_format = PixelFormatFromDepthFormat(depth_format); | ||
| 122 | } else { | ||
| 123 | key.depth_format = PixelFormat::Invalid; | ||
| 124 | } | ||
| 125 | key.samples = MaxwellToVK::MsaaMode(state.msaa_mode); | ||
| 126 | return key; | ||
| 127 | } | ||
| 96 | 128 | ||
| 97 | VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | 129 | size_t NumAttachments(const FixedPipelineState& state) { |
| 98 | VKDescriptorPool& descriptor_pool_, | 130 | size_t num{}; |
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 131 | for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 100 | const GraphicsPipelineCacheKey& key, | 132 | const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])}; |
| 101 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 133 | if (format != Tegra::RenderTargetFormat::NONE) { |
| 102 | const SPIRVProgram& program, u32 num_color_buffers) | 134 | num = index + 1; |
| 103 | : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()}, | 135 | } |
| 104 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | 136 | } |
| 105 | descriptor_allocator{descriptor_pool_, *descriptor_set_layout}, | 137 | return num; |
| 106 | update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()}, | ||
| 107 | descriptor_template{CreateDescriptorUpdateTemplate(program)}, | ||
| 108 | modules(CreateShaderModules(program)), | ||
| 109 | pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {} | ||
| 110 | |||
| 111 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||
| 112 | |||
| 113 | VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | ||
| 114 | if (!descriptor_template) { | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | const VkDescriptorSet set = descriptor_allocator.Commit(); | ||
| 118 | update_descriptor_queue.Send(*descriptor_template, set); | ||
| 119 | return set; | ||
| 120 | } | 138 | } |
| 121 | 139 | ||
| 122 | vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | 140 | template <typename Spec> |
| 123 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const { | 141 | bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules, |
| 124 | const VkDescriptorSetLayoutCreateInfo ci{ | 142 | const std::array<Shader::Info, NUM_STAGES>& stage_infos) { |
| 125 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | 143 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { |
| 126 | .pNext = nullptr, | 144 | if (!Spec::enabled_stages[stage] && modules[stage]) { |
| 127 | .flags = 0, | 145 | return false; |
| 128 | .bindingCount = bindings.size(), | 146 | } |
| 129 | .pBindings = bindings.data(), | 147 | const auto& info{stage_infos[stage]}; |
| 130 | }; | 148 | if constexpr (!Spec::has_storage_buffers) { |
| 131 | return device.GetLogical().CreateDescriptorSetLayout(ci); | 149 | if (!info.storage_buffers_descriptors.empty()) { |
| 150 | return false; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | if constexpr (!Spec::has_texture_buffers) { | ||
| 154 | if (!info.texture_buffer_descriptors.empty()) { | ||
| 155 | return false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | if constexpr (!Spec::has_image_buffers) { | ||
| 159 | if (!info.image_buffer_descriptors.empty()) { | ||
| 160 | return false; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | if constexpr (!Spec::has_images) { | ||
| 164 | if (!info.image_descriptors.empty()) { | ||
| 165 | return false; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | } | ||
| 169 | return true; | ||
| 132 | } | 170 | } |
| 133 | 171 | ||
| 134 | vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | 172 | using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); |
| 135 | const VkPipelineLayoutCreateInfo ci{ | 173 | |
| 136 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | 174 | template <typename Spec, typename... Specs> |
| 137 | .pNext = nullptr, | 175 | ConfigureFuncPtr FindSpec(const std::array<vk::ShaderModule, NUM_STAGES>& modules, |
| 138 | .flags = 0, | 176 | const std::array<Shader::Info, NUM_STAGES>& stage_infos) { |
| 139 | .setLayoutCount = 1, | 177 | if constexpr (sizeof...(Specs) > 0) { |
| 140 | .pSetLayouts = descriptor_set_layout.address(), | 178 | if (!Passes<Spec>(modules, stage_infos)) { |
| 141 | .pushConstantRangeCount = 0, | 179 | return FindSpec<Specs...>(modules, stage_infos); |
| 142 | .pPushConstantRanges = nullptr, | 180 | } |
| 143 | }; | 181 | } |
| 144 | return device.GetLogical().CreatePipelineLayout(ci); | 182 | return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); |
| 145 | } | 183 | } |
| 146 | 184 | ||
| 147 | vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | 185 | struct SimpleVertexFragmentSpec { |
| 148 | const SPIRVProgram& program) const { | 186 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; |
| 149 | std::vector<VkDescriptorUpdateTemplateEntry> template_entries; | 187 | static constexpr bool has_storage_buffers = false; |
| 150 | u32 binding = 0; | 188 | static constexpr bool has_texture_buffers = false; |
| 151 | u32 offset = 0; | 189 | static constexpr bool has_image_buffers = false; |
| 152 | for (const auto& stage : program) { | 190 | static constexpr bool has_images = false; |
| 153 | if (stage) { | 191 | }; |
| 154 | FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries); | 192 | |
| 193 | struct SimpleVertexSpec { | ||
| 194 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; | ||
| 195 | static constexpr bool has_storage_buffers = false; | ||
| 196 | static constexpr bool has_texture_buffers = false; | ||
| 197 | static constexpr bool has_image_buffers = false; | ||
| 198 | static constexpr bool has_images = false; | ||
| 199 | }; | ||
| 200 | |||
| 201 | struct DefaultSpec { | ||
| 202 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 203 | static constexpr bool has_storage_buffers = true; | ||
| 204 | static constexpr bool has_texture_buffers = true; | ||
| 205 | static constexpr bool has_image_buffers = true; | ||
| 206 | static constexpr bool has_images = true; | ||
| 207 | }; | ||
| 208 | |||
| 209 | ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& modules, | ||
| 210 | const std::array<Shader::Info, NUM_STAGES>& infos) { | ||
| 211 | return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(modules, infos); | ||
| 212 | } | ||
| 213 | } // Anonymous namespace | ||
| 214 | |||
| 215 | GraphicsPipeline::GraphicsPipeline( | ||
| 216 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | ||
| 217 | VKScheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, | ||
| 218 | VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, | ||
| 219 | VKUpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, | ||
| 220 | RenderPassCache& render_pass_cache, const GraphicsPipelineCacheKey& key_, | ||
| 221 | std::array<vk::ShaderModule, NUM_STAGES> stages, | ||
| 222 | const std::array<const Shader::Info*, NUM_STAGES>& infos) | ||
| 223 | : key{key_}, maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, device{device_}, | ||
| 224 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, scheduler{scheduler_}, | ||
| 225 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { | ||
| 226 | if (shader_notify) { | ||
| 227 | shader_notify->MarkShaderBuilding(); | ||
| 228 | } | ||
| 229 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 230 | const Shader::Info* const info{infos[stage]}; | ||
| 231 | if (!info) { | ||
| 232 | continue; | ||
| 155 | } | 233 | } |
| 234 | stage_infos[stage] = *info; | ||
| 235 | enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; | ||
| 236 | std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 156 | } | 237 | } |
| 157 | if (template_entries.empty()) { | 238 | auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { |
| 158 | // If the shader doesn't use descriptor sets, skip template creation. | 239 | DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; |
| 159 | return {}; | 240 | uses_push_descriptor = builder.CanUsePushDescriptor(); |
| 241 | descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); | ||
| 242 | if (!uses_push_descriptor) { | ||
| 243 | descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); | ||
| 244 | } | ||
| 245 | const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; | ||
| 246 | pipeline_layout = builder.CreatePipelineLayout(set_layout); | ||
| 247 | descriptor_update_template = | ||
| 248 | builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); | ||
| 249 | |||
| 250 | const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; | ||
| 251 | Validate(); | ||
| 252 | MakePipeline(render_pass); | ||
| 253 | |||
| 254 | std::lock_guard lock{build_mutex}; | ||
| 255 | is_built = true; | ||
| 256 | build_condvar.notify_one(); | ||
| 257 | if (shader_notify) { | ||
| 258 | shader_notify->MarkShaderComplete(); | ||
| 259 | } | ||
| 260 | }}; | ||
| 261 | if (worker_thread) { | ||
| 262 | worker_thread->QueueWork(std::move(func)); | ||
| 263 | } else { | ||
| 264 | func(); | ||
| 160 | } | 265 | } |
| 266 | configure_func = ConfigureFunc(spv_modules, stage_infos); | ||
| 267 | } | ||
| 161 | 268 | ||
| 162 | const VkDescriptorUpdateTemplateCreateInfoKHR ci{ | 269 | void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { |
| 163 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | 270 | transition_keys.push_back(transition->key); |
| 164 | .pNext = nullptr, | 271 | transitions.push_back(transition); |
| 165 | .flags = 0, | ||
| 166 | .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()), | ||
| 167 | .pDescriptorUpdateEntries = template_entries.data(), | ||
| 168 | .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||
| 169 | .descriptorSetLayout = *descriptor_set_layout, | ||
| 170 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 171 | .pipelineLayout = *layout, | ||
| 172 | .set = DESCRIPTOR_SET, | ||
| 173 | }; | ||
| 174 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 175 | } | 272 | } |
| 176 | 273 | ||
| 177 | std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | 274 | template <typename Spec> |
| 178 | const SPIRVProgram& program) const { | 275 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { |
| 179 | VkShaderModuleCreateInfo ci{ | 276 | std::array<ImageId, MAX_IMAGE_ELEMENTS> image_view_ids; |
| 180 | .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, | 277 | std::array<u32, MAX_IMAGE_ELEMENTS> image_view_indices; |
| 181 | .pNext = nullptr, | 278 | std::array<VkSampler, MAX_IMAGE_ELEMENTS> samplers; |
| 182 | .flags = 0, | 279 | size_t sampler_index{}; |
| 183 | .codeSize = 0, | 280 | size_t image_index{}; |
| 184 | .pCode = nullptr, | 281 | |
| 185 | }; | 282 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 283 | |||
| 284 | buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); | ||
| 285 | |||
| 286 | const auto& regs{maxwell3d.regs}; | ||
| 287 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 288 | const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 289 | const Shader::Info& info{stage_infos[stage]}; | ||
| 290 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 291 | if constexpr (Spec::has_storage_buffers) { | ||
| 292 | size_t ssbo_index{}; | ||
| 293 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 294 | ASSERT(desc.count == 1); | ||
| 295 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, | ||
| 296 | desc.cbuf_offset, desc.is_written); | ||
| 297 | ++ssbo_index; | ||
| 298 | } | ||
| 299 | } | ||
| 300 | const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; | ||
| 301 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 302 | ASSERT(cbufs[desc.cbuf_index].enabled); | ||
| 303 | const u32 index_offset{index << desc.size_shift}; | ||
| 304 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 305 | const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; | ||
| 306 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 307 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 308 | if (desc.has_secondary) { | ||
| 309 | ASSERT(cbufs[desc.secondary_cbuf_index].enabled); | ||
| 310 | const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 311 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + | ||
| 312 | second_offset}; | ||
| 313 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 314 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 315 | const u32 raw{lhs_raw | rhs_raw}; | ||
| 316 | return TexturePair(raw, via_header_index); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 320 | }}; | ||
| 321 | const auto add_image{[&](const auto& desc) { | ||
| 322 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 323 | const auto handle{read_handle(desc, index)}; | ||
| 324 | image_view_indices[image_index++] = handle.first; | ||
| 325 | } | ||
| 326 | }}; | ||
| 327 | if constexpr (Spec::has_texture_buffers) { | ||
| 328 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 329 | add_image(desc); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | if constexpr (Spec::has_image_buffers) { | ||
| 333 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 334 | add_image(desc); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | for (const auto& desc : info.texture_descriptors) { | ||
| 338 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 339 | const auto handle{read_handle(desc, index)}; | ||
| 340 | image_view_indices[image_index++] = handle.first; | ||
| 186 | 341 | ||
| 187 | std::vector<vk::ShaderModule> shader_modules; | 342 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; |
| 188 | shader_modules.reserve(Maxwell::MaxShaderStage); | 343 | samplers[sampler_index++] = sampler->Handle(); |
| 189 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | 344 | } |
| 190 | const auto& stage = program[i]; | 345 | } |
| 191 | if (!stage) { | 346 | if constexpr (Spec::has_images) { |
| 192 | continue; | 347 | for (const auto& desc : info.image_descriptors) { |
| 348 | add_image(desc); | ||
| 349 | } | ||
| 193 | } | 350 | } |
| 351 | }}; | ||
| 352 | if constexpr (Spec::enabled_stages[0]) { | ||
| 353 | config_stage(0); | ||
| 354 | } | ||
| 355 | if constexpr (Spec::enabled_stages[1]) { | ||
| 356 | config_stage(1); | ||
| 357 | } | ||
| 358 | if constexpr (Spec::enabled_stages[2]) { | ||
| 359 | config_stage(2); | ||
| 360 | } | ||
| 361 | if constexpr (Spec::enabled_stages[3]) { | ||
| 362 | config_stage(3); | ||
| 363 | } | ||
| 364 | if constexpr (Spec::enabled_stages[4]) { | ||
| 365 | config_stage(4); | ||
| 366 | } | ||
| 367 | const std::span indices_span(image_view_indices.data(), image_index); | ||
| 368 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 369 | |||
| 370 | ImageId* texture_buffer_index{image_view_ids.data()}; | ||
| 371 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 372 | size_t index{}; | ||
| 373 | const auto add_buffer{[&](const auto& desc) { | ||
| 374 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 375 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 376 | bool is_written{false}; | ||
| 377 | if constexpr (is_image) { | ||
| 378 | is_written = desc.is_written; | ||
| 379 | } | ||
| 380 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||
| 381 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||
| 382 | image_view.BufferSize(), image_view.format, | ||
| 383 | is_written, is_image); | ||
| 384 | ++index; | ||
| 385 | ++texture_buffer_index; | ||
| 386 | } | ||
| 387 | }}; | ||
| 388 | buffer_cache.UnbindGraphicsTextureBuffers(stage); | ||
| 194 | 389 | ||
| 195 | device.SaveShader(stage->code); | 390 | const Shader::Info& info{stage_infos[stage]}; |
| 391 | if constexpr (Spec::has_texture_buffers) { | ||
| 392 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 393 | add_buffer(desc); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if constexpr (Spec::has_image_buffers) { | ||
| 397 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 398 | add_buffer(desc); | ||
| 399 | } | ||
| 400 | } | ||
| 401 | for (const auto& desc : info.texture_descriptors) { | ||
| 402 | texture_buffer_index += desc.count; | ||
| 403 | } | ||
| 404 | if constexpr (Spec::has_images) { | ||
| 405 | for (const auto& desc : info.image_descriptors) { | ||
| 406 | texture_buffer_index += desc.count; | ||
| 407 | } | ||
| 408 | } | ||
| 409 | }}; | ||
| 410 | if constexpr (Spec::enabled_stages[0]) { | ||
| 411 | bind_stage_info(0); | ||
| 412 | } | ||
| 413 | if constexpr (Spec::enabled_stages[1]) { | ||
| 414 | bind_stage_info(1); | ||
| 415 | } | ||
| 416 | if constexpr (Spec::enabled_stages[2]) { | ||
| 417 | bind_stage_info(2); | ||
| 418 | } | ||
| 419 | if constexpr (Spec::enabled_stages[3]) { | ||
| 420 | bind_stage_info(3); | ||
| 421 | } | ||
| 422 | if constexpr (Spec::enabled_stages[4]) { | ||
| 423 | bind_stage_info(4); | ||
| 424 | } | ||
| 425 | |||
| 426 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 427 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 196 | 428 | ||
| 197 | ci.codeSize = stage->code.size() * sizeof(u32); | 429 | update_descriptor_queue.Acquire(); |
| 198 | ci.pCode = stage->code.data(); | 430 | |
| 199 | shader_modules.push_back(device.GetLogical().CreateShaderModule(ci)); | 431 | const VkSampler* samplers_it{samplers.data()}; |
| 432 | const ImageId* views_it{image_view_ids.data()}; | ||
| 433 | const auto prepare_stage{[&](size_t stage) LAMBDA_FORCEINLINE { | ||
| 434 | buffer_cache.BindHostStageBuffers(stage); | ||
| 435 | PushImageDescriptors(stage_infos[stage], samplers_it, views_it, texture_cache, | ||
| 436 | update_descriptor_queue); | ||
| 437 | }}; | ||
| 438 | if constexpr (Spec::enabled_stages[0]) { | ||
| 439 | prepare_stage(0); | ||
| 440 | } | ||
| 441 | if constexpr (Spec::enabled_stages[1]) { | ||
| 442 | prepare_stage(1); | ||
| 200 | } | 443 | } |
| 201 | return shader_modules; | 444 | if constexpr (Spec::enabled_stages[2]) { |
| 445 | prepare_stage(2); | ||
| 446 | } | ||
| 447 | if constexpr (Spec::enabled_stages[3]) { | ||
| 448 | prepare_stage(3); | ||
| 449 | } | ||
| 450 | if constexpr (Spec::enabled_stages[4]) { | ||
| 451 | prepare_stage(4); | ||
| 452 | } | ||
| 453 | ConfigureDraw(); | ||
| 202 | } | 454 | } |
| 203 | 455 | ||
| 204 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | 456 | void GraphicsPipeline::ConfigureDraw() { |
| 205 | VkRenderPass renderpass, | 457 | texture_cache.UpdateRenderTargets(false); |
| 206 | u32 num_color_buffers) const { | 458 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); |
| 207 | const auto& state = cache_key.fixed_state; | 459 | |
| 208 | const auto& viewport_swizzles = state.viewport_swizzles; | 460 | if (!is_built.load(std::memory_order::relaxed)) { |
| 209 | 461 | // Wait for the pipeline to be built | |
| 210 | FixedPipelineState::DynamicState dynamic; | 462 | scheduler.Record([this](vk::CommandBuffer) { |
| 211 | if (device.IsExtExtendedDynamicStateSupported()) { | 463 | std::unique_lock lock{build_mutex}; |
| 212 | // Insert dummy values, as long as they are valid they don't matter as extended dynamic | 464 | build_condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); |
| 213 | // state is ignored | ||
| 214 | dynamic.raw1 = 0; | ||
| 215 | dynamic.raw2 = 0; | ||
| 216 | dynamic.vertex_strides.fill(0); | ||
| 217 | } else { | ||
| 218 | dynamic = state.dynamic_state; | ||
| 219 | } | ||
| 220 | |||
| 221 | std::vector<VkVertexInputBindingDescription> vertex_bindings; | ||
| 222 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | ||
| 223 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 224 | const bool instanced = state.binding_divisors[index] != 0; | ||
| 225 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 226 | vertex_bindings.push_back({ | ||
| 227 | .binding = static_cast<u32>(index), | ||
| 228 | .stride = dynamic.vertex_strides[index], | ||
| 229 | .inputRate = rate, | ||
| 230 | }); | 465 | }); |
| 231 | if (instanced) { | ||
| 232 | vertex_binding_divisors.push_back({ | ||
| 233 | .binding = static_cast<u32>(index), | ||
| 234 | .divisor = state.binding_divisors[index], | ||
| 235 | }); | ||
| 236 | } | ||
| 237 | } | 466 | } |
| 467 | const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; | ||
| 468 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||
| 469 | scheduler.Record([this, descriptor_data, bind_pipeline](vk::CommandBuffer cmdbuf) { | ||
| 470 | if (bind_pipeline) { | ||
| 471 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); | ||
| 472 | } | ||
| 473 | if (!descriptor_set_layout) { | ||
| 474 | return; | ||
| 475 | } | ||
| 476 | if (uses_push_descriptor) { | ||
| 477 | cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, | ||
| 478 | 0, descriptor_data); | ||
| 479 | } else { | ||
| 480 | const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||
| 481 | const vk::Device& dev{device.GetLogical()}; | ||
| 482 | dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||
| 483 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | ||
| 484 | descriptor_set, nullptr); | ||
| 485 | } | ||
| 486 | }); | ||
| 487 | } | ||
| 238 | 488 | ||
| 239 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; | 489 | void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { |
| 240 | const auto& input_attributes = program[0]->entries.attributes; | 490 | FixedPipelineState::DynamicState dynamic{}; |
| 241 | for (std::size_t index = 0; index < state.attributes.size(); ++index) { | 491 | if (!key.state.extended_dynamic_state) { |
| 242 | const auto& attribute = state.attributes[index]; | 492 | dynamic = key.state.dynamic_state; |
| 243 | if (!attribute.enabled) { | 493 | } |
| 244 | continue; | 494 | static_vector<VkVertexInputBindingDescription, 32> vertex_bindings; |
| 495 | static_vector<VkVertexInputBindingDivisorDescriptionEXT, 32> vertex_binding_divisors; | ||
| 496 | static_vector<VkVertexInputAttributeDescription, 32> vertex_attributes; | ||
| 497 | if (key.state.dynamic_vertex_input) { | ||
| 498 | for (size_t index = 0; index < key.state.attributes.size(); ++index) { | ||
| 499 | const u32 type = key.state.DynamicAttributeType(index); | ||
| 500 | if (!stage_infos[0].loads.Generic(index) || type == 0) { | ||
| 501 | continue; | ||
| 502 | } | ||
| 503 | vertex_attributes.push_back({ | ||
| 504 | .location = static_cast<u32>(index), | ||
| 505 | .binding = 0, | ||
| 506 | .format = type == 1 ? VK_FORMAT_R32_SFLOAT | ||
| 507 | : type == 2 ? VK_FORMAT_R32_SINT : VK_FORMAT_R32_UINT, | ||
| 508 | .offset = 0, | ||
| 509 | }); | ||
| 245 | } | 510 | } |
| 246 | if (!input_attributes.contains(static_cast<u32>(index))) { | 511 | if (!vertex_attributes.empty()) { |
| 247 | // Skip attributes not used by the vertex shaders. | 512 | vertex_bindings.push_back({ |
| 248 | continue; | 513 | .binding = 0, |
| 514 | .stride = 4, | ||
| 515 | .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, | ||
| 516 | }); | ||
| 517 | } | ||
| 518 | } else { | ||
| 519 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 520 | const bool instanced = key.state.binding_divisors[index] != 0; | ||
| 521 | const auto rate = | ||
| 522 | instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 523 | vertex_bindings.push_back({ | ||
| 524 | .binding = static_cast<u32>(index), | ||
| 525 | .stride = dynamic.vertex_strides[index], | ||
| 526 | .inputRate = rate, | ||
| 527 | }); | ||
| 528 | if (instanced) { | ||
| 529 | vertex_binding_divisors.push_back({ | ||
| 530 | .binding = static_cast<u32>(index), | ||
| 531 | .divisor = key.state.binding_divisors[index], | ||
| 532 | }); | ||
| 533 | } | ||
| 534 | } | ||
| 535 | for (size_t index = 0; index < key.state.attributes.size(); ++index) { | ||
| 536 | const auto& attribute = key.state.attributes[index]; | ||
| 537 | if (!attribute.enabled || !stage_infos[0].loads.Generic(index)) { | ||
| 538 | continue; | ||
| 539 | } | ||
| 540 | vertex_attributes.push_back({ | ||
| 541 | .location = static_cast<u32>(index), | ||
| 542 | .binding = attribute.buffer, | ||
| 543 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 544 | .offset = attribute.offset, | ||
| 545 | }); | ||
| 249 | } | 546 | } |
| 250 | vertex_attributes.push_back({ | ||
| 251 | .location = static_cast<u32>(index), | ||
| 252 | .binding = attribute.buffer, | ||
| 253 | .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()), | ||
| 254 | .offset = attribute.offset, | ||
| 255 | }); | ||
| 256 | } | 547 | } |
| 257 | |||
| 258 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ | 548 | VkPipelineVertexInputStateCreateInfo vertex_input_ci{ |
| 259 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, | 549 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, |
| 260 | .pNext = nullptr, | 550 | .pNext = nullptr, |
| @@ -264,7 +554,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 264 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), | 554 | .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()), |
| 265 | .pVertexAttributeDescriptions = vertex_attributes.data(), | 555 | .pVertexAttributeDescriptions = vertex_attributes.data(), |
| 266 | }; | 556 | }; |
| 267 | |||
| 268 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ | 557 | const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{ |
| 269 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, | 558 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, |
| 270 | .pNext = nullptr, | 559 | .pNext = nullptr, |
| @@ -274,78 +563,113 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 274 | if (!vertex_binding_divisors.empty()) { | 563 | if (!vertex_binding_divisors.empty()) { |
| 275 | vertex_input_ci.pNext = &input_divisor_ci; | 564 | vertex_input_ci.pNext = &input_divisor_ci; |
| 276 | } | 565 | } |
| 277 | 566 | auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, key.state.topology); | |
| 278 | const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); | 567 | if (input_assembly_topology == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) { |
| 568 | if (!spv_modules[1] && !spv_modules[2]) { | ||
| 569 | LOG_WARNING(Render_Vulkan, "Patch topology used without tessellation, using points"); | ||
| 570 | input_assembly_topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; | ||
| 571 | } | ||
| 572 | } | ||
| 279 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ | 573 | const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ |
| 280 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, | 574 | .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, |
| 281 | .pNext = nullptr, | 575 | .pNext = nullptr, |
| 282 | .flags = 0, | 576 | .flags = 0, |
| 283 | .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), | 577 | .topology = input_assembly_topology, |
| 284 | .primitiveRestartEnable = state.primitive_restart_enable != 0 && | 578 | .primitiveRestartEnable = key.state.primitive_restart_enable != 0 && |
| 285 | SupportsPrimitiveRestart(input_assembly_topology), | 579 | SupportsPrimitiveRestart(input_assembly_topology), |
| 286 | }; | 580 | }; |
| 287 | |||
| 288 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ | 581 | const VkPipelineTessellationStateCreateInfo tessellation_ci{ |
| 289 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, | 582 | .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO, |
| 290 | .pNext = nullptr, | 583 | .pNext = nullptr, |
| 291 | .flags = 0, | 584 | .flags = 0, |
| 292 | .patchControlPoints = state.patch_control_points_minus_one.Value() + 1, | 585 | .patchControlPoints = key.state.patch_control_points_minus_one.Value() + 1, |
| 293 | }; | ||
| 294 | |||
| 295 | VkPipelineViewportStateCreateInfo viewport_ci{ | ||
| 296 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | ||
| 297 | .pNext = nullptr, | ||
| 298 | .flags = 0, | ||
| 299 | .viewportCount = Maxwell::NumViewports, | ||
| 300 | .pViewports = nullptr, | ||
| 301 | .scissorCount = Maxwell::NumViewports, | ||
| 302 | .pScissors = nullptr, | ||
| 303 | }; | 586 | }; |
| 304 | 587 | ||
| 305 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; | 588 | std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles; |
| 306 | std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); | 589 | std::ranges::transform(key.state.viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle); |
| 307 | VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ | 590 | const VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{ |
| 308 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, | 591 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, |
| 309 | .pNext = nullptr, | 592 | .pNext = nullptr, |
| 310 | .flags = 0, | 593 | .flags = 0, |
| 311 | .viewportCount = Maxwell::NumViewports, | 594 | .viewportCount = Maxwell::NumViewports, |
| 312 | .pViewportSwizzles = swizzles.data(), | 595 | .pViewportSwizzles = swizzles.data(), |
| 313 | }; | 596 | }; |
| 314 | if (device.IsNvViewportSwizzleSupported()) { | 597 | const VkPipelineViewportStateCreateInfo viewport_ci{ |
| 315 | viewport_ci.pNext = &swizzle_ci; | 598 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |
| 316 | } | 599 | .pNext = device.IsNvViewportSwizzleSupported() ? &swizzle_ci : nullptr, |
| 600 | .flags = 0, | ||
| 601 | .viewportCount = Maxwell::NumViewports, | ||
| 602 | .pViewports = nullptr, | ||
| 603 | .scissorCount = Maxwell::NumViewports, | ||
| 604 | .pScissors = nullptr, | ||
| 605 | }; | ||
| 317 | 606 | ||
| 318 | const VkPipelineRasterizationStateCreateInfo rasterization_ci{ | 607 | VkPipelineRasterizationStateCreateInfo rasterization_ci{ |
| 319 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, | 608 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, |
| 320 | .pNext = nullptr, | 609 | .pNext = nullptr, |
| 321 | .flags = 0, | 610 | .flags = 0, |
| 322 | .depthClampEnable = | 611 | .depthClampEnable = |
| 323 | static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), | 612 | static_cast<VkBool32>(key.state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE), |
| 324 | .rasterizerDiscardEnable = | 613 | .rasterizerDiscardEnable = |
| 325 | static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), | 614 | static_cast<VkBool32>(key.state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE), |
| 326 | .polygonMode = VK_POLYGON_MODE_FILL, | 615 | .polygonMode = |
| 616 | MaxwellToVK::PolygonMode(FixedPipelineState::UnpackPolygonMode(key.state.polygon_mode)), | ||
| 327 | .cullMode = static_cast<VkCullModeFlags>( | 617 | .cullMode = static_cast<VkCullModeFlags>( |
| 328 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), | 618 | dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE), |
| 329 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), | 619 | .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()), |
| 330 | .depthBiasEnable = state.depth_bias_enable, | 620 | .depthBiasEnable = key.state.depth_bias_enable, |
| 331 | .depthBiasConstantFactor = 0.0f, | 621 | .depthBiasConstantFactor = 0.0f, |
| 332 | .depthBiasClamp = 0.0f, | 622 | .depthBiasClamp = 0.0f, |
| 333 | .depthBiasSlopeFactor = 0.0f, | 623 | .depthBiasSlopeFactor = 0.0f, |
| 334 | .lineWidth = 1.0f, | 624 | .lineWidth = 1.0f, |
| 335 | }; | 625 | }; |
| 626 | VkPipelineRasterizationLineStateCreateInfoEXT line_state{ | ||
| 627 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, | ||
| 628 | .pNext = nullptr, | ||
| 629 | .lineRasterizationMode = key.state.smooth_lines != 0 | ||
| 630 | ? VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT | ||
| 631 | : VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT, | ||
| 632 | .stippledLineEnable = VK_FALSE, // TODO | ||
| 633 | .lineStippleFactor = 0, | ||
| 634 | .lineStipplePattern = 0, | ||
| 635 | }; | ||
| 636 | VkPipelineRasterizationConservativeStateCreateInfoEXT conservative_raster{ | ||
| 637 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, | ||
| 638 | .pNext = nullptr, | ||
| 639 | .flags = 0, | ||
| 640 | .conservativeRasterizationMode = key.state.conservative_raster_enable != 0 | ||
| 641 | ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT | ||
| 642 | : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT, | ||
| 643 | .extraPrimitiveOverestimationSize = 0.0f, | ||
| 644 | }; | ||
| 645 | VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_vertex{ | ||
| 646 | .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, | ||
| 647 | .pNext = nullptr, | ||
| 648 | .provokingVertexMode = key.state.provoking_vertex_last != 0 | ||
| 649 | ? VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT | ||
| 650 | : VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT, | ||
| 651 | }; | ||
| 652 | if (IsLine(input_assembly_topology) && device.IsExtLineRasterizationSupported()) { | ||
| 653 | line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state); | ||
| 654 | } | ||
| 655 | if (device.IsExtConservativeRasterizationSupported()) { | ||
| 656 | conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster); | ||
| 657 | } | ||
| 658 | if (device.IsExtProvokingVertexSupported()) { | ||
| 659 | provoking_vertex.pNext = std::exchange(rasterization_ci.pNext, &provoking_vertex); | ||
| 660 | } | ||
| 336 | 661 | ||
| 337 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ | 662 | const VkPipelineMultisampleStateCreateInfo multisample_ci{ |
| 338 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, | 663 | .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, |
| 339 | .pNext = nullptr, | 664 | .pNext = nullptr, |
| 340 | .flags = 0, | 665 | .flags = 0, |
| 341 | .rasterizationSamples = ConvertMsaaMode(state.msaa_mode), | 666 | .rasterizationSamples = MaxwellToVK::MsaaMode(key.state.msaa_mode), |
| 342 | .sampleShadingEnable = VK_FALSE, | 667 | .sampleShadingEnable = VK_FALSE, |
| 343 | .minSampleShading = 0.0f, | 668 | .minSampleShading = 0.0f, |
| 344 | .pSampleMask = nullptr, | 669 | .pSampleMask = nullptr, |
| 345 | .alphaToCoverageEnable = VK_FALSE, | 670 | .alphaToCoverageEnable = VK_FALSE, |
| 346 | .alphaToOneEnable = VK_FALSE, | 671 | .alphaToOneEnable = VK_FALSE, |
| 347 | }; | 672 | }; |
| 348 | |||
| 349 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ | 673 | const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ |
| 350 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, | 674 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, |
| 351 | .pNext = nullptr, | 675 | .pNext = nullptr, |
| @@ -355,32 +679,32 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 355 | .depthCompareOp = dynamic.depth_test_enable | 679 | .depthCompareOp = dynamic.depth_test_enable |
| 356 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) | 680 | ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc()) |
| 357 | : VK_COMPARE_OP_ALWAYS, | 681 | : VK_COMPARE_OP_ALWAYS, |
| 358 | .depthBoundsTestEnable = dynamic.depth_bounds_enable, | 682 | .depthBoundsTestEnable = dynamic.depth_bounds_enable && device.IsDepthBoundsSupported(), |
| 359 | .stencilTestEnable = dynamic.stencil_enable, | 683 | .stencilTestEnable = dynamic.stencil_enable, |
| 360 | .front = GetStencilFaceState(dynamic.front), | 684 | .front = GetStencilFaceState(dynamic.front), |
| 361 | .back = GetStencilFaceState(dynamic.back), | 685 | .back = GetStencilFaceState(dynamic.back), |
| 362 | .minDepthBounds = 0.0f, | 686 | .minDepthBounds = 0.0f, |
| 363 | .maxDepthBounds = 0.0f, | 687 | .maxDepthBounds = 0.0f, |
| 364 | }; | 688 | }; |
| 365 | 689 | if (dynamic.depth_bounds_enable && !device.IsDepthBoundsSupported()) { | |
| 366 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 690 | LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); |
| 367 | for (std::size_t index = 0; index < num_color_buffers; ++index) { | 691 | } |
| 368 | static constexpr std::array COMPONENT_TABLE{ | 692 | static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; |
| 693 | const size_t num_attachments{NumAttachments(key.state)}; | ||
| 694 | for (size_t index = 0; index < num_attachments; ++index) { | ||
| 695 | static constexpr std::array mask_table{ | ||
| 369 | VK_COLOR_COMPONENT_R_BIT, | 696 | VK_COLOR_COMPONENT_R_BIT, |
| 370 | VK_COLOR_COMPONENT_G_BIT, | 697 | VK_COLOR_COMPONENT_G_BIT, |
| 371 | VK_COLOR_COMPONENT_B_BIT, | 698 | VK_COLOR_COMPONENT_B_BIT, |
| 372 | VK_COLOR_COMPONENT_A_BIT, | 699 | VK_COLOR_COMPONENT_A_BIT, |
| 373 | }; | 700 | }; |
| 374 | const auto& blend = state.attachments[index]; | 701 | const auto& blend{key.state.attachments[index]}; |
| 375 | 702 | const std::array mask{blend.Mask()}; | |
| 376 | VkColorComponentFlags color_components = 0; | 703 | VkColorComponentFlags write_mask{}; |
| 377 | for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { | 704 | for (size_t i = 0; i < mask_table.size(); ++i) { |
| 378 | if (blend.Mask()[i]) { | 705 | write_mask |= mask[i] ? mask_table[i] : 0; |
| 379 | color_components |= COMPONENT_TABLE[i]; | ||
| 380 | } | ||
| 381 | } | 706 | } |
| 382 | 707 | cb_attachments.push_back({ | |
| 383 | cb_attachments[index] = { | ||
| 384 | .blendEnable = blend.enable != 0, | 708 | .blendEnable = blend.enable != 0, |
| 385 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), | 709 | .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()), |
| 386 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), | 710 | .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()), |
| @@ -388,28 +712,27 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 388 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), | 712 | .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()), |
| 389 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), | 713 | .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()), |
| 390 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), | 714 | .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()), |
| 391 | .colorWriteMask = color_components, | 715 | .colorWriteMask = write_mask, |
| 392 | }; | 716 | }); |
| 393 | } | 717 | } |
| 394 | |||
| 395 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ | 718 | const VkPipelineColorBlendStateCreateInfo color_blend_ci{ |
| 396 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, | 719 | .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, |
| 397 | .pNext = nullptr, | 720 | .pNext = nullptr, |
| 398 | .flags = 0, | 721 | .flags = 0, |
| 399 | .logicOpEnable = VK_FALSE, | 722 | .logicOpEnable = VK_FALSE, |
| 400 | .logicOp = VK_LOGIC_OP_COPY, | 723 | .logicOp = VK_LOGIC_OP_COPY, |
| 401 | .attachmentCount = num_color_buffers, | 724 | .attachmentCount = static_cast<u32>(cb_attachments.size()), |
| 402 | .pAttachments = cb_attachments.data(), | 725 | .pAttachments = cb_attachments.data(), |
| 403 | .blendConstants = {}, | 726 | .blendConstants = {}, |
| 404 | }; | 727 | }; |
| 405 | 728 | static_vector<VkDynamicState, 19> dynamic_states{ | |
| 406 | std::vector dynamic_states{ | ||
| 407 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, | 729 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, |
| 408 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | 730 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, |
| 409 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | 731 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, |
| 410 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, | 732 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE, |
| 733 | VK_DYNAMIC_STATE_LINE_WIDTH, | ||
| 411 | }; | 734 | }; |
| 412 | if (device.IsExtExtendedDynamicStateSupported()) { | 735 | if (key.state.extended_dynamic_state) { |
| 413 | static constexpr std::array extended{ | 736 | static constexpr std::array extended{ |
| 414 | VK_DYNAMIC_STATE_CULL_MODE_EXT, | 737 | VK_DYNAMIC_STATE_CULL_MODE_EXT, |
| 415 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, | 738 | VK_DYNAMIC_STATE_FRONT_FACE_EXT, |
| @@ -421,9 +744,11 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 421 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, | 744 | VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT, |
| 422 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, | 745 | VK_DYNAMIC_STATE_STENCIL_OP_EXT, |
| 423 | }; | 746 | }; |
| 747 | if (key.state.dynamic_vertex_input) { | ||
| 748 | dynamic_states.push_back(VK_DYNAMIC_STATE_VERTEX_INPUT_EXT); | ||
| 749 | } | ||
| 424 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); | 750 | dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end()); |
| 425 | } | 751 | } |
| 426 | |||
| 427 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ | 752 | const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ |
| 428 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, | 753 | .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, |
| 429 | .pNext = nullptr, | 754 | .pNext = nullptr, |
| @@ -431,34 +756,33 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 431 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), | 756 | .dynamicStateCount = static_cast<u32>(dynamic_states.size()), |
| 432 | .pDynamicStates = dynamic_states.data(), | 757 | .pDynamicStates = dynamic_states.data(), |
| 433 | }; | 758 | }; |
| 434 | 759 | [[maybe_unused]] const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | |
| 435 | const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||
| 436 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | 760 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, |
| 437 | .pNext = nullptr, | 761 | .pNext = nullptr, |
| 438 | .requiredSubgroupSize = GuestWarpSize, | 762 | .requiredSubgroupSize = GuestWarpSize, |
| 439 | }; | 763 | }; |
| 440 | 764 | static_vector<VkPipelineShaderStageCreateInfo, 5> shader_stages; | |
| 441 | std::vector<VkPipelineShaderStageCreateInfo> shader_stages; | 765 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 442 | std::size_t module_index = 0; | 766 | if (!spv_modules[stage]) { |
| 443 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 444 | if (!program[stage]) { | ||
| 445 | continue; | 767 | continue; |
| 446 | } | 768 | } |
| 447 | 769 | [[maybe_unused]] auto& stage_ci = | |
| 448 | VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); | 770 | shader_stages.emplace_back(VkPipelineShaderStageCreateInfo{ |
| 449 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; | 771 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 450 | stage_ci.pNext = nullptr; | 772 | .pNext = nullptr, |
| 451 | stage_ci.flags = 0; | 773 | .flags = 0, |
| 452 | stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); | 774 | .stage = MaxwellToVK::ShaderStage(Shader::StageFromIndex(stage)), |
| 453 | stage_ci.module = *modules[module_index++]; | 775 | .module = *spv_modules[stage], |
| 454 | stage_ci.pName = "main"; | 776 | .pName = "main", |
| 455 | stage_ci.pSpecializationInfo = nullptr; | 777 | .pSpecializationInfo = nullptr, |
| 456 | 778 | }); | |
| 779 | /* | ||
| 457 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | 780 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { |
| 458 | stage_ci.pNext = &subgroup_size_ci; | 781 | stage_ci.pNext = &subgroup_size_ci; |
| 459 | } | 782 | } |
| 783 | */ | ||
| 460 | } | 784 | } |
| 461 | return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{ | 785 | pipeline = device.GetLogical().CreateGraphicsPipeline({ |
| 462 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 786 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 463 | .pNext = nullptr, | 787 | .pNext = nullptr, |
| 464 | .flags = 0, | 788 | .flags = 0, |
| @@ -473,12 +797,31 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, | |||
| 473 | .pDepthStencilState = &depth_stencil_ci, | 797 | .pDepthStencilState = &depth_stencil_ci, |
| 474 | .pColorBlendState = &color_blend_ci, | 798 | .pColorBlendState = &color_blend_ci, |
| 475 | .pDynamicState = &dynamic_state_ci, | 799 | .pDynamicState = &dynamic_state_ci, |
| 476 | .layout = *layout, | 800 | .layout = *pipeline_layout, |
| 477 | .renderPass = renderpass, | 801 | .renderPass = render_pass, |
| 478 | .subpass = 0, | 802 | .subpass = 0, |
| 479 | .basePipelineHandle = nullptr, | 803 | .basePipelineHandle = nullptr, |
| 480 | .basePipelineIndex = 0, | 804 | .basePipelineIndex = 0, |
| 481 | }); | 805 | }); |
| 482 | } | 806 | } |
| 483 | 807 | ||
| 808 | void GraphicsPipeline::Validate() { | ||
| 809 | size_t num_images{}; | ||
| 810 | for (const auto& info : stage_infos) { | ||
| 811 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 812 | num_images += desc.count; | ||
| 813 | } | ||
| 814 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 815 | num_images += desc.count; | ||
| 816 | } | ||
| 817 | for (const auto& desc : info.texture_descriptors) { | ||
| 818 | num_images += desc.count; | ||
| 819 | } | ||
| 820 | for (const auto& desc : info.image_descriptors) { | ||
| 821 | num_images += desc.count; | ||
| 822 | } | ||
| 823 | } | ||
| 824 | ASSERT(num_images <= MAX_IMAGE_ELEMENTS); | ||
| 825 | } | ||
| 826 | |||
| 484 | } // namespace Vulkan | 827 | } // namespace Vulkan |
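The rasterization-state hunk above links the optional line-rasterization, conservative-rasterization and provoking-vertex structs into the create info's pNext chain only when the device reports support for each extension. Below is a minimal sketch of that std::exchange prepend idiom; the structs and boolean flags are placeholders standing in for the real Vulkan create-info types and the device capability queries.

```cpp
#include <utility>

// Placeholder for any Vulkan extension struct that begins with sType/pNext.
struct ExtensionNode {
    const void* pNext{nullptr};
    int payload{};
};

int main() {
    ExtensionNode rasterization_ci{};    // stands in for VkPipelineRasterizationStateCreateInfo
    ExtensionNode line_state{};          // stands in for the line rasterization extension struct
    ExtensionNode conservative_raster{}; // stands in for the conservative rasterization struct

    const bool line_supported = true;         // stands in for device.IsExtLineRasterizationSupported()
    const bool conservative_supported = true; // stands in for device.IsExtConservativeRasterizationSupported()

    // Each supported extension is prepended to the chain: std::exchange returns the
    // old head, which becomes the new node's pNext, while rasterization_ci.pNext is
    // updated to point at the new node. The checks can run in any order.
    if (line_supported) {
        line_state.pNext = std::exchange(rasterization_ci.pNext, &line_state);
    }
    if (conservative_supported) {
        conservative_raster.pNext = std::exchange(rasterization_ci.pNext, &conservative_raster);
    }
    // Resulting chain: rasterization_ci -> conservative_raster -> line_state -> nullptr
}
```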
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 8b6a98fe0..2bd48d697 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -1,30 +1,36 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 7 | #include <array> | 8 | #include <array> |
| 8 | #include <optional> | 9 | #include <atomic> |
| 9 | #include <vector> | 10 | #include <condition_variable> |
| 11 | #include <mutex> | ||
| 12 | #include <type_traits> | ||
| 10 | 13 | ||
| 11 | #include "common/common_types.h" | 14 | #include "common/thread_worker.h" |
| 15 | #include "shader_recompiler/shader_info.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 17 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 18 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 19 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 21 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 17 | 22 | ||
| 18 | namespace Vulkan { | 23 | namespace VideoCore { |
| 24 | class ShaderNotify; | ||
| 25 | } | ||
| 19 | 26 | ||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 27 | namespace Vulkan { |
| 21 | 28 | ||
| 22 | struct GraphicsPipelineCacheKey { | 29 | struct GraphicsPipelineCacheKey { |
| 23 | VkRenderPass renderpass; | 30 | std::array<u64, 6> unique_hashes; |
| 24 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | 31 | FixedPipelineState state; |
| 25 | FixedPipelineState fixed_state; | ||
| 26 | 32 | ||
| 27 | std::size_t Hash() const noexcept; | 33 | size_t Hash() const noexcept; |
| 28 | 34 | ||
| 29 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | 35 | bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; |
| 30 | 36 | ||
| @@ -32,72 +38,115 @@ struct GraphicsPipelineCacheKey { | |||
| 32 | return !operator==(rhs); | 38 | return !operator==(rhs); |
| 33 | } | 39 | } |
| 34 | 40 | ||
| 35 | std::size_t Size() const noexcept { | 41 | size_t Size() const noexcept { |
| 36 | return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size(); | 42 | return sizeof(unique_hashes) + state.Size(); |
| 37 | } | 43 | } |
| 38 | }; | 44 | }; |
| 39 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | 45 | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |
| 40 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | 46 | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); |
| 41 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | 47 | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); |
| 42 | 48 | ||
| 49 | } // namespace Vulkan | ||
| 50 | |||
| 51 | namespace std { | ||
| 52 | template <> | ||
| 53 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 54 | size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 55 | return k.Hash(); | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | } // namespace std | ||
| 59 | |||
| 60 | namespace Vulkan { | ||
| 61 | |||
| 43 | class Device; | 62 | class Device; |
| 44 | class VKDescriptorPool; | 63 | class RenderPassCache; |
| 45 | class VKScheduler; | 64 | class VKScheduler; |
| 46 | class VKUpdateDescriptorQueue; | 65 | class VKUpdateDescriptorQueue; |
| 47 | 66 | ||
| 48 | using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>; | 67 | class GraphicsPipeline { |
| 68 | static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||
| 49 | 69 | ||
| 50 | class VKGraphicsPipeline final { | ||
| 51 | public: | 70 | public: |
| 52 | explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_, | 71 | explicit GraphicsPipeline( |
| 53 | VKDescriptorPool& descriptor_pool, | 72 | Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, |
| 54 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 73 | VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, |
| 55 | const GraphicsPipelineCacheKey& key, | 74 | VideoCore::ShaderNotify* shader_notify, const Device& device, |
| 56 | vk::Span<VkDescriptorSetLayoutBinding> bindings, | 75 | DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, |
| 57 | const SPIRVProgram& program, u32 num_color_buffers); | 76 | Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, |
| 58 | ~VKGraphicsPipeline(); | 77 | const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, |
| 59 | 78 | const std::array<const Shader::Info*, NUM_STAGES>& infos); | |
| 60 | VkDescriptorSet CommitDescriptorSet(); | 79 | |
| 61 | 80 | GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; | |
| 62 | VkPipeline GetHandle() const { | 81 | GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; |
| 63 | return *pipeline; | 82 | |
| 83 | GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; | ||
| 84 | GraphicsPipeline(const GraphicsPipeline&) = delete; | ||
| 85 | |||
| 86 | void AddTransition(GraphicsPipeline* transition); | ||
| 87 | |||
| 88 | void Configure(bool is_indexed) { | ||
| 89 | configure_func(this, is_indexed); | ||
| 64 | } | 90 | } |
| 65 | 91 | ||
| 66 | VkPipelineLayout GetLayout() const { | 92 | [[nodiscard]] GraphicsPipeline* Next(const GraphicsPipelineCacheKey& current_key) noexcept { |
| 67 | return *layout; | 93 | if (key == current_key) { |
| 94 | return this; | ||
| 95 | } | ||
| 96 | const auto it{std::find(transition_keys.begin(), transition_keys.end(), current_key)}; | ||
| 97 | return it != transition_keys.end() ? transitions[std::distance(transition_keys.begin(), it)] | ||
| 98 | : nullptr; | ||
| 68 | } | 99 | } |
| 69 | 100 | ||
| 70 | GraphicsPipelineCacheKey GetCacheKey() const { | 101 | [[nodiscard]] bool IsBuilt() const noexcept { |
| 71 | return cache_key; | 102 | return is_built.load(std::memory_order::relaxed); |
| 72 | } | 103 | } |
| 73 | 104 | ||
| 74 | private: | 105 | template <typename Spec> |
| 75 | vk::DescriptorSetLayout CreateDescriptorSetLayout( | 106 | static auto MakeConfigureSpecFunc() { |
| 76 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const; | 107 | return [](GraphicsPipeline* pl, bool is_indexed) { pl->ConfigureImpl<Spec>(is_indexed); }; |
| 108 | } | ||
| 77 | 109 | ||
| 78 | vk::PipelineLayout CreatePipelineLayout() const; | 110 | private: |
| 111 | template <typename Spec> | ||
| 112 | void ConfigureImpl(bool is_indexed); | ||
| 79 | 113 | ||
| 80 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( | 114 | void ConfigureDraw(); |
| 81 | const SPIRVProgram& program) const; | ||
| 82 | 115 | ||
| 83 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | 116 | void MakePipeline(VkRenderPass render_pass); |
| 84 | 117 | ||
| 85 | vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass, | 118 | void Validate(); |
| 86 | u32 num_color_buffers) const; | ||
| 87 | 119 | ||
| 120 | const GraphicsPipelineCacheKey key; | ||
| 121 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 122 | Tegra::MemoryManager& gpu_memory; | ||
| 88 | const Device& device; | 123 | const Device& device; |
| 124 | TextureCache& texture_cache; | ||
| 125 | BufferCache& buffer_cache; | ||
| 89 | VKScheduler& scheduler; | 126 | VKScheduler& scheduler; |
| 90 | const GraphicsPipelineCacheKey cache_key; | 127 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 91 | const u64 hash; | 128 | |
| 129 | void (*configure_func)(GraphicsPipeline*, bool){}; | ||
| 130 | |||
| 131 | std::vector<GraphicsPipelineCacheKey> transition_keys; | ||
| 132 | std::vector<GraphicsPipeline*> transitions; | ||
| 133 | |||
| 134 | std::array<vk::ShaderModule, NUM_STAGES> spv_modules; | ||
| 135 | |||
| 136 | std::array<Shader::Info, NUM_STAGES> stage_infos; | ||
| 137 | std::array<u32, 5> enabled_uniform_buffer_masks{}; | ||
| 138 | VideoCommon::UniformBufferSizes uniform_buffer_sizes{}; | ||
| 92 | 139 | ||
| 93 | vk::DescriptorSetLayout descriptor_set_layout; | 140 | vk::DescriptorSetLayout descriptor_set_layout; |
| 94 | DescriptorAllocator descriptor_allocator; | 141 | DescriptorAllocator descriptor_allocator; |
| 95 | VKUpdateDescriptorQueue& update_descriptor_queue; | 142 | vk::PipelineLayout pipeline_layout; |
| 96 | vk::PipelineLayout layout; | 143 | vk::DescriptorUpdateTemplateKHR descriptor_update_template; |
| 97 | vk::DescriptorUpdateTemplateKHR descriptor_template; | ||
| 98 | std::vector<vk::ShaderModule> modules; | ||
| 99 | |||
| 100 | vk::Pipeline pipeline; | 144 | vk::Pipeline pipeline; |
| 145 | |||
| 146 | std::condition_variable build_condvar; | ||
| 147 | std::mutex build_mutex; | ||
| 148 | std::atomic_bool is_built{false}; | ||
| 149 | bool uses_push_descriptor{false}; | ||
| 101 | }; | 150 | }; |
| 102 | 151 | ||
| 103 | } // namespace Vulkan | 152 | } // namespace Vulkan |
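Next() and AddTransition() in the header above form a small per-pipeline transition cache: each pipeline remembers the pipelines that have previously followed it, keyed by their cache keys, so consecutive draws that toggle between a handful of states can skip the global hash-map lookup. A simplified sketch of the pattern, using hypothetical Key and Pipeline types in place of GraphicsPipelineCacheKey and the real class:

```cpp
#include <cstddef>
#include <vector>

struct Key {
    int value;
    bool operator==(const Key&) const = default;
};

struct Pipeline {
    Key key;
    std::vector<Key> transition_keys;
    std::vector<Pipeline*> transitions;

    // Record that `next` has been used right after this pipeline.
    void AddTransition(Pipeline* next) {
        transition_keys.push_back(next->key);
        transitions.push_back(next);
    }

    // Resolve the pipeline for `current_key` without touching the global cache.
    Pipeline* Next(const Key& current_key) {
        if (key == current_key) {
            return this; // same state as the previous draw
        }
        for (std::size_t i = 0; i < transition_keys.size(); ++i) {
            if (transition_keys[i] == current_key) {
                return transitions[i];
            }
        }
        return nullptr; // caller falls back to the pipeline cache's hash map
    }
};
```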
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index ee3cd35d0..4f8688118 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -39,9 +39,9 @@ public: | |||
| 39 | return KnownGpuTick() >= tick; | 39 | return KnownGpuTick() >= tick; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /// Advance to the logical tick. | 42 | /// Advance to the logical tick and return the old one |
| 43 | void NextTick() noexcept { | 43 | [[nodiscard]] u64 NextTick() noexcept { |
| 44 | ++current_tick; | 44 | return current_tick.fetch_add(1, std::memory_order::relaxed); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | /// Refresh the known GPU tick | 47 | /// Refresh the known GPU tick |
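The NextTick() change in this hunk advances the logical tick atomically and hands the caller the previous value, instead of bumping a plain counter and reading it back separately; the returned tick is the value the caller should signal or wait on. A small sketch of the same pattern, assuming a 64-bit counter that starts at 1:

```cpp
#include <atomic>
#include <cstdint>

class TickCounter {
public:
    // Atomically bump the tick and return the value before the increment.
    [[nodiscard]] std::uint64_t NextTick() noexcept {
        return current_tick.fetch_add(1, std::memory_order_relaxed);
    }

private:
    std::atomic<std::uint64_t> current_tick{1}; // starting value is an assumption here
};

int main() {
    TickCounter counter;
    const std::uint64_t signal_value = counter.NextTick(); // 1; the counter is now 2
    (void)signal_value;
}
```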
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 8991505ca..57b163247 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -4,444 +4,613 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <fstream> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 9 | #include <thread> | ||
| 8 | #include <vector> | 10 | #include <vector> |
| 9 | 11 | ||
| 10 | #include "common/bit_cast.h" | 12 | #include "common/bit_cast.h" |
| 11 | #include "common/cityhash.h" | 13 | #include "common/cityhash.h" |
| 14 | #include "common/fs/fs.h" | ||
| 15 | #include "common/fs/path_util.h" | ||
| 12 | #include "common/microprofile.h" | 16 | #include "common/microprofile.h" |
| 17 | #include "common/thread_worker.h" | ||
| 13 | #include "core/core.h" | 18 | #include "core/core.h" |
| 14 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 21 | #include "shader_recompiler/environment.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 23 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 24 | #include "shader_recompiler/program_header.h" | ||
| 25 | #include "video_core/dirty_flags.h" | ||
| 15 | #include "video_core/engines/kepler_compute.h" | 26 | #include "video_core/engines/kepler_compute.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 27 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/memory_manager.h" | 28 | #include "video_core/memory_manager.h" |
| 18 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 29 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 19 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 30 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 31 | #include "video_core/renderer_vulkan/pipeline_helper.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 32 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 33 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 34 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 24 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 35 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 36 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 37 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 26 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 38 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 27 | #include "video_core/shader/compiler_settings.h" | ||
| 28 | #include "video_core/shader/memory_util.h" | ||
| 29 | #include "video_core/shader_cache.h" | 39 | #include "video_core/shader_cache.h" |
| 40 | #include "video_core/shader_environment.h" | ||
| 30 | #include "video_core/shader_notify.h" | 41 | #include "video_core/shader_notify.h" |
| 31 | #include "video_core/vulkan_common/vulkan_device.h" | 42 | #include "video_core/vulkan_common/vulkan_device.h" |
| 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 43 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 33 | 44 | ||
| 34 | namespace Vulkan { | 45 | namespace Vulkan { |
| 35 | |||
| 36 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 46 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 37 | 47 | ||
| 38 | using Tegra::Engines::ShaderType; | ||
| 39 | using VideoCommon::Shader::GetShaderAddress; | ||
| 40 | using VideoCommon::Shader::GetShaderCode; | ||
| 41 | using VideoCommon::Shader::KERNEL_MAIN_OFFSET; | ||
| 42 | using VideoCommon::Shader::ProgramCode; | ||
| 43 | using VideoCommon::Shader::STAGE_MAIN_OFFSET; | ||
| 44 | |||
| 45 | namespace { | 48 | namespace { |
| 49 | using Shader::Backend::SPIRV::EmitSPIRV; | ||
| 50 | using Shader::Maxwell::MergeDualVertexPrograms; | ||
| 51 | using Shader::Maxwell::TranslateProgram; | ||
| 52 | using VideoCommon::ComputeEnvironment; | ||
| 53 | using VideoCommon::FileEnvironment; | ||
| 54 | using VideoCommon::GenericEnvironment; | ||
| 55 | using VideoCommon::GraphicsEnvironment; | ||
| 56 | |||
| 57 | constexpr u32 CACHE_VERSION = 5; | ||
| 58 | |||
| 59 | template <typename Container> | ||
| 60 | auto MakeSpan(Container& container) { | ||
| 61 | return std::span(container.data(), container.size()); | ||
| 62 | } | ||
| 46 | 63 | ||
| 47 | constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | 64 | Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { |
| 48 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | 65 | switch (comparison) { |
| 49 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | 66 | case Maxwell::ComparisonOp::Never: |
| 50 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | 67 | case Maxwell::ComparisonOp::NeverOld: |
| 51 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | 68 | return Shader::CompareFunction::Never; |
| 52 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 69 | case Maxwell::ComparisonOp::Less: |
| 53 | 70 | case Maxwell::ComparisonOp::LessOld: | |
| 54 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 71 | return Shader::CompareFunction::Less; |
| 55 | .depth = VideoCommon::Shader::CompileDepth::FullDecompile, | 72 | case Maxwell::ComparisonOp::Equal: |
| 56 | .disable_else_derivation = true, | 73 | case Maxwell::ComparisonOp::EqualOld: |
| 57 | }; | 74 | return Shader::CompareFunction::Equal; |
| 58 | 75 | case Maxwell::ComparisonOp::LessEqual: | |
| 59 | constexpr std::size_t GetStageFromProgram(std::size_t program) { | 76 | case Maxwell::ComparisonOp::LessEqualOld: |
| 60 | return program == 0 ? 0 : program - 1; | 77 | return Shader::CompareFunction::LessThanEqual; |
| 78 | case Maxwell::ComparisonOp::Greater: | ||
| 79 | case Maxwell::ComparisonOp::GreaterOld: | ||
| 80 | return Shader::CompareFunction::Greater; | ||
| 81 | case Maxwell::ComparisonOp::NotEqual: | ||
| 82 | case Maxwell::ComparisonOp::NotEqualOld: | ||
| 83 | return Shader::CompareFunction::NotEqual; | ||
| 84 | case Maxwell::ComparisonOp::GreaterEqual: | ||
| 85 | case Maxwell::ComparisonOp::GreaterEqualOld: | ||
| 86 | return Shader::CompareFunction::GreaterThanEqual; | ||
| 87 | case Maxwell::ComparisonOp::Always: | ||
| 88 | case Maxwell::ComparisonOp::AlwaysOld: | ||
| 89 | return Shader::CompareFunction::Always; | ||
| 90 | } | ||
| 91 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", comparison); | ||
| 92 | return {}; | ||
| 61 | } | 93 | } |
| 62 | 94 | ||
| 63 | constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) { | 95 | Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { |
| 64 | return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program))); | 96 | if (attr.enabled == 0) { |
| 97 | return Shader::AttributeType::Disabled; | ||
| 98 | } | ||
| 99 | switch (attr.Type()) { | ||
| 100 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 101 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 102 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 103 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 104 | case Maxwell::VertexAttribute::Type::Float: | ||
| 105 | return Shader::AttributeType::Float; | ||
| 106 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 107 | return Shader::AttributeType::SignedInt; | ||
| 108 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 109 | return Shader::AttributeType::UnsignedInt; | ||
| 110 | } | ||
| 111 | return Shader::AttributeType::Float; | ||
| 65 | } | 112 | } |
| 66 | 113 | ||
| 67 | ShaderType GetShaderType(Maxwell::ShaderProgram program) { | 114 | Shader::AttributeType AttributeType(const FixedPipelineState& state, size_t index) { |
| 68 | switch (program) { | 115 | switch (state.DynamicAttributeType(index)) { |
| 69 | case Maxwell::ShaderProgram::VertexB: | 116 | case 0: |
| 70 | return ShaderType::Vertex; | 117 | return Shader::AttributeType::Disabled; |
| 71 | case Maxwell::ShaderProgram::TesselationControl: | 118 | case 1: |
| 72 | return ShaderType::TesselationControl; | 119 | return Shader::AttributeType::Float; |
| 73 | case Maxwell::ShaderProgram::TesselationEval: | 120 | case 2: |
| 74 | return ShaderType::TesselationEval; | 121 | return Shader::AttributeType::SignedInt; |
| 75 | case Maxwell::ShaderProgram::Geometry: | 122 | case 3: |
| 76 | return ShaderType::Geometry; | 123 | return Shader::AttributeType::UnsignedInt; |
| 77 | case Maxwell::ShaderProgram::Fragment: | ||
| 78 | return ShaderType::Fragment; | ||
| 79 | default: | ||
| 80 | UNIMPLEMENTED_MSG("program={}", program); | ||
| 81 | return ShaderType::Vertex; | ||
| 82 | } | 124 | } |
| 125 | return Shader::AttributeType::Disabled; | ||
| 83 | } | 126 | } |
| 84 | 127 | ||
| 85 | template <VkDescriptorType descriptor_type, class Container> | 128 | Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> programs, |
| 86 | void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, | 129 | const GraphicsPipelineCacheKey& key, |
| 87 | VkShaderStageFlags stage_flags, const Container& container) { | 130 | const Shader::IR::Program& program, |
| 88 | const u32 num_entries = static_cast<u32>(std::size(container)); | 131 | const Shader::IR::Program* previous_program) { |
| 89 | for (std::size_t i = 0; i < num_entries; ++i) { | 132 | Shader::RuntimeInfo info; |
| 90 | u32 count = 1; | 133 | if (previous_program) { |
| 91 | if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { | 134 | info.previous_stage_stores = previous_program->info.stores; |
| 92 | // Combined image samplers can be arrayed. | 135 | if (previous_program->is_geometry_passthrough) { |
| 93 | count = container[i].size; | 136 | info.previous_stage_stores.mask |= previous_program->info.passthrough.mask; |
| 94 | } | 137 | } |
| 95 | bindings.push_back({ | 138 | } else { |
| 96 | .binding = binding++, | 139 | info.previous_stage_stores.mask.set(); |
| 97 | .descriptorType = descriptor_type, | 140 | } |
| 98 | .descriptorCount = count, | 141 | const Shader::Stage stage{program.stage}; |
| 99 | .stageFlags = stage_flags, | 142 | const bool has_geometry{key.unique_hashes[4] != 0 && !programs[4].is_geometry_passthrough}; |
| 100 | .pImmutableSamplers = nullptr, | 143 | const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; |
| 101 | }); | 144 | const float point_size{Common::BitCast<float>(key.state.point_size)}; |
| 145 | switch (stage) { | ||
| 146 | case Shader::Stage::VertexB: | ||
| 147 | if (!has_geometry) { | ||
| 148 | if (key.state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 149 | info.fixed_state_point_size = point_size; | ||
| 150 | } | ||
| 151 | if (key.state.xfb_enabled) { | ||
| 152 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 153 | } | ||
| 154 | info.convert_depth_mode = gl_ndc; | ||
| 155 | } | ||
| 156 | if (key.state.dynamic_vertex_input) { | ||
| 157 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 158 | info.generic_input_types[index] = AttributeType(key.state, index); | ||
| 159 | } | ||
| 160 | } else { | ||
| 161 | std::ranges::transform(key.state.attributes, info.generic_input_types.begin(), | ||
| 162 | &CastAttributeType); | ||
| 163 | } | ||
| 164 | break; | ||
| 165 | case Shader::Stage::TessellationEval: | ||
| 166 | // We have to flip tessellation clockwise for some reason... | ||
| 167 | info.tess_clockwise = key.state.tessellation_clockwise == 0; | ||
| 168 | info.tess_primitive = [&key] { | ||
| 169 | const u32 raw{key.state.tessellation_primitive.Value()}; | ||
| 170 | switch (static_cast<Maxwell::TessellationPrimitive>(raw)) { | ||
| 171 | case Maxwell::TessellationPrimitive::Isolines: | ||
| 172 | return Shader::TessPrimitive::Isolines; | ||
| 173 | case Maxwell::TessellationPrimitive::Triangles: | ||
| 174 | return Shader::TessPrimitive::Triangles; | ||
| 175 | case Maxwell::TessellationPrimitive::Quads: | ||
| 176 | return Shader::TessPrimitive::Quads; | ||
| 177 | } | ||
| 178 | UNREACHABLE(); | ||
| 179 | return Shader::TessPrimitive::Triangles; | ||
| 180 | }(); | ||
| 181 | info.tess_spacing = [&] { | ||
| 182 | const u32 raw{key.state.tessellation_spacing}; | ||
| 183 | switch (static_cast<Maxwell::TessellationSpacing>(raw)) { | ||
| 184 | case Maxwell::TessellationSpacing::Equal: | ||
| 185 | return Shader::TessSpacing::Equal; | ||
| 186 | case Maxwell::TessellationSpacing::FractionalOdd: | ||
| 187 | return Shader::TessSpacing::FractionalOdd; | ||
| 188 | case Maxwell::TessellationSpacing::FractionalEven: | ||
| 189 | return Shader::TessSpacing::FractionalEven; | ||
| 190 | } | ||
| 191 | UNREACHABLE(); | ||
| 192 | return Shader::TessSpacing::Equal; | ||
| 193 | }(); | ||
| 194 | break; | ||
| 195 | case Shader::Stage::Geometry: | ||
| 196 | if (program.output_topology == Shader::OutputTopology::PointList) { | ||
| 197 | info.fixed_state_point_size = point_size; | ||
| 198 | } | ||
| 199 | if (key.state.xfb_enabled != 0) { | ||
| 200 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 201 | } | ||
| 202 | info.convert_depth_mode = gl_ndc; | ||
| 203 | break; | ||
| 204 | case Shader::Stage::Fragment: | ||
| 205 | info.alpha_test_func = MaxwellToCompareFunction( | ||
| 206 | key.state.UnpackComparisonOp(key.state.alpha_test_func.Value())); | ||
| 207 | info.alpha_test_reference = Common::BitCast<float>(key.state.alpha_test_ref); | ||
| 208 | break; | ||
| 209 | default: | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | switch (key.state.topology) { | ||
| 213 | case Maxwell::PrimitiveTopology::Points: | ||
| 214 | info.input_topology = Shader::InputTopology::Points; | ||
| 215 | break; | ||
| 216 | case Maxwell::PrimitiveTopology::Lines: | ||
| 217 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 218 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 219 | info.input_topology = Shader::InputTopology::Lines; | ||
| 220 | break; | ||
| 221 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 222 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 223 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 224 | case Maxwell::PrimitiveTopology::Quads: | ||
| 225 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 226 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 227 | case Maxwell::PrimitiveTopology::Patches: | ||
| 228 | info.input_topology = Shader::InputTopology::Triangles; | ||
| 229 | break; | ||
| 230 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 231 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 232 | info.input_topology = Shader::InputTopology::LinesAdjacency; | ||
| 233 | break; | ||
| 234 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 235 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 236 | info.input_topology = Shader::InputTopology::TrianglesAdjacency; | ||
| 237 | break; | ||
| 102 | } | 238 | } |
| 239 | info.force_early_z = key.state.early_z != 0; | ||
| 240 | info.y_negate = key.state.y_negate != 0; | ||
| 241 | return info; | ||
| 103 | } | 242 | } |
| 243 | } // Anonymous namespace | ||
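The Hash() and operator== definitions that follow treat the cache keys as raw bytes: because the key structs are trivially copyable and assert std::has_unique_object_representations_v, hashing the object's bytes with CityHash64 and comparing them with std::memcmp is well defined. A self-contained sketch of the same idea, with a hypothetical key layout and FNV-1a standing in for CityHash64 to avoid the external dependency:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

struct ExampleKey {
    std::uint64_t unique_hash;
    std::uint32_t shared_memory_size;
    std::uint32_t workgroup_size;
};
// No padding bytes, so every bit pattern of the object is meaningful.
static_assert(std::has_unique_object_representations_v<ExampleKey>);
static_assert(std::is_trivially_copyable_v<ExampleKey>);

std::size_t HashKey(const ExampleKey& key) noexcept {
    const auto* bytes = reinterpret_cast<const unsigned char*>(&key);
    std::uint64_t hash = 0xcbf29ce484222325ULL; // FNV-1a offset basis
    for (std::size_t i = 0; i < sizeof(key); ++i) {
        hash = (hash ^ bytes[i]) * 0x100000001b3ULL; // FNV-1a prime
    }
    return static_cast<std::size_t>(hash);
}

bool KeysEqual(const ExampleKey& lhs, const ExampleKey& rhs) noexcept {
    return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
```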
| 104 | 244 | ||
| 105 | u32 FillDescriptorLayout(const ShaderEntries& entries, | 245 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| 106 | std::vector<VkDescriptorSetLayoutBinding>& bindings, | 246 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 107 | Maxwell::ShaderProgram program_type, u32 base_binding) { | 247 | return static_cast<size_t>(hash); |
| 108 | const ShaderType stage = GetStageFromProgram(program_type); | ||
| 109 | const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); | ||
| 110 | |||
| 111 | u32 binding = base_binding; | ||
| 112 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | ||
| 113 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | ||
| 114 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); | ||
| 115 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | ||
| 116 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 117 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | ||
| 118 | return binding; | ||
| 119 | } | 248 | } |
| 120 | 249 | ||
| 121 | } // Anonymous namespace | 250 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { |
| 251 | return std::memcmp(&rhs, this, sizeof *this) == 0; | ||
| 252 | } | ||
| 122 | 253 | ||
| 123 | std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { | 254 | size_t GraphicsPipelineCacheKey::Hash() const noexcept { |
| 124 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); | 255 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size()); |
| 125 | return static_cast<std::size_t>(hash); | 256 | return static_cast<size_t>(hash); |
| 126 | } | 257 | } |
| 127 | 258 | ||
| 128 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { | 259 | bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { |
| 129 | return std::memcmp(&rhs, this, Size()) == 0; | 260 | return std::memcmp(&rhs, this, Size()) == 0; |
| 130 | } | 261 | } |
| 131 | 262 | ||
| 132 | std::size_t ComputePipelineCacheKey::Hash() const noexcept { | 263 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, |
| 133 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | 264 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 134 | return static_cast<std::size_t>(hash); | 265 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 135 | } | 266 | VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 136 | 267 | VKUpdateDescriptorQueue& update_descriptor_queue_, | |
| 137 | bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { | 268 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 138 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 269 | TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_) |
| 270 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | ||
| 271 | device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, | ||
| 272 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, | ||
| 273 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_}, | ||
| 274 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, | ||
| 275 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), | ||
| 276 | serialization_thread(1, "yuzu:PipelineSerialization") { | ||
| 277 | const auto& float_control{device.FloatControlProperties()}; | ||
| 278 | const VkDriverIdKHR driver_id{device.GetDriverID()}; | ||
| 279 | profile = Shader::Profile{ | ||
| 280 | .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, | ||
| 281 | .unified_descriptor_binding = true, | ||
| 282 | .support_descriptor_aliasing = true, | ||
| 283 | .support_int8 = true, | ||
| 284 | .support_int16 = device.IsShaderInt16Supported(), | ||
| 285 | .support_int64 = device.IsShaderInt64Supported(), | ||
| 286 | .support_vertex_instance_id = false, | ||
| 287 | .support_float_controls = true, | ||
| 288 | .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | ||
| 289 | VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 290 | .support_separate_rounding_mode = | ||
| 291 | float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||
| 292 | .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||
| 293 | .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||
| 294 | .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||
| 295 | .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||
| 296 | .support_fp16_signed_zero_nan_preserve = | ||
| 297 | float_control.shaderSignedZeroInfNanPreserveFloat16 != VK_FALSE, | ||
| 298 | .support_fp32_signed_zero_nan_preserve = | ||
| 299 | float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE, | ||
| 300 | .support_fp64_signed_zero_nan_preserve = | ||
| 301 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, | ||
| 302 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), | ||
| 303 | .support_vote = true, | ||
| 304 | .support_viewport_index_layer_non_geometry = | ||
| 305 | device.IsExtShaderViewportIndexLayerSupported(), | ||
| 306 | .support_viewport_mask = device.IsNvViewportArray2Supported(), | ||
| 307 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | ||
| 308 | .support_demote_to_helper_invocation = true, | ||
| 309 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | ||
| 310 | .support_derivative_control = true, | ||
| 311 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | ||
| 312 | |||
| 313 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | ||
| 314 | |||
| 315 | .lower_left_origin_mode = false, | ||
| 316 | .need_declared_frag_colors = false, | ||
| 317 | |||
| 318 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, | ||
| 319 | .has_broken_unsigned_image_offsets = false, | ||
| 320 | .has_broken_signed_operations = false, | ||
| 321 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, | ||
| 322 | .ignore_nan_fp_comparisons = false, | ||
| 323 | }; | ||
| 324 | host_info = Shader::HostTranslateInfo{ | ||
| 325 | .support_float16 = device.IsFloat16Supported(), | ||
| 326 | .support_int64 = device.IsShaderInt64Supported(), | ||
| 327 | }; | ||
| 139 | } | 328 | } |
| 140 | 329 | ||
| 141 | Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_, | 330 | PipelineCache::~PipelineCache() = default; |
| 142 | GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_) | ||
| 143 | : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_), | ||
| 144 | shader_ir(program_code, main_offset_, compiler_settings, registry), | ||
| 145 | entries(GenerateShaderEntries(shader_ir)) {} | ||
| 146 | |||
| 147 | Shader::~Shader() = default; | ||
| 148 | |||
| 149 | VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | ||
| 150 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 151 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 152 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||
| 153 | VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, | ||
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue_) | ||
| 155 | : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | ||
| 156 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | ||
| 157 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{ | ||
| 158 | update_descriptor_queue_} {} | ||
| 159 | |||
| 160 | VKPipelineCache::~VKPipelineCache() = default; | ||
| 161 | 331 | ||
| 162 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | 332 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { |
| 163 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; | 333 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 164 | |||
| 165 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 166 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 167 | |||
| 168 | // Skip stages that are not enabled | ||
| 169 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | |||
| 173 | const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; | ||
| 174 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 175 | ASSERT(cpu_addr); | ||
| 176 | |||
| 177 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 178 | if (!result) { | ||
| 179 | const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; | ||
| 180 | |||
| 181 | // No shader found - create a new one | ||
| 182 | static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | ||
| 183 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); | ||
| 184 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); | ||
| 185 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 186 | |||
| 187 | auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, | ||
| 188 | std::move(code), stage_offset); | ||
| 189 | result = shader.get(); | ||
| 190 | 334 | ||
| 191 | if (cpu_addr) { | 335 | if (!RefreshStages(graphics_key.unique_hashes)) { |
| 192 | Register(std::move(shader), *cpu_addr, size_in_bytes); | 336 | current_pipeline = nullptr; |
| 193 | } else { | 337 | return nullptr; |
| 194 | null_shader = std::move(shader); | 338 | } |
| 195 | } | 339 | graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(), |
| 340 | device.IsExtVertexInputDynamicStateSupported()); | ||
| 341 | |||
| 342 | if (current_pipeline) { | ||
| 343 | GraphicsPipeline* const next{current_pipeline->Next(graphics_key)}; | ||
| 344 | if (next) { | ||
| 345 | current_pipeline = next; | ||
| 346 | return BuiltPipeline(current_pipeline); | ||
| 196 | } | 347 | } |
| 197 | shaders[index] = result; | ||
| 198 | } | 348 | } |
| 199 | return last_shaders = shaders; | 349 | return CurrentGraphicsPipelineSlowPath(); |
| 200 | } | 350 | } |
| 201 | 351 | ||
| 202 | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | 352 | ComputePipeline* PipelineCache::CurrentComputePipeline() { |
| 203 | const GraphicsPipelineCacheKey& key, u32 num_color_buffers, | ||
| 204 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 205 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 353 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 206 | 354 | ||
| 207 | if (last_graphics_pipeline && last_graphics_key == key) { | 355 | const ShaderInfo* const shader{ComputeShader()}; |
| 208 | return last_graphics_pipeline; | 356 | if (!shader) { |
| 209 | } | 357 | return nullptr; |
| 210 | last_graphics_key = key; | ||
| 211 | |||
| 212 | if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { | ||
| 213 | std::unique_lock lock{pipeline_cache}; | ||
| 214 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||
| 215 | if (is_cache_miss) { | ||
| 216 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 217 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||
| 218 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||
| 219 | async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | ||
| 220 | update_descriptor_queue, bindings, program, key, | ||
| 221 | num_color_buffers); | ||
| 222 | } | ||
| 223 | last_graphics_pipeline = pair->second.get(); | ||
| 224 | return last_graphics_pipeline; | ||
| 225 | } | 358 | } |
| 226 | 359 | const auto& qmd{kepler_compute.launch_description}; | |
| 227 | const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | 360 | const ComputePipelineCacheKey key{ |
| 228 | auto& entry = pair->second; | 361 | .unique_hash = shader->unique_hash, |
| 229 | if (is_cache_miss) { | 362 | .shared_memory_size = qmd.shared_alloc, |
| 230 | gpu.ShaderNotify().MarkSharderBuilding(); | 363 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, |
| 231 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 364 | }; |
| 232 | const auto [program, bindings] = DecompileShaders(key.fixed_state); | 365 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; |
| 233 | entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | 366 | auto& pipeline{pair->second}; |
| 234 | update_descriptor_queue, key, bindings, | 367 | if (!is_new) { |
| 235 | program, num_color_buffers); | 368 | return pipeline.get(); |
| 236 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 237 | } | 369 | } |
| 238 | last_graphics_pipeline = entry.get(); | 370 | pipeline = CreateComputePipeline(key, shader); |
| 239 | return last_graphics_pipeline; | 371 | return pipeline.get(); |
| 240 | } | 372 | } |
| 241 | 373 | ||
| 242 | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | 374 | void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 243 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 375 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 244 | 376 | if (title_id == 0) { | |
| 245 | const auto [pair, is_cache_miss] = compute_cache.try_emplace(key); | 377 | return; |
| 246 | auto& entry = pair->second; | ||
| 247 | if (!is_cache_miss) { | ||
| 248 | return *entry; | ||
| 249 | } | 378 | } |
| 250 | LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | 379 | const auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; |
| 251 | 380 | const auto base_dir{shader_dir / fmt::format("{:016x}", title_id)}; | |
| 252 | const GPUVAddr gpu_addr = key.shader; | 381 | if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir)) { |
| 253 | 382 | LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); | |
| 254 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 383 | return; |
| 255 | ASSERT(cpu_addr); | 384 | } |
| 385 | pipeline_cache_filename = base_dir / "vulkan.bin"; | ||
| 386 | |||
| 387 | struct { | ||
| 388 | std::mutex mutex; | ||
| 389 | size_t total{}; | ||
| 390 | size_t built{}; | ||
| 391 | bool has_loaded{}; | ||
| 392 | } state; | ||
| 393 | |||
| 394 | const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { | ||
| 395 | ComputePipelineCacheKey key; | ||
| 396 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 397 | |||
| 398 | workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { | ||
| 399 | ShaderPools pools; | ||
| 400 | auto pipeline{CreateComputePipeline(pools, key, env, false)}; | ||
| 401 | std::lock_guard lock{state.mutex}; | ||
| 402 | if (pipeline) { | ||
| 403 | compute_cache.emplace(key, std::move(pipeline)); | ||
| 404 | } | ||
| 405 | ++state.built; | ||
| 406 | if (state.has_loaded) { | ||
| 407 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 408 | } | ||
| 409 | }); | ||
| 410 | ++state.total; | ||
| 411 | }}; | ||
| 412 | const bool extended_dynamic_state = device.IsExtExtendedDynamicStateSupported(); | ||
| 413 | const bool dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(); | ||
| 414 | const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { | ||
| 415 | GraphicsPipelineCacheKey key; | ||
| 416 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 417 | |||
| 418 | if ((key.state.extended_dynamic_state != 0) != extended_dynamic_state || | ||
| 419 | (key.state.dynamic_vertex_input != 0) != dynamic_vertex_input) { | ||
| 420 | return; | ||
| 421 | } | ||
| 422 | workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { | ||
| 423 | ShaderPools pools; | ||
| 424 | boost::container::static_vector<Shader::Environment*, 5> env_ptrs; | ||
| 425 | for (auto& env : envs) { | ||
| 426 | env_ptrs.push_back(&env); | ||
| 427 | } | ||
| 428 | auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; | ||
| 256 | 429 | ||
| 257 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); | 430 | std::lock_guard lock{state.mutex}; |
| 258 | if (!shader) { | 431 | graphics_cache.emplace(key, std::move(pipeline)); |
| 259 | // No shader found - create a new one | 432 | ++state.built; |
| 260 | const auto host_ptr = gpu_memory.GetPointer(gpu_addr); | 433 | if (state.has_loaded) { |
| 434 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 435 | } | ||
| 436 | }); | ||
| 437 | ++state.total; | ||
| 438 | }}; | ||
| 439 | VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, CACHE_VERSION, load_compute, | ||
| 440 | load_graphics); | ||
| 261 | 441 | ||
| 262 | ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); | 442 | std::unique_lock lock{state.mutex}; |
| 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 443 | callback(VideoCore::LoadCallbackStage::Build, 0, state.total); |
| 444 | state.has_loaded = true; | ||
| 445 | lock.unlock(); | ||
| 264 | 446 | ||
| 265 | auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, | 447 | workers.WaitForRequests(); |
| 266 | *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); | 448 | } |
| 267 | shader = shader_info.get(); | ||
| 268 | 449 | ||
| 269 | if (cpu_addr) { | 450 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipelineSlowPath() { |
| 270 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); | 451 | const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; |
| 271 | } else { | 452 | auto& pipeline{pair->second}; |
| 272 | null_kernel = std::move(shader_info); | 453 | if (is_new) { |
| 273 | } | 454 | pipeline = CreateGraphicsPipeline(); |
| 274 | } | 455 | } |
| 275 | 456 | if (!pipeline) { | |
| 276 | const Specialization specialization{ | 457 | return nullptr; |
| 277 | .base_binding = 0, | 458 | } |
| 278 | .workgroup_size = key.workgroup_size, | 459 | if (current_pipeline) { |
| 279 | .shared_memory_size = key.shared_memory_size, | 460 | current_pipeline->AddTransition(pipeline.get()); |
| 280 | .point_size = std::nullopt, | 461 | } |
| 281 | .enabled_attributes = {}, | 462 | current_pipeline = pipeline.get(); |
| 282 | .attribute_types = {}, | 463 | return BuiltPipeline(current_pipeline); |
| 283 | .ndc_minus_one_to_one = false, | ||
| 284 | }; | ||
| 285 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, | ||
| 286 | shader->GetRegistry(), specialization), | ||
| 287 | shader->GetEntries()}; | ||
| 288 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | ||
| 289 | update_descriptor_queue, spirv_shader); | ||
| 290 | return *entry; | ||
| 291 | } | 464 | } |
| 292 | 465 | ||
| 293 | void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | 466 | GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const noexcept { |
| 294 | gpu.ShaderNotify().MarkShaderComplete(); | 467 | if (pipeline->IsBuilt()) { |
| 295 | std::unique_lock lock{pipeline_cache}; | 468 | return pipeline; |
| 296 | graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); | 469 | } |
| 470 | if (!use_asynchronous_shaders) { | ||
| 471 | return pipeline; | ||
| 472 | } | ||
| 473 | // If something is using depth, we can assume that games are not rendering anything which | ||
| 474 | // will be used one time. | ||
| 475 | if (maxwell3d.regs.zeta_enable) { | ||
| 476 | return nullptr; | ||
| 477 | } | ||
| 478 | // If games are using a small index count, we can assume these are full screen quads. | ||
| 479 | // Usually these shaders are only used once for building textures so we can assume they | ||
| 480 | // can't be built async | ||
| 481 | if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) { | ||
| 482 | return pipeline; | ||
| 483 | } | ||
| 484 | return nullptr; | ||
| 297 | } | 485 | } |
| 298 | 486 | ||
| 299 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { | 487 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( |
| 300 | bool finished = false; | 488 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, |
| 301 | const auto Finish = [&] { | 489 | std::span<Shader::Environment* const> envs, bool build_in_parallel) try { |
| 302 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | 490 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 303 | // flush. | 491 | size_t env_index{0}; |
| 304 | if (finished) { | 492 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 305 | return; | 493 | const bool uses_vertex_a{key.unique_hashes[0] != 0}; |
| 306 | } | 494 | const bool uses_vertex_b{key.unique_hashes[1] != 0}; |
| 307 | finished = true; | 495 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 308 | scheduler.Finish(); | 496 | if (key.unique_hashes[index] == 0) { |
| 309 | }; | ||
| 310 | |||
| 311 | const GPUVAddr invalidated_addr = shader->GetGpuAddr(); | ||
| 312 | for (auto it = graphics_cache.begin(); it != graphics_cache.end();) { | ||
| 313 | auto& entry = it->first; | ||
| 314 | if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) == | ||
| 315 | entry.shaders.end()) { | ||
| 316 | ++it; | ||
| 317 | continue; | 497 | continue; |
| 318 | } | 498 | } |
| 319 | Finish(); | 499 | Shader::Environment& env{*envs[env_index]}; |
| 320 | it = graphics_cache.erase(it); | 500 | ++env_index; |
| 501 | |||
| 502 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | ||
| 503 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||
| 504 | if (!uses_vertex_a || index != 1) { | ||
| 505 | // Normal path | ||
| 506 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | ||
| 507 | } else { | ||
| 508 | // VertexB path when VertexA is present. | ||
| 509 | auto& program_va{programs[0]}; | ||
| 510 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||
| 511 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||
| 512 | } | ||
| 321 | } | 513 | } |
| 322 | for (auto it = compute_cache.begin(); it != compute_cache.end();) { | 514 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; |
| 323 | auto& entry = it->first; | 515 | std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; |
| 324 | if (entry.shader != invalidated_addr) { | 516 | |
| 325 | ++it; | 517 | const Shader::IR::Program* previous_stage{}; |
| 518 | Shader::Backend::Bindings binding; | ||
| 519 | for (size_t index = uses_vertex_a && uses_vertex_b ? 1 : 0; index < Maxwell::MaxShaderProgram; | ||
| 520 | ++index) { | ||
| 521 | if (key.unique_hashes[index] == 0) { | ||
| 326 | continue; | 522 | continue; |
| 327 | } | 523 | } |
| 328 | Finish(); | 524 | UNIMPLEMENTED_IF(index == 0); |
| 329 | it = compute_cache.erase(it); | 525 | |
| 526 | Shader::IR::Program& program{programs[index]}; | ||
| 527 | const size_t stage_index{index - 1}; | ||
| 528 | infos[stage_index] = &program.info; | ||
| 529 | |||
| 530 | const auto runtime_info{MakeRuntimeInfo(programs, key, program, previous_stage)}; | ||
| 531 | const std::vector<u32> code{EmitSPIRV(profile, runtime_info, program, binding)}; | ||
| 532 | device.SaveShader(code); | ||
| 533 | modules[stage_index] = BuildShader(device, code); | ||
| 534 | if (device.HasDebuggingToolAttached()) { | ||
| 535 | const std::string name{fmt::format("Shader {:016x}", key.unique_hashes[index])}; | ||
| 536 | modules[stage_index].SetObjectNameEXT(name.c_str()); | ||
| 537 | } | ||
| 538 | previous_stage = &program; | ||
| 330 | } | 539 | } |
| 540 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | ||
| 541 | return std::make_unique<GraphicsPipeline>( | ||
| 542 | maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device, | ||
| 543 | descriptor_pool, update_descriptor_queue, thread_worker, render_pass_cache, key, | ||
| 544 | std::move(modules), infos); | ||
| 545 | |||
| 546 | } catch (const Shader::Exception& exception) { | ||
| 547 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | ||
| 548 | return nullptr; | ||
| 331 | } | 549 | } |
| 332 | 550 | ||
| 333 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | 551 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { |
| 334 | VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { | 552 | GraphicsEnvironments environments; |
| 335 | Specialization specialization; | 553 | GetGraphicsEnvironments(environments, graphics_key.unique_hashes); |
| 336 | if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { | ||
| 337 | float point_size; | ||
| 338 | std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); | ||
| 339 | specialization.point_size = point_size; | ||
| 340 | ASSERT(point_size != 0.0f); | ||
| 341 | } | ||
| 342 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | ||
| 343 | const auto& attribute = fixed_state.attributes[i]; | ||
| 344 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 345 | specialization.attribute_types[i] = attribute.Type(); | ||
| 346 | } | ||
| 347 | specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one; | ||
| 348 | specialization.early_fragment_tests = fixed_state.early_z; | ||
| 349 | |||
| 350 | // Alpha test | ||
| 351 | specialization.alpha_test_func = | ||
| 352 | FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value()); | ||
| 353 | specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref); | ||
| 354 | |||
| 355 | SPIRVProgram program; | ||
| 356 | std::vector<VkDescriptorSetLayoutBinding> bindings; | ||
| 357 | 554 | ||
| 358 | for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) { | 555 | main_pools.ReleaseContents(); |
| 359 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | 556 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; |
| 360 | // Skip stages that are not enabled | 557 | if (!pipeline || pipeline_cache_filename.empty()) { |
| 361 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | 558 | return pipeline; |
| 362 | continue; | ||
| 363 | } | ||
| 364 | const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); | ||
| 365 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 366 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); | ||
| 367 | |||
| 368 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | ||
| 369 | const ShaderType program_type = GetShaderType(program_enum); | ||
| 370 | const auto& entries = shader->GetEntries(); | ||
| 371 | program[stage] = { | ||
| 372 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | ||
| 373 | entries, | ||
| 374 | }; | ||
| 375 | |||
| 376 | const u32 old_binding = specialization.base_binding; | ||
| 377 | specialization.base_binding = | ||
| 378 | FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding); | ||
| 379 | ASSERT(old_binding + entries.NumBindings() == specialization.base_binding); | ||
| 380 | } | 559 | } |
| 381 | return {std::move(program), std::move(bindings)}; | 560 | serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { |
| 382 | } | 561 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> |
| 383 | 562 | env_ptrs; | |
| 384 | template <VkDescriptorType descriptor_type, class Container> | 563 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 385 | void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, | 564 | if (key.unique_hashes[index] != 0) { |
| 386 | u32& offset, const Container& container) { | 565 | env_ptrs.push_back(&envs[index]); |
| 387 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | 566 | } |
| 388 | const u32 count = static_cast<u32>(std::size(container)); | ||
| 389 | |||
| 390 | if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { | ||
| 391 | for (u32 i = 0; i < count; ++i) { | ||
| 392 | const u32 num_samplers = container[i].size; | ||
| 393 | template_entries.push_back({ | ||
| 394 | .dstBinding = binding, | ||
| 395 | .dstArrayElement = 0, | ||
| 396 | .descriptorCount = num_samplers, | ||
| 397 | .descriptorType = descriptor_type, | ||
| 398 | .offset = offset, | ||
| 399 | .stride = entry_size, | ||
| 400 | }); | ||
| 401 | |||
| 402 | ++binding; | ||
| 403 | offset += num_samplers * entry_size; | ||
| 404 | } | 567 | } |
| 405 | return; | 568 | SerializePipeline(key, env_ptrs, pipeline_cache_filename, CACHE_VERSION); |
| 406 | } | 569 | }); |
| 570 | return pipeline; | ||
| 571 | } | ||
| 407 | 572 | ||
| 408 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || | 573 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 409 | descriptor_type == STORAGE_TEXEL_BUFFER) { | 574 | const ComputePipelineCacheKey& key, const ShaderInfo* shader) { |
| 410 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. | 575 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; |
| 411 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | 576 | const auto& qmd{kepler_compute.launch_description}; |
| 412 | for (u32 i = 0; i < count; ++i) { | 577 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; |
| 413 | template_entries.push_back({ | 578 | env.SetCachedSize(shader->size_bytes); |
| 414 | .dstBinding = binding + i, | 579 | |
| 415 | .dstArrayElement = 0, | 580 | main_pools.ReleaseContents(); |
| 416 | .descriptorCount = 1, | 581 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; |
| 417 | .descriptorType = descriptor_type, | 582 | if (!pipeline || pipeline_cache_filename.empty()) { |
| 418 | .offset = static_cast<std::size_t>(offset + i * entry_size), | 583 | return pipeline; |
| 419 | .stride = entry_size, | ||
| 420 | }); | ||
| 421 | } | ||
| 422 | } else if (count > 0) { | ||
| 423 | template_entries.push_back({ | ||
| 424 | .dstBinding = binding, | ||
| 425 | .dstArrayElement = 0, | ||
| 426 | .descriptorCount = count, | ||
| 427 | .descriptorType = descriptor_type, | ||
| 428 | .offset = offset, | ||
| 429 | .stride = entry_size, | ||
| 430 | }); | ||
| 431 | } | 584 | } |
| 432 | offset += count * entry_size; | 585 | serialization_thread.QueueWork([this, key, env = std::move(env)] { |
| 433 | binding += count; | 586 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, |
| 587 | pipeline_cache_filename, CACHE_VERSION); | ||
| 588 | }); | ||
| 589 | return pipeline; | ||
| 434 | } | 590 | } |
| 435 | 591 | ||
| 436 | void FillDescriptorUpdateTemplateEntries( | 592 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 437 | const ShaderEntries& entries, u32& binding, u32& offset, | 593 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 438 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | 594 | bool build_in_parallel) try { |
| 439 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); | 595 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 440 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); | 596 | |
| 441 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); | 597 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 442 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); | 598 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 443 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); | 599 | const std::vector<u32> code{EmitSPIRV(profile, program)}; |
| 444 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); | 600 | device.SaveShader(code); |
| 601 | vk::ShaderModule spv_module{BuildShader(device, code)}; | ||
| 602 | if (device.HasDebuggingToolAttached()) { | ||
| 603 | const auto name{fmt::format("Shader {:016x}", key.unique_hash)}; | ||
| 604 | spv_module.SetObjectNameEXT(name.c_str()); | ||
| 605 | } | ||
| 606 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | ||
| 607 | return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, | ||
| 608 | thread_worker, &shader_notify, program.info, | ||
| 609 | std::move(spv_module)); | ||
| 610 | |||
| 611 | } catch (const Shader::Exception& exception) { | ||
| 612 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | ||
| 613 | return nullptr; | ||
| 445 | } | 614 | } |
| 446 | 615 | ||
| 447 | } // namespace Vulkan | 616 | } // namespace Vulkan |
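Editor's note: the new PipelineCache lookups above (CurrentComputePipeline and CurrentGraphicsPipelineSlowPath) lean on unordered_map::try_emplace so a single hash lookup both finds an existing entry and reserves the slot on a miss. A minimal sketch of that pattern, using invented DemoKey/DemoPipeline stand-ins rather than yuzu's real types:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <unordered_map>

    struct DemoKey {
        std::uint64_t hash;
        bool operator==(const DemoKey& rhs) const noexcept { return hash == rhs.hash; }
    };
    struct DemoKeyHasher {
        std::size_t operator()(const DemoKey& key) const noexcept { return key.hash; }
    };
    struct DemoPipeline {};

    using DemoCache = std::unordered_map<DemoKey, std::unique_ptr<DemoPipeline>, DemoKeyHasher>;

    DemoPipeline* Lookup(DemoCache& cache, const DemoKey& key) {
        // try_emplace inserts an empty entry only when the key is missing, so one
        // lookup both finds an existing pipeline and reserves the slot for a new one.
        const auto [it, is_new] = cache.try_emplace(key);
        auto& pipeline = it->second;
        if (is_new) {
            pipeline = std::make_unique<DemoPipeline>(); // built only on a cache miss
        }
        return pipeline.get();
    }

The real code additionally keeps the freshly built pipeline behind a unique_ptr in the map, so an asynchronous build can later fill in the same slot without a second lookup.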
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 89d635a3d..efe5a7ed8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -6,24 +6,28 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <filesystem> | ||
| 10 | #include <iosfwd> | ||
| 9 | #include <memory> | 11 | #include <memory> |
| 10 | #include <type_traits> | 12 | #include <type_traits> |
| 11 | #include <unordered_map> | 13 | #include <unordered_map> |
| 12 | #include <utility> | 14 | #include <utility> |
| 13 | #include <vector> | 15 | #include <vector> |
| 14 | 16 | ||
| 15 | #include <boost/functional/hash.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | 18 | #include "common/thread_worker.h" |
| 19 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 20 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 22 | #include "shader_recompiler/host_translate_info.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | ||
| 24 | #include "shader_recompiler/profile.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 26 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 27 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | ||
| 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 29 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 30 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 23 | #include "video_core/shader/async_shaders.h" | ||
| 24 | #include "video_core/shader/memory_util.h" | ||
| 25 | #include "video_core/shader/registry.h" | ||
| 26 | #include "video_core/shader/shader_ir.h" | ||
| 27 | #include "video_core/shader_cache.h" | 31 | #include "video_core/shader_cache.h" |
| 28 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 29 | 33 | ||
| @@ -31,23 +35,24 @@ namespace Core { | |||
| 31 | class System; | 35 | class System; |
| 32 | } | 36 | } |
| 33 | 37 | ||
| 34 | namespace Vulkan { | 38 | namespace Shader::IR { |
| 39 | struct Program; | ||
| 40 | } | ||
| 35 | 41 | ||
| 36 | class Device; | 42 | namespace VideoCore { |
| 37 | class RasterizerVulkan; | 43 | class ShaderNotify; |
| 38 | class VKComputePipeline; | 44 | } |
| 39 | class VKDescriptorPool; | 45 | |
| 40 | class VKScheduler; | 46 | namespace Vulkan { |
| 41 | class VKUpdateDescriptorQueue; | ||
| 42 | 47 | ||
| 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 48 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 44 | 49 | ||
| 45 | struct ComputePipelineCacheKey { | 50 | struct ComputePipelineCacheKey { |
| 46 | GPUVAddr shader; | 51 | u64 unique_hash; |
| 47 | u32 shared_memory_size; | 52 | u32 shared_memory_size; |
| 48 | std::array<u32, 3> workgroup_size; | 53 | std::array<u32, 3> workgroup_size; |
| 49 | 54 | ||
| 50 | std::size_t Hash() const noexcept; | 55 | size_t Hash() const noexcept; |
| 51 | 56 | ||
| 52 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; | 57 | bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; |
| 53 | 58 | ||
| @@ -64,15 +69,8 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>); | |||
| 64 | namespace std { | 69 | namespace std { |
| 65 | 70 | ||
| 66 | template <> | 71 | template <> |
| 67 | struct hash<Vulkan::GraphicsPipelineCacheKey> { | ||
| 68 | std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept { | ||
| 69 | return k.Hash(); | ||
| 70 | } | ||
| 71 | }; | ||
| 72 | |||
| 73 | template <> | ||
| 74 | struct hash<Vulkan::ComputePipelineCacheKey> { | 72 | struct hash<Vulkan::ComputePipelineCacheKey> { |
| 75 | std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { | 73 | size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept { |
| 76 | return k.Hash(); | 74 | return k.Hash(); |
| 77 | } | 75 | } |
| 78 | }; | 76 | }; |
| @@ -81,94 +79,90 @@ struct hash<Vulkan::ComputePipelineCacheKey> { | |||
| 81 | 79 | ||
| 82 | namespace Vulkan { | 80 | namespace Vulkan { |
| 83 | 81 | ||
| 84 | class Shader { | 82 | class ComputePipeline; |
| 85 | public: | 83 | class Device; |
| 86 | explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, | 84 | class DescriptorPool; |
| 87 | Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_, | 85 | class RasterizerVulkan; |
| 88 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset_); | 86 | class RenderPassCache; |
| 89 | ~Shader(); | 87 | class VKScheduler; |
| 90 | 88 | class VKUpdateDescriptorQueue; | |
| 91 | GPUVAddr GetGpuAddr() const { | ||
| 92 | return gpu_addr; | ||
| 93 | } | ||
| 94 | |||
| 95 | VideoCommon::Shader::ShaderIR& GetIR() { | ||
| 96 | return shader_ir; | ||
| 97 | } | ||
| 98 | |||
| 99 | const VideoCommon::Shader::ShaderIR& GetIR() const { | ||
| 100 | return shader_ir; | ||
| 101 | } | ||
| 102 | 89 | ||
| 103 | const VideoCommon::Shader::Registry& GetRegistry() const { | 90 | using VideoCommon::ShaderInfo; |
| 104 | return registry; | ||
| 105 | } | ||
| 106 | 91 | ||
| 107 | const ShaderEntries& GetEntries() const { | 92 | struct ShaderPools { |
| 108 | return entries; | 93 | void ReleaseContents() { |
| 94 | flow_block.ReleaseContents(); | ||
| 95 | block.ReleaseContents(); | ||
| 96 | inst.ReleaseContents(); | ||
| 109 | } | 97 | } |
| 110 | 98 | ||
| 111 | private: | 99 | Shader::ObjectPool<Shader::IR::Inst> inst; |
| 112 | GPUVAddr gpu_addr{}; | 100 | Shader::ObjectPool<Shader::IR::Block> block; |
| 113 | VideoCommon::Shader::ProgramCode program_code; | 101 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; |
| 114 | VideoCommon::Shader::Registry registry; | ||
| 115 | VideoCommon::Shader::ShaderIR shader_ir; | ||
| 116 | ShaderEntries entries; | ||
| 117 | }; | 102 | }; |
| 118 | 103 | ||
| 119 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { | 104 | class PipelineCache : public VideoCommon::ShaderCache { |
| 120 | public: | 105 | public: |
| 121 | explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | 106 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, |
| 122 | Tegra::Engines::Maxwell3D& maxwell3d, | 107 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 123 | Tegra::Engines::KeplerCompute& kepler_compute, | 108 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 124 | Tegra::MemoryManager& gpu_memory, const Device& device, | 109 | VKScheduler& scheduler, DescriptorPool& descriptor_pool, |
| 125 | VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, | 110 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 126 | VKUpdateDescriptorQueue& update_descriptor_queue); | 111 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
| 127 | ~VKPipelineCache() override; | 112 | TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_); |
| 113 | ~PipelineCache(); | ||
| 114 | |||
| 115 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); | ||
| 128 | 116 | ||
| 129 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | 117 | [[nodiscard]] ComputePipeline* CurrentComputePipeline(); |
| 130 | 118 | ||
| 131 | VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | 119 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 132 | u32 num_color_buffers, | 120 | const VideoCore::DiskResourceLoadCallback& callback); |
| 133 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 134 | 121 | ||
| 135 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 122 | private: |
| 123 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipelineSlowPath(); | ||
| 136 | 124 | ||
| 137 | void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); | 125 | [[nodiscard]] GraphicsPipeline* BuiltPipeline(GraphicsPipeline* pipeline) const noexcept; |
| 138 | 126 | ||
| 139 | protected: | 127 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); |
| 140 | void OnShaderRemoval(Shader* shader) final; | ||
| 141 | 128 | ||
| 142 | private: | 129 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( |
| 143 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | 130 | ShaderPools& pools, const GraphicsPipelineCacheKey& key, |
| 144 | const FixedPipelineState& fixed_state); | 131 | std::span<Shader::Environment* const> envs, bool build_in_parallel); |
| 145 | 132 | ||
| 146 | Tegra::GPU& gpu; | 133 | std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineCacheKey& key, |
| 147 | Tegra::Engines::Maxwell3D& maxwell3d; | 134 | const ShaderInfo* shader); |
| 148 | Tegra::Engines::KeplerCompute& kepler_compute; | 135 | |
| 149 | Tegra::MemoryManager& gpu_memory; | 136 | std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools, |
| 137 | const ComputePipelineCacheKey& key, | ||
| 138 | Shader::Environment& env, | ||
| 139 | bool build_in_parallel); | ||
| 150 | 140 | ||
| 151 | const Device& device; | 141 | const Device& device; |
| 152 | VKScheduler& scheduler; | 142 | VKScheduler& scheduler; |
| 153 | VKDescriptorPool& descriptor_pool; | 143 | DescriptorPool& descriptor_pool; |
| 154 | VKUpdateDescriptorQueue& update_descriptor_queue; | 144 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 145 | RenderPassCache& render_pass_cache; | ||
| 146 | BufferCache& buffer_cache; | ||
| 147 | TextureCache& texture_cache; | ||
| 148 | VideoCore::ShaderNotify& shader_notify; | ||
| 149 | bool use_asynchronous_shaders{}; | ||
| 155 | 150 | ||
| 156 | std::unique_ptr<Shader> null_shader; | 151 | GraphicsPipelineCacheKey graphics_key{}; |
| 157 | std::unique_ptr<Shader> null_kernel; | 152 | GraphicsPipeline* current_pipeline{}; |
| 158 | 153 | ||
| 159 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; | 154 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 155 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; | ||
| 160 | 156 | ||
| 161 | GraphicsPipelineCacheKey last_graphics_key; | 157 | ShaderPools main_pools; |
| 162 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||
| 163 | 158 | ||
| 164 | std::mutex pipeline_cache; | 159 | Shader::Profile profile; |
| 165 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | 160 | Shader::HostTranslateInfo host_info; |
| 166 | graphics_cache; | ||
| 167 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||
| 168 | }; | ||
| 169 | 161 | ||
| 170 | void FillDescriptorUpdateTemplateEntries( | 162 | std::filesystem::path pipeline_cache_filename; |
| 171 | const ShaderEntries& entries, u32& binding, u32& offset, | 163 | |
| 172 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); | 164 | Common::ThreadWorker workers; |
| 165 | Common::ThreadWorker serialization_thread; | ||
| 166 | }; | ||
| 173 | 167 | ||
| 174 | } // namespace Vulkan | 168 | } // namespace Vulkan |
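Editor's note: the header keeps a std::hash specialization for ComputePipelineCacheKey that simply forwards to the key's own Hash(), which is what lets the key be used directly in the unordered_map members above. A small sketch of that forwarding pattern with an invented key type; the member-mixing in Hash() here is an assumption for the demo, not the real implementation:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <functional>

    namespace demo {
    struct CacheKey {
        std::uint64_t unique_hash;
        std::uint32_t shared_memory_size;
        std::array<std::uint32_t, 3> workgroup_size;

        std::size_t Hash() const noexcept {
            // Demo-only mixing of the members; the real key may hash raw bytes instead.
            std::size_t seed = std::hash<std::uint64_t>{}(unique_hash);
            seed ^= std::hash<std::uint32_t>{}(shared_memory_size) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            for (const auto dim : workgroup_size) {
                seed ^= std::hash<std::uint32_t>{}(dim) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            }
            return seed;
        }

        bool operator==(const CacheKey& rhs) const noexcept {
            return unique_hash == rhs.unique_hash && shared_memory_size == rhs.shared_memory_size &&
                   workgroup_size == rhs.workgroup_size;
        }
    };
    } // namespace demo

    namespace std {
    template <>
    struct hash<demo::CacheKey> {
        size_t operator()(const demo::CacheKey& k) const noexcept {
            return k.Hash(); // forward to the key's own hash, as the header does
        }
    };
    } // namespace std

    // With the specialization in place the key works directly as a map key:
    //   std::unordered_map<demo::CacheKey, int> cache;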
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 7cadd5147..c9cb32d71 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -114,14 +114,10 @@ void HostCounter::EndQuery() { | |||
| 114 | } | 114 | } |
| 115 | 115 | ||
| 116 | u64 HostCounter::BlockingQuery() const { | 116 | u64 HostCounter::BlockingQuery() const { |
| 117 | if (tick >= cache.GetScheduler().CurrentTick()) { | 117 | cache.GetScheduler().Wait(tick); |
| 118 | cache.GetScheduler().Flush(); | ||
| 119 | } | ||
| 120 | |||
| 121 | u64 data; | 118 | u64 data; |
| 122 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( | 119 | const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults( |
| 123 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), | 120 | query.first, query.second, 1, sizeof(data), &data, sizeof(data), VK_QUERY_RESULT_64_BIT); |
| 124 | VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); | ||
| 125 | 121 | ||
| 126 | switch (query_result) { | 122 | switch (query_result) { |
| 127 | case VK_SUCCESS: | 123 | case VK_SUCCESS: |
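Editor's note on the flag change above: once the scheduler has waited on the submission tick, the query result is already written, so VK_QUERY_RESULT_WAIT_BIT can be dropped. A hedged sketch of the underlying Vulkan call with plain handles (assumed valid; full error handling elided):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    std::uint64_t ReadQueryResult(VkDevice device, VkQueryPool query_pool, std::uint32_t query_index) {
        std::uint64_t data = 0;
        const VkResult result = vkGetQueryPoolResults(
            device, query_pool, query_index, 1, sizeof(data), &data, sizeof(data),
            VK_QUERY_RESULT_64_BIT); // no WAIT bit: the GPU work is known to have finished
        // Without the WAIT bit a still-pending query returns VK_NOT_READY and leaves
        // 'data' unwritten; a robust caller would handle that and device-loss errors.
        (void)result;
        return data;
    }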
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f57c15b37..c7a07fdd8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 29 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 28 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -55,11 +54,10 @@ struct DrawParams { | |||
| 55 | u32 num_instances; | 54 | u32 num_instances; |
| 56 | u32 base_vertex; | 55 | u32 base_vertex; |
| 57 | u32 num_vertices; | 56 | u32 num_vertices; |
| 57 | u32 first_index; | ||
| 58 | bool is_indexed; | 58 | bool is_indexed; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | ||
| 62 | |||
| 63 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { | 61 | VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index) { |
| 64 | const auto& src = regs.viewport_transform[index]; | 62 | const auto& src = regs.viewport_transform[index]; |
| 65 | const float width = src.scale_x * 2.0f; | 63 | const float width = src.scale_x * 2.0f; |
| @@ -97,118 +95,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
| 97 | return scissor; | 95 | return scissor; |
| 98 | } | 96 | } |
| 99 | 97 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | ||
| 101 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | ||
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | ||
| 103 | for (size_t i = 0; i < std::size(addresses); ++i) { | ||
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | ||
| 105 | } | ||
| 106 | return addresses; | ||
| 107 | } | ||
| 108 | |||
| 109 | struct TextureHandle { | ||
| 110 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 111 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 112 | image = handle.tic_id; | ||
| 113 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 114 | } | ||
| 115 | |||
| 116 | u32 image; | ||
| 117 | u32 sampler; | ||
| 118 | }; | ||
| 119 | |||
| 120 | template <typename Engine, typename Entry> | ||
| 121 | TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry, | ||
| 122 | size_t stage, size_t index = 0) { | ||
| 123 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||
| 124 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 125 | if (entry.is_separated) { | ||
| 126 | const u32 buffer_1 = entry.buffer; | ||
| 127 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 128 | const u32 offset_1 = entry.offset; | ||
| 129 | const u32 offset_2 = entry.secondary_offset; | ||
| 130 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 131 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 132 | return TextureHandle(handle_1 | handle_2, via_header_index); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | if (entry.is_bindless) { | ||
| 136 | const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | ||
| 137 | return TextureHandle(raw, via_header_index); | ||
| 138 | } | ||
| 139 | const u32 buffer = engine.GetBoundBuffer(); | ||
| 140 | const u64 offset = (entry.offset + index) * sizeof(u32); | ||
| 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | ||
| 142 | } | ||
| 143 | |||
| 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | ||
| 145 | if (entry.is_buffer) { | ||
| 146 | return ImageViewType::e2D; | ||
| 147 | } | ||
| 148 | switch (entry.type) { | ||
| 149 | case Tegra::Shader::TextureType::Texture1D: | ||
| 150 | return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D; | ||
| 151 | case Tegra::Shader::TextureType::Texture2D: | ||
| 152 | return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 153 | case Tegra::Shader::TextureType::Texture3D: | ||
| 154 | return ImageViewType::e3D; | ||
| 155 | case Tegra::Shader::TextureType::TextureCube: | ||
| 156 | return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube; | ||
| 157 | } | ||
| 158 | UNREACHABLE(); | ||
| 159 | return ImageViewType::e2D; | ||
| 160 | } | ||
| 161 | |||
| 162 | ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) { | ||
| 163 | switch (entry.type) { | ||
| 164 | case Tegra::Shader::ImageType::Texture1D: | ||
| 165 | return ImageViewType::e1D; | ||
| 166 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 167 | return ImageViewType::e1DArray; | ||
| 168 | case Tegra::Shader::ImageType::Texture2D: | ||
| 169 | return ImageViewType::e2D; | ||
| 170 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 171 | return ImageViewType::e2DArray; | ||
| 172 | case Tegra::Shader::ImageType::Texture3D: | ||
| 173 | return ImageViewType::e3D; | ||
| 174 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 175 | return ImageViewType::Buffer; | ||
| 176 | } | ||
| 177 | UNREACHABLE(); | ||
| 178 | return ImageViewType::e2D; | ||
| 179 | } | ||
| 180 | |||
| 181 | void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache, | ||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue, | ||
| 183 | ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) { | ||
| 184 | for ([[maybe_unused]] const auto& entry : entries.uniform_texels) { | ||
| 185 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 186 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 187 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 188 | } | ||
| 189 | for (const auto& entry : entries.samplers) { | ||
| 190 | for (size_t i = 0; i < entry.size; ++i) { | ||
| 191 | const VkSampler sampler = *sampler_ptr++; | ||
| 192 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 193 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 194 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 195 | update_descriptor_queue.AddSampledImage(handle, sampler); | ||
| 196 | } | ||
| 197 | } | ||
| 198 | for ([[maybe_unused]] const auto& entry : entries.storage_texels) { | ||
| 199 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 200 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 201 | update_descriptor_queue.AddTexelBuffer(image_view.BufferView()); | ||
| 202 | } | ||
| 203 | for (const auto& entry : entries.images) { | ||
| 204 | // TODO: Mark as modified | ||
| 205 | const ImageViewId image_view_id = *image_view_id_ptr++; | ||
| 206 | const ImageView& image_view = texture_cache.GetImageView(image_view_id); | ||
| 207 | const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry)); | ||
| 208 | update_descriptor_queue.AddImage(handle); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | 98 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 213 | bool is_indexed) { | 99 | bool is_indexed) { |
| 214 | DrawParams params{ | 100 | DrawParams params{ |
| @@ -216,6 +102,7 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan | |||
| 216 | .num_instances = is_instanced ? num_instances : 1, | 102 | .num_instances = is_instanced ? num_instances : 1, |
| 217 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, | 103 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, |
| 218 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, | 104 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, |
| 105 | .first_index = is_indexed ? regs.index_array.first : 0, | ||
| 219 | .is_indexed = is_indexed, | 106 | .is_indexed = is_indexed, |
| 220 | }; | 107 | }; |
| 221 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | 108 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { |
| @@ -243,21 +130,21 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 130 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 244 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, | 131 | astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue, |
| 245 | memory_allocator), | 132 | memory_allocator), |
| 246 | texture_cache_runtime{device, scheduler, memory_allocator, | 133 | render_pass_cache(device), texture_cache_runtime{device, scheduler, |
| 247 | staging_pool, blit_image, astc_decoder_pass}, | 134 | memory_allocator, staging_pool, |
| 135 | blit_image, astc_decoder_pass, | ||
| 136 | render_pass_cache}, | ||
| 248 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 137 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 249 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 138 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 250 | update_descriptor_queue, descriptor_pool), | 139 | update_descriptor_queue, descriptor_pool), |
| 251 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 140 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 252 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 141 | pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 253 | descriptor_pool, update_descriptor_queue), | 142 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, |
| 143 | texture_cache, gpu.ShaderNotify()), | ||
| 254 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | 144 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, |
| 255 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | 145 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 256 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 146 | wfi_event(device.GetLogical().CreateEvent()) { |
| 257 | scheduler.SetQueryCache(query_cache); | 147 | scheduler.SetQueryCache(query_cache); |
| 258 | if (device.UseAsynchronousShaders()) { | ||
| 259 | async_shaders.AllocateWorkers(); | ||
| 260 | } | ||
| 261 | } | 148 | } |
| 262 | 149 | ||
| 263 | RasterizerVulkan::~RasterizerVulkan() = default; | 150 | RasterizerVulkan::~RasterizerVulkan() = default; |
| @@ -270,53 +157,30 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 270 | 157 | ||
| 271 | query_cache.UpdateCounters(); | 158 | query_cache.UpdateCounters(); |
| 272 | 159 | ||
| 273 | graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); | 160 | GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; |
| 274 | 161 | if (!pipeline) { | |
| 275 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 276 | |||
| 277 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 278 | texture_cache.UpdateRenderTargets(false); | ||
| 279 | |||
| 280 | const auto shaders = pipeline_cache.GetShaders(); | ||
| 281 | graphics_key.shaders = GetShaderAddresses(shaders); | ||
| 282 | |||
| 283 | SetupShaderDescriptors(shaders, is_indexed); | ||
| 284 | |||
| 285 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | ||
| 286 | graphics_key.renderpass = framebuffer->RenderPass(); | ||
| 287 | |||
| 288 | VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( | ||
| 289 | graphics_key, framebuffer->NumColorBuffers(), async_shaders); | ||
| 290 | if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | ||
| 291 | // Async graphics pipeline was not ready. | ||
| 292 | return; | 162 | return; |
| 293 | } | 163 | } |
| 164 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 165 | pipeline->Configure(is_indexed); | ||
| 294 | 166 | ||
| 295 | BeginTransformFeedback(); | 167 | BeginTransformFeedback(); |
| 296 | 168 | ||
| 297 | scheduler.RequestRenderpass(framebuffer); | ||
| 298 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||
| 299 | UpdateDynamicStates(); | 169 | UpdateDynamicStates(); |
| 300 | 170 | ||
| 301 | const auto& regs = maxwell3d.regs; | 171 | const auto& regs{maxwell3d.regs}; |
| 302 | const u32 num_instances = maxwell3d.mme_draw.instance_count; | 172 | const u32 num_instances{maxwell3d.mme_draw.instance_count}; |
| 303 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | 173 | const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)}; |
| 304 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | 174 | scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) { |
| 305 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 306 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||
| 307 | if (descriptor_set) { | ||
| 308 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||
| 309 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 310 | } | ||
| 311 | if (draw_params.is_indexed) { | 175 | if (draw_params.is_indexed) { |
| 312 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | 176 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, |
| 313 | draw_params.base_vertex, draw_params.base_instance); | 177 | draw_params.first_index, draw_params.base_vertex, |
| 178 | draw_params.base_instance); | ||
| 314 | } else { | 179 | } else { |
| 315 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | 180 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, |
| 316 | draw_params.base_vertex, draw_params.base_instance); | 181 | draw_params.base_vertex, draw_params.base_instance); |
| 317 | } | 182 | } |
| 318 | }); | 183 | }); |
| 319 | |||
| 320 | EndTransformFeedback(); | 184 | EndTransformFeedback(); |
| 321 | } | 185 | } |
| 322 | 186 | ||
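Editor's note: the Draw path above now forwards first_index from the draw parameters. A small sketch of how those fields map onto the raw Vulkan draw entry points, using a stand-in DrawParams struct mirroring the one in this diff (command buffer assumed to be inside an active render pass with a bound pipeline):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    struct DrawParams {
        std::uint32_t base_instance;
        std::uint32_t num_instances;
        std::uint32_t base_vertex;
        std::uint32_t num_vertices;
        std::uint32_t first_index;
        bool is_indexed;
    };

    void IssueDraw(VkCommandBuffer cmdbuf, const DrawParams& params) {
        if (params.is_indexed) {
            // first_index is the field added in this commit; base_vertex becomes
            // vertexOffset and base_instance becomes firstInstance.
            vkCmdDrawIndexed(cmdbuf, params.num_vertices, params.num_instances, params.first_index,
                             static_cast<std::int32_t>(params.base_vertex), params.base_instance);
        } else {
            vkCmdDraw(cmdbuf, params.num_vertices, params.num_instances, params.base_vertex,
                      params.base_instance);
        }
    }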
| @@ -326,6 +190,7 @@ void RasterizerVulkan::Clear() { | |||
| 326 | if (!maxwell3d.ShouldExecute()) { | 190 | if (!maxwell3d.ShouldExecute()) { |
| 327 | return; | 191 | return; |
| 328 | } | 192 | } |
| 193 | FlushWork(); | ||
| 329 | 194 | ||
| 330 | query_cache.UpdateCounters(); | 195 | query_cache.UpdateCounters(); |
| 331 | 196 | ||
| @@ -395,73 +260,20 @@ void RasterizerVulkan::Clear() { | |||
| 395 | }); | 260 | }); |
| 396 | } | 261 | } |
| 397 | 262 | ||
| 398 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 263 | void RasterizerVulkan::DispatchCompute() { |
| 399 | MICROPROFILE_SCOPE(Vulkan_Compute); | 264 | FlushWork(); |
| 400 | |||
| 401 | query_cache.UpdateCounters(); | ||
| 402 | 265 | ||
| 403 | const auto& launch_desc = kepler_compute.launch_description; | 266 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; |
| 404 | auto& pipeline = pipeline_cache.GetComputePipeline({ | 267 | if (!pipeline) { |
| 405 | .shader = code_addr, | 268 | return; |
| 406 | .shared_memory_size = launch_desc.shared_alloc, | 269 | } |
| 407 | .workgroup_size{ | 270 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; |
| 408 | launch_desc.block_dim_x, | 271 | pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); |
| 409 | launch_desc.block_dim_y, | ||
| 410 | launch_desc.block_dim_z, | ||
| 411 | }, | ||
| 412 | }); | ||
| 413 | 272 | ||
| 414 | // Compute dispatches can't be executed inside a renderpass | 273 | const auto& qmd{kepler_compute.launch_description}; |
| 274 | const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; | ||
| 415 | scheduler.RequestOutsideRenderPassOperationContext(); | 275 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 416 | 276 | scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); | |
| 417 | image_view_indices.clear(); | ||
| 418 | sampler_handles.clear(); | ||
| 419 | |||
| 420 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 421 | |||
| 422 | const auto& entries = pipeline.GetEntries(); | ||
| 423 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 424 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 425 | u32 ssbo_index = 0; | ||
| 426 | for (const auto& buffer : entries.global_buffers) { | ||
| 427 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 428 | buffer.is_written); | ||
| 429 | ++ssbo_index; | ||
| 430 | } | ||
| 431 | buffer_cache.UpdateComputeBuffers(); | ||
| 432 | |||
| 433 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 434 | |||
| 435 | SetupComputeUniformTexels(entries); | ||
| 436 | SetupComputeTextures(entries); | ||
| 437 | SetupComputeStorageTexels(entries); | ||
| 438 | SetupComputeImages(entries); | ||
| 439 | |||
| 440 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 441 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 442 | |||
| 443 | update_descriptor_queue.Acquire(); | ||
| 444 | |||
| 445 | buffer_cache.BindHostComputeBuffers(); | ||
| 446 | |||
| 447 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 448 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 449 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 450 | sampler_ptr); | ||
| 451 | |||
| 452 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | ||
| 453 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | ||
| 454 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | ||
| 455 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | ||
| 456 | grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout, | ||
| 457 | descriptor_set](vk::CommandBuffer cmdbuf) { | ||
| 458 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); | ||
| 459 | if (descriptor_set) { | ||
| 460 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, | ||
| 461 | DESCRIPTOR_SET, descriptor_set, nullptr); | ||
| 462 | } | ||
| 463 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | ||
| 464 | }); | ||
| 465 | } | 277 | } |
| 466 | 278 | ||
| 467 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 279 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { |
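Editor's note: DispatchCompute above packs the grid dimensions into a std::array and captures it by value before handing the lambda to scheduler.Record, so the command stays valid if it is recorded later on a worker thread. A sketch of that deferred-recording pattern with invented FakeScheduler/FakeCommandBuffer types (not yuzu's scheduler API):

    #include <array>
    #include <cstdint>
    #include <functional>
    #include <vector>

    struct FakeCommandBuffer {
        void Dispatch(std::uint32_t, std::uint32_t, std::uint32_t) { /* record the dispatch */ }
    };

    class FakeScheduler {
    public:
        void Record(std::function<void(FakeCommandBuffer&)> func) {
            pending.push_back(std::move(func)); // queued now, executed later
        }
        void Flush(FakeCommandBuffer& cmdbuf) {
            for (auto& func : pending) {
                func(cmdbuf);
            }
            pending.clear();
        }

    private:
        std::vector<std::function<void(FakeCommandBuffer&)>> pending;
    };

    void DispatchExample(FakeScheduler& scheduler, std::uint32_t gx, std::uint32_t gy, std::uint32_t gz) {
        const std::array<std::uint32_t, 3> dim{gx, gy, gz};
        // Capturing 'dim' by value keeps the lambda self-contained after this
        // function returns, which is why the real code builds the array first.
        scheduler.Record([dim](FakeCommandBuffer& cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
    }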
| @@ -626,6 +438,7 @@ void RasterizerVulkan::WaitForIdle() { | |||
| 626 | 438 | ||
| 627 | void RasterizerVulkan::FragmentBarrier() { | 439 | void RasterizerVulkan::FragmentBarrier() { |
| 628 | // We already put barriers when a render pass finishes | 440 | // We already put barriers when a render pass finishes |
| 441 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 629 | } | 442 | } |
| 630 | 443 | ||
| 631 | void RasterizerVulkan::TiledCacheBarrier() { | 444 | void RasterizerVulkan::TiledCacheBarrier() { |
| @@ -633,10 +446,11 @@ void RasterizerVulkan::TiledCacheBarrier() { | |||
| 633 | } | 446 | } |
| 634 | 447 | ||
| 635 | void RasterizerVulkan::FlushCommands() { | 448 | void RasterizerVulkan::FlushCommands() { |
| 636 | if (draw_counter > 0) { | 449 | if (draw_counter == 0) { |
| 637 | draw_counter = 0; | 450 | return; |
| 638 | scheduler.Flush(); | ||
| 639 | } | 451 | } |
| 452 | draw_counter = 0; | ||
| 453 | scheduler.Flush(); | ||
| 640 | } | 454 | } |
| 641 | 455 | ||
| 642 | void RasterizerVulkan::TickFrame() { | 456 | void RasterizerVulkan::TickFrame() { |
| @@ -676,13 +490,18 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 676 | if (!image_view) { | 490 | if (!image_view) { |
| 677 | return false; | 491 | return false; |
| 678 | } | 492 | } |
| 679 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | 493 | screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); |
| 680 | screen_info.width = image_view->size.width; | 494 | screen_info.width = image_view->size.width; |
| 681 | screen_info.height = image_view->size.height; | 495 | screen_info.height = image_view->size.height; |
| 682 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); | 496 | screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 683 | return true; | 497 | return true; |
| 684 | } | 498 | } |
| 685 | 499 | ||
| 500 | void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||
| 501 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 502 | pipeline_cache.LoadDiskResources(title_id, stop_loading, callback); | ||
| 503 | } | ||
| 504 | |||
| 686 | void RasterizerVulkan::FlushWork() { | 505 | void RasterizerVulkan::FlushWork() { |
| 687 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; | 506 | static constexpr u32 DRAWS_TO_DISPATCH = 4096; |
| 688 | 507 | ||
| @@ -691,13 +510,11 @@ void RasterizerVulkan::FlushWork() { | |||
| 691 | if ((++draw_counter & 7) != 7) { | 510 | if ((++draw_counter & 7) != 7) { |
| 692 | return; | 511 | return; |
| 693 | } | 512 | } |
| 694 | |||
| 695 | if (draw_counter < DRAWS_TO_DISPATCH) { | 513 | if (draw_counter < DRAWS_TO_DISPATCH) { |
| 696 | // Send recorded tasks to the worker thread | 514 | // Send recorded tasks to the worker thread |
| 697 | scheduler.DispatchWork(); | 515 | scheduler.DispatchWork(); |
| 698 | return; | 516 | return; |
| 699 | } | 517 | } |
| 700 | |||
| 701 | // Otherwise, after a set number of draws, flush execution. | 518 |
| 702 | // This submits commands to the Vulkan driver. | 519 | // This submits commands to the Vulkan driver. |
| 703 | scheduler.Flush(); | 520 | scheduler.Flush(); |
| @@ -716,52 +533,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | |||
| 716 | return buffer_cache.DMACopy(src_address, dest_address, amount); | 533 | return buffer_cache.DMACopy(src_address, dest_address, amount); |
| 717 | } | 534 | } |
| 718 | 535 | ||
| 719 | void RasterizerVulkan::SetupShaderDescriptors( | ||
| 720 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||
| 721 | image_view_indices.clear(); | ||
| 722 | sampler_handles.clear(); | ||
| 723 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 724 | Shader* const shader = shaders[stage + 1]; | ||
| 725 | if (!shader) { | ||
| 726 | continue; | ||
| 727 | } | ||
| 728 | const ShaderEntries& entries = shader->GetEntries(); | ||
| 729 | SetupGraphicsUniformTexels(entries, stage); | ||
| 730 | SetupGraphicsTextures(entries, stage); | ||
| 731 | SetupGraphicsStorageTexels(entries, stage); | ||
| 732 | SetupGraphicsImages(entries, stage); | ||
| 733 | |||
| 734 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 735 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 736 | u32 ssbo_index = 0; | ||
| 737 | for (const auto& buffer : entries.global_buffers) { | ||
| 738 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 739 | buffer.cbuf_offset, buffer.is_written); | ||
| 740 | ++ssbo_index; | ||
| 741 | } | ||
| 742 | } | ||
| 743 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 744 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 745 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 746 | |||
| 747 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 748 | |||
| 749 | update_descriptor_queue.Acquire(); | ||
| 750 | |||
| 751 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | ||
| 752 | VkSampler* sampler_ptr = sampler_handles.data(); | ||
| 753 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | ||
| 754 | // Skip VertexA stage | ||
| 755 | Shader* const shader = shaders[stage + 1]; | ||
| 756 | if (!shader) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 759 | buffer_cache.BindHostStageBuffers(stage); | ||
| 760 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, | ||
| 761 | image_view_id_ptr, sampler_ptr); | ||
| 762 | } | ||
| 763 | } | ||
| 764 | |||
| 765 | void RasterizerVulkan::UpdateDynamicStates() { | 536 | void RasterizerVulkan::UpdateDynamicStates() { |
| 766 | auto& regs = maxwell3d.regs; | 537 | auto& regs = maxwell3d.regs; |
| 767 | UpdateViewportsState(regs); | 538 | UpdateViewportsState(regs); |
| @@ -770,6 +541,7 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 770 | UpdateBlendConstants(regs); | 541 | UpdateBlendConstants(regs); |
| 771 | UpdateDepthBounds(regs); | 542 | UpdateDepthBounds(regs); |
| 772 | UpdateStencilFaces(regs); | 543 | UpdateStencilFaces(regs); |
| 544 | UpdateLineWidth(regs); | ||
| 773 | if (device.IsExtExtendedDynamicStateSupported()) { | 545 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 774 | UpdateCullMode(regs); | 546 | UpdateCullMode(regs); |
| 775 | UpdateDepthBoundsTestEnable(regs); | 547 | UpdateDepthBoundsTestEnable(regs); |
| @@ -779,6 +551,9 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 779 | UpdateFrontFace(regs); | 551 | UpdateFrontFace(regs); |
| 780 | UpdateStencilOp(regs); | 552 | UpdateStencilOp(regs); |
| 781 | UpdateStencilTestEnable(regs); | 553 | UpdateStencilTestEnable(regs); |
| 554 | if (device.IsExtVertexInputDynamicStateSupported()) { | ||
| 555 | UpdateVertexInput(regs); | ||
| 556 | } | ||
| 782 | } | 557 | } |
| 783 | } | 558 | } |
| 784 | 559 | ||
| @@ -810,89 +585,6 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 810 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 585 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 811 | } | 586 | } |
| 812 | 587 | ||
| 813 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | ||
| 814 | const auto& regs = maxwell3d.regs; | ||
| 815 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 816 | for (const auto& entry : entries.uniform_texels) { | ||
| 817 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 818 | image_view_indices.push_back(handle.image); | ||
| 819 | } | ||
| 820 | } | ||
| 821 | |||
| 822 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | ||
| 823 | const auto& regs = maxwell3d.regs; | ||
| 824 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 825 | for (const auto& entry : entries.samplers) { | ||
| 826 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 827 | const TextureHandle handle = | ||
| 828 | GetTextureInfo(maxwell3d, via_header_index, entry, stage, index); | ||
| 829 | image_view_indices.push_back(handle.image); | ||
| 830 | |||
| 831 | Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler); | ||
| 832 | sampler_handles.push_back(sampler->Handle()); | ||
| 833 | } | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | ||
| 838 | const auto& regs = maxwell3d.regs; | ||
| 839 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 840 | for (const auto& entry : entries.storage_texels) { | ||
| 841 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 842 | image_view_indices.push_back(handle.image); | ||
| 843 | } | ||
| 844 | } | ||
| 845 | |||
| 846 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | ||
| 847 | const auto& regs = maxwell3d.regs; | ||
| 848 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | ||
| 849 | for (const auto& entry : entries.images) { | ||
| 850 | const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage); | ||
| 851 | image_view_indices.push_back(handle.image); | ||
| 852 | } | ||
| 853 | } | ||
| 854 | |||
| 855 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | ||
| 856 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 857 | for (const auto& entry : entries.uniform_texels) { | ||
| 858 | const TextureHandle handle = | ||
| 859 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 860 | image_view_indices.push_back(handle.image); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
| 864 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | ||
| 865 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 866 | for (const auto& entry : entries.samplers) { | ||
| 867 | for (size_t index = 0; index < entry.size; ++index) { | ||
| 868 | const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry, | ||
| 869 | COMPUTE_SHADER_INDEX, index); | ||
| 870 | image_view_indices.push_back(handle.image); | ||
| 871 | |||
| 872 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||
| 873 | sampler_handles.push_back(sampler->Handle()); | ||
| 874 | } | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 879 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 880 | for (const auto& entry : entries.storage_texels) { | ||
| 881 | const TextureHandle handle = | ||
| 882 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 883 | image_view_indices.push_back(handle.image); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | ||
| 888 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | ||
| 889 | for (const auto& entry : entries.images) { | ||
| 890 | const TextureHandle handle = | ||
| 891 | GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX); | ||
| 892 | image_view_indices.push_back(handle.image); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 588 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 897 | if (!state_tracker.TouchViewports()) { | 589 | if (!state_tracker.TouchViewports()) { |
| 898 | return; | 590 | return; |
| @@ -985,6 +677,14 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 985 | } | 677 | } |
| 986 | } | 678 | } |
| 987 | 679 | ||
| 680 | void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 681 | if (!state_tracker.TouchLineWidth()) { | ||
| 682 | return; | ||
| 683 | } | ||
| 684 | const float width = regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased; | ||
| 685 | scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); }); | ||
| 686 | } | ||
| 687 | |||
| 988 | void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { | 688 | void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 989 | if (!state_tracker.TouchCullMode()) { | 689 | if (!state_tracker.TouchCullMode()) { |
| 990 | return; | 690 | return; |
| @@ -999,6 +699,11 @@ void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Re | |||
| 999 | if (!state_tracker.TouchDepthBoundsTestEnable()) { | 699 | if (!state_tracker.TouchDepthBoundsTestEnable()) { |
| 1000 | return; | 700 | return; |
| 1001 | } | 701 | } |
| 702 | bool enabled = regs.depth_bounds_enable; | ||
| 703 | if (enabled && !device.IsDepthBoundsSupported()) { | ||
| 704 | LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); | ||
| 705 | enabled = false; | ||
| 706 | } | ||
| 1002 | scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { | 707 | scheduler.Record([enable = regs.depth_bounds_enable](vk::CommandBuffer cmdbuf) { |
| 1003 | cmdbuf.SetDepthBoundsTestEnableEXT(enable); | 708 | cmdbuf.SetDepthBoundsTestEnableEXT(enable); |
| 1004 | }); | 709 | }); |
| @@ -1086,4 +791,62 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1086 | }); | 791 | }); |
| 1087 | } | 792 | } |
| 1088 | 793 | ||
| 794 | void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) { | ||
| 795 | auto& dirty{maxwell3d.dirty.flags}; | ||
| 796 | if (!dirty[Dirty::VertexInput]) { | ||
| 797 | return; | ||
| 798 | } | ||
| 799 | dirty[Dirty::VertexInput] = false; | ||
| 800 | |||
| 801 | boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings; | ||
| 802 | boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes; | ||
| 803 | |||
| 804 | // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up | ||
| 805 | // generating dirty state. Track the highest dirty attribute and update all attributes until | ||
| 806 | // that one. | ||
| 807 | size_t highest_dirty_attr{}; | ||
| 808 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 809 | if (dirty[Dirty::VertexAttribute0 + index]) { | ||
| 810 | highest_dirty_attr = index; | ||
| 811 | } | ||
| 812 | } | ||
| 813 | for (size_t index = 0; index < highest_dirty_attr; ++index) { | ||
| 814 | const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]}; | ||
| 815 | const u32 binding{attribute.buffer}; | ||
| 816 | dirty[Dirty::VertexAttribute0 + index] = false; | ||
| 817 | dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true; | ||
| 818 | if (!attribute.constant) { | ||
| 819 | attributes.push_back({ | ||
| 820 | .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT, | ||
| 821 | .pNext = nullptr, | ||
| 822 | .location = static_cast<u32>(index), | ||
| 823 | .binding = binding, | ||
| 824 | .format = MaxwellToVK::VertexFormat(attribute.type, attribute.size), | ||
| 825 | .offset = attribute.offset, | ||
| 826 | }); | ||
| 827 | } | ||
| 828 | } | ||
| 829 | for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | ||
| 830 | if (!dirty[Dirty::VertexBinding0 + index]) { | ||
| 831 | continue; | ||
| 832 | } | ||
| 833 | dirty[Dirty::VertexBinding0 + index] = false; | ||
| 834 | |||
| 835 | const u32 binding{static_cast<u32>(index)}; | ||
| 836 | const auto& input_binding{regs.vertex_array[binding]}; | ||
| 837 | const bool is_instanced{regs.instanced_arrays.IsInstancingEnabled(binding)}; | ||
| 838 | bindings.push_back({ | ||
| 839 | .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT, | ||
| 840 | .pNext = nullptr, | ||
| 841 | .binding = binding, | ||
| 842 | .stride = input_binding.stride, | ||
| 843 | .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX, | ||
| 844 | .divisor = is_instanced ? input_binding.divisor : 1, | ||
| 845 | }); | ||
| 846 | } | ||
| 847 | scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) { | ||
| 848 | cmdbuf.SetVertexInputEXT(bindings, attributes); | ||
| 849 | }); | ||
| 850 | } | ||
| 851 | |||
| 1089 | } // namespace Vulkan | 852 | } // namespace Vulkan |
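The new UpdateVertexInput above translates guest vertex attribute and binding state into VK_EXT_vertex_input_dynamic_state calls at draw time instead of baking it into the pipeline. For readers unfamiliar with that extension, here is a minimal raw-Vulkan sketch of the call it ultimately records; the formats, strides and offsets are made-up example values, and in practice the entry point is fetched with vkGetDeviceProcAddr (yuzu goes through its own dispatch wrapper) rather than linked directly.

    #include <array>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Hypothetical example: one interleaved vertex buffer (binding 0) carrying a
    // vec3 position at offset 0 and a vec2 UV at offset 12, set as dynamic state.
    // Requires VK_EXT_vertex_input_dynamic_state and a pipeline created with
    // VK_DYNAMIC_STATE_VERTEX_INPUT_EXT.
    void SetExampleVertexInput(VkCommandBuffer cmdbuf) {
        const VkVertexInputBindingDescription2EXT binding{
            .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
            .pNext = nullptr,
            .binding = 0,
            .stride = 20, // 5 floats per vertex
            .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
            .divisor = 1, // must be 1 unless the binding is instanced
        };
        const std::array attributes{
            VkVertexInputAttributeDescription2EXT{
                .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
                .pNext = nullptr,
                .location = 0,
                .binding = 0,
                .format = VK_FORMAT_R32G32B32_SFLOAT,
                .offset = 0,
            },
            VkVertexInputAttributeDescription2EXT{
                .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
                .pNext = nullptr,
                .location = 1,
                .binding = 0,
                .format = VK_FORMAT_R32G32_SFLOAT,
                .offset = 12,
            },
        };
        vkCmdSetVertexInputEXT(cmdbuf, 1, &binding,
                               static_cast<uint32_t>(attributes.size()), attributes.data());
    }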
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2065209be..866827247 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -21,14 +21,13 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 28 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | ||
| 32 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 31 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 33 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 32 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 34 | 33 | ||
| @@ -73,7 +72,7 @@ public: | |||
| 73 | 72 | ||
| 74 | void Draw(bool is_indexed, bool is_instanced) override; | 73 | void Draw(bool is_indexed, bool is_instanced) override; |
| 75 | void Clear() override; | 74 | void Clear() override; |
| 76 | void DispatchCompute(GPUVAddr code_addr) override; | 75 | void DispatchCompute() override; |
| 77 | void ResetCounter(VideoCore::QueryType type) override; | 76 | void ResetCounter(VideoCore::QueryType type) override; |
| 78 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 77 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 79 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | 78 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; |
| @@ -102,19 +101,8 @@ public: | |||
| 102 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | 101 | Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; |
| 103 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 102 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 104 | u32 pixel_stride) override; | 103 | u32 pixel_stride) override; |
| 105 | 104 | void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | |
| 106 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | 105 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 107 | return async_shaders; | ||
| 108 | } | ||
| 109 | |||
| 110 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 111 | return async_shaders; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 115 | static constexpr size_t MaxConstbufferSize = 0x10000; | ||
| 116 | static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||
| 117 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 118 | 106 | ||
| 119 | private: | 107 | private: |
| 120 | static constexpr size_t MAX_TEXTURES = 192; | 108 | static constexpr size_t MAX_TEXTURES = 192; |
| @@ -125,46 +113,19 @@ private: | |||
| 125 | 113 | ||
| 126 | void FlushWork(); | 114 | void FlushWork(); |
| 127 | 115 | ||
| 128 | /// Setup descriptors in the graphics pipeline. | ||
| 129 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, | ||
| 130 | bool is_indexed); | ||
| 131 | |||
| 132 | void UpdateDynamicStates(); | 116 | void UpdateDynamicStates(); |
| 133 | 117 | ||
| 134 | void BeginTransformFeedback(); | 118 | void BeginTransformFeedback(); |
| 135 | 119 | ||
| 136 | void EndTransformFeedback(); | 120 | void EndTransformFeedback(); |
| 137 | 121 | ||
| 138 | /// Setup uniform texels in the graphics pipeline. | ||
| 139 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 140 | |||
| 141 | /// Setup textures in the graphics pipeline. | ||
| 142 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | ||
| 143 | |||
| 144 | /// Setup storage texels in the graphics pipeline. | ||
| 145 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 146 | |||
| 147 | /// Setup images in the graphics pipeline. | ||
| 148 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | ||
| 149 | |||
| 150 | /// Setup texel buffers in the compute pipeline. | ||
| 151 | void SetupComputeUniformTexels(const ShaderEntries& entries); | ||
| 152 | |||
| 153 | /// Setup textures in the compute pipeline. | ||
| 154 | void SetupComputeTextures(const ShaderEntries& entries); | ||
| 155 | |||
| 156 | /// Setup storage texels in the compute pipeline. | ||
| 157 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 158 | |||
| 159 | /// Setup images in the compute pipeline. | ||
| 160 | void SetupComputeImages(const ShaderEntries& entries); | ||
| 161 | |||
| 162 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 122 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 163 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 123 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 164 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 124 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| 165 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); | 125 | void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); |
| 166 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); | 126 | void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); |
| 167 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); | 127 | void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); |
| 128 | void UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs); | ||
| 168 | 129 | ||
| 169 | void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); | 130 | void UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs); |
| 170 | void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 131 | void UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -175,6 +136,8 @@ private: | |||
| 175 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 136 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 176 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 137 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 177 | 138 | ||
| 139 | void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs); | ||
| 140 | |||
| 178 | Tegra::GPU& gpu; | 141 | Tegra::GPU& gpu; |
| 179 | Tegra::MemoryManager& gpu_memory; | 142 | Tegra::MemoryManager& gpu_memory; |
| 180 | Tegra::Engines::Maxwell3D& maxwell3d; | 143 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -187,24 +150,22 @@ private: | |||
| 187 | VKScheduler& scheduler; | 150 | VKScheduler& scheduler; |
| 188 | 151 | ||
| 189 | StagingBufferPool staging_pool; | 152 | StagingBufferPool staging_pool; |
| 190 | VKDescriptorPool descriptor_pool; | 153 | DescriptorPool descriptor_pool; |
| 191 | VKUpdateDescriptorQueue update_descriptor_queue; | 154 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 192 | BlitImageHelper blit_image; | 155 | BlitImageHelper blit_image; |
| 193 | ASTCDecoderPass astc_decoder_pass; | 156 | ASTCDecoderPass astc_decoder_pass; |
| 194 | 157 | RenderPassCache render_pass_cache; | |
| 195 | GraphicsPipelineCacheKey graphics_key; | ||
| 196 | 158 | ||
| 197 | TextureCacheRuntime texture_cache_runtime; | 159 | TextureCacheRuntime texture_cache_runtime; |
| 198 | TextureCache texture_cache; | 160 | TextureCache texture_cache; |
| 199 | BufferCacheRuntime buffer_cache_runtime; | 161 | BufferCacheRuntime buffer_cache_runtime; |
| 200 | BufferCache buffer_cache; | 162 | BufferCache buffer_cache; |
| 201 | VKPipelineCache pipeline_cache; | 163 | PipelineCache pipeline_cache; |
| 202 | VKQueryCache query_cache; | 164 | VKQueryCache query_cache; |
| 203 | AccelerateDMA accelerate_dma; | 165 | AccelerateDMA accelerate_dma; |
| 204 | VKFenceManager fence_manager; | 166 | VKFenceManager fence_manager; |
| 205 | 167 | ||
| 206 | vk::Event wfi_event; | 168 | vk::Event wfi_event; |
| 207 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 208 | 169 | ||
| 209 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 170 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 210 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 171 | std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
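LoadDiskResources is the new entry point for pipeline precompilation and takes a std::stop_token plus a progress callback. The real work happens in PipelineCache and is not shown here; the following is only a hypothetical sketch of the cooperative-cancellation loop such a loader typically runs, with a placeholder entry type and callback signature.

    #include <cstddef>
    #include <stop_token>
    #include <vector>

    struct CachedEntry {}; // placeholder for a serialized pipeline description

    // Hypothetical loader: walks precompiled entries, honoring cancellation and
    // reporting progress through a callback, mirroring the LoadDiskResources API.
    template <typename Callback>
    void LoadEntries(const std::vector<CachedEntry>& entries, std::stop_token stop_loading,
                     const Callback& callback) {
        for (std::size_t index = 0; index < entries.size(); ++index) {
            if (stop_loading.stop_requested()) {
                return; // shutdown or user cancellation
            }
            // ... build the pipeline described by entries[index] here ...
            callback(index + 1, entries.size()); // progress: built / total
        }
    }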
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp new file mode 100644 index 000000000..451ffe019 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include <boost/container/static_vector.hpp> | ||
| 8 | |||
| 9 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | namespace { | ||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | |||
| 19 | VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format, | ||
| 20 | VkSampleCountFlagBits samples) { | ||
| 21 | using MaxwellToVK::SurfaceFormat; | ||
| 22 | return { | ||
| 23 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 24 | .format = SurfaceFormat(device, FormatType::Optimal, true, format).format, | ||
| 25 | .samples = samples, | ||
| 26 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 27 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 28 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 29 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 30 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 31 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 32 | }; | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | RenderPassCache::RenderPassCache(const Device& device_) : device{&device_} {} | ||
| 37 | |||
| 38 | VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { | ||
| 39 | std::lock_guard lock{mutex}; | ||
| 40 | const auto [pair, is_new] = cache.try_emplace(key); | ||
| 41 | if (!is_new) { | ||
| 42 | return *pair->second; | ||
| 43 | } | ||
| 44 | boost::container::static_vector<VkAttachmentDescription, 9> descriptions; | ||
| 45 | std::array<VkAttachmentReference, 8> references{}; | ||
| 46 | u32 num_attachments{}; | ||
| 47 | u32 num_colors{}; | ||
| 48 | for (size_t index = 0; index < key.color_formats.size(); ++index) { | ||
| 49 | const PixelFormat format{key.color_formats[index]}; | ||
| 50 | const bool is_valid{format != PixelFormat::Invalid}; | ||
| 51 | references[index] = VkAttachmentReference{ | ||
| 52 | .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED, | ||
| 53 | .layout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 54 | }; | ||
| 55 | if (is_valid) { | ||
| 56 | descriptions.push_back(AttachmentDescription(*device, format, key.samples)); | ||
| 57 | num_attachments = static_cast<u32>(index + 1); | ||
| 58 | ++num_colors; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | const bool has_depth{key.depth_format != PixelFormat::Invalid}; | ||
| 62 | VkAttachmentReference depth_reference{}; | ||
| 63 | if (key.depth_format != PixelFormat::Invalid) { | ||
| 64 | depth_reference = VkAttachmentReference{ | ||
| 65 | .attachment = num_colors, | ||
| 66 | .layout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 67 | }; | ||
| 68 | descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples)); | ||
| 69 | } | ||
| 70 | const VkSubpassDescription subpass{ | ||
| 71 | .flags = 0, | ||
| 72 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 73 | .inputAttachmentCount = 0, | ||
| 74 | .pInputAttachments = nullptr, | ||
| 75 | .colorAttachmentCount = num_attachments, | ||
| 76 | .pColorAttachments = references.data(), | ||
| 77 | .pResolveAttachments = nullptr, | ||
| 78 | .pDepthStencilAttachment = has_depth ? &depth_reference : nullptr, | ||
| 79 | .preserveAttachmentCount = 0, | ||
| 80 | .pPreserveAttachments = nullptr, | ||
| 81 | }; | ||
| 82 | pair->second = device->GetLogical().CreateRenderPass({ | ||
| 83 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 84 | .pNext = nullptr, | ||
| 85 | .flags = 0, | ||
| 86 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 87 | .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), | ||
| 88 | .subpassCount = 1, | ||
| 89 | .pSubpasses = &subpass, | ||
| 90 | .dependencyCount = 0, | ||
| 91 | .pDependencies = nullptr, | ||
| 92 | }); | ||
| 93 | return *pair->second; | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Vulkan | ||
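A hypothetical caller of the new cache fills a RenderPassKey and lets Get deduplicate the VkRenderPass behind the internal mutex. This sketch assumes the yuzu headers above; the PixelFormat enumerator names are assumptions chosen for illustration.

    VkRenderPass GetExamplePass(Vulkan::RenderPassCache& cache) {
        using VideoCore::Surface::PixelFormat;
        Vulkan::RenderPassKey key{};
        key.color_formats.fill(PixelFormat::Invalid); // unused color targets stay Invalid
        key.color_formats[0] = PixelFormat::B8G8R8A8_UNORM; // assumed enumerator name
        key.depth_format = PixelFormat::D32_FLOAT;          // assumed enumerator name
        key.samples = VK_SAMPLE_COUNT_1_BIT;
        return cache.Get(key); // identical keys return the same cached VkRenderPass
    }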
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.h b/src/video_core/renderer_vulkan/vk_render_pass_cache.h new file mode 100644 index 000000000..eaa0ed775 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 12 | |||
| 13 | namespace Vulkan { | ||
| 14 | |||
| 15 | struct RenderPassKey { | ||
| 16 | auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 17 | |||
| 18 | std::array<VideoCore::Surface::PixelFormat, 8> color_formats; | ||
| 19 | VideoCore::Surface::PixelFormat depth_format; | ||
| 20 | VkSampleCountFlagBits samples; | ||
| 21 | }; | ||
| 22 | |||
| 23 | } // namespace Vulkan | ||
| 24 | |||
| 25 | namespace std { | ||
| 26 | template <> | ||
| 27 | struct hash<Vulkan::RenderPassKey> { | ||
| 28 | [[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 29 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 30 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 31 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 32 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 33 | } | ||
| 34 | return value; | ||
| 35 | } | ||
| 36 | }; | ||
| 37 | } // namespace std | ||
| 38 | |||
| 39 | namespace Vulkan { | ||
| 40 | |||
| 41 | class Device; | ||
| 42 | |||
| 43 | class RenderPassCache { | ||
| 44 | public: | ||
| 45 | explicit RenderPassCache(const Device& device_); | ||
| 46 | |||
| 47 | VkRenderPass Get(const RenderPassKey& key); | ||
| 48 | |||
| 49 | private: | ||
| 50 | const Device* device{}; | ||
| 51 | std::unordered_map<RenderPassKey, vk::RenderPass> cache; | ||
| 52 | std::mutex mutex; | ||
| 53 | }; | ||
| 54 | |||
| 55 | } // namespace Vulkan | ||
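The std::hash specialization above packs the whole key into one size_t: the depth format lands at bit 48, the sample count at bit 52, and each color format at a 6-bit stride. Collisions are possible once format values reach 64, but they only cost an equality compare, which the defaulted operator<=> already provides. A rough worked example with made-up integer values (depth = 14, samples = 1, color 0 = 37, remaining colors = 0):

    std::size_t value = std::size_t{14} << 48; // depth format
    value ^= std::size_t{1} << 52;             // sample count
    value ^= std::size_t{37} << 0;             // color attachment 0 (i * 6 == 0)
    // color attachments 1..7 are 0 in this example and leave the value unchanged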
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp index a8bf7bda8..2dd514968 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp | |||
| @@ -10,18 +10,16 @@ | |||
| 10 | namespace Vulkan { | 10 | namespace Vulkan { |
| 11 | 11 | ||
| 12 | ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) | 12 | ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) |
| 13 | : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} | 13 | : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} |
| 14 | |||
| 15 | ResourcePool::~ResourcePool() = default; | ||
| 16 | 14 | ||
| 17 | size_t ResourcePool::CommitResource() { | 15 | size_t ResourcePool::CommitResource() { |
| 18 | // Refresh semaphore to query updated results | 16 | // Refresh semaphore to query updated results |
| 19 | master_semaphore.Refresh(); | 17 | master_semaphore->Refresh(); |
| 20 | const u64 gpu_tick = master_semaphore.KnownGpuTick(); | 18 | const u64 gpu_tick = master_semaphore->KnownGpuTick(); |
| 21 | const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { | 19 | const auto search = [this, gpu_tick](size_t begin, size_t end) -> std::optional<size_t> { |
| 22 | for (size_t iterator = begin; iterator < end; ++iterator) { | 20 | for (size_t iterator = begin; iterator < end; ++iterator) { |
| 23 | if (gpu_tick >= ticks[iterator]) { | 21 | if (gpu_tick >= ticks[iterator]) { |
| 24 | ticks[iterator] = master_semaphore.CurrentTick(); | 22 | ticks[iterator] = master_semaphore->CurrentTick(); |
| 25 | return iterator; | 23 | return iterator; |
| 26 | } | 24 | } |
| 27 | } | 25 | } |
| @@ -36,7 +34,7 @@ size_t ResourcePool::CommitResource() { | |||
| 36 | // Both searches failed, the pool is full; handle it. | 34 | // Both searches failed, the pool is full; handle it. |
| 37 | const size_t free_resource = ManageOverflow(); | 35 | const size_t free_resource = ManageOverflow(); |
| 38 | 36 | ||
| 39 | ticks[free_resource] = master_semaphore.CurrentTick(); | 37 | ticks[free_resource] = master_semaphore->CurrentTick(); |
| 40 | found = free_resource; | 38 | found = free_resource; |
| 41 | } | 39 | } |
| 42 | } | 40 | } |
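CommitResource above scans for a slot whose tick the GPU has already passed, first from the hint to the end of the pool and then from the start up to the hint, and only grows the pool when both passes fail. A stripped-down, stand-alone sketch of that search, without the semaphore bookkeeping and assuming only the ticks vector described in the header:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    // Find a slot whose recorded tick the GPU has already reached, starting at
    // `hint` and wrapping around once. std::nullopt means the pool is exhausted.
    std::optional<std::size_t> FindFreeSlot(const std::vector<std::uint64_t>& ticks,
                                            std::uint64_t gpu_tick, std::size_t hint) {
        const auto search = [&](std::size_t begin,
                                std::size_t end) -> std::optional<std::size_t> {
            for (std::size_t it = begin; it < end; ++it) {
                if (gpu_tick >= ticks[it]) {
                    return it;
                }
            }
            return std::nullopt;
        };
        auto found = search(hint, ticks.size()); // hint .. end
        if (!found) {
            found = search(0, hint); // wrap around: begin .. hint
        }
        return found; // the caller grows the pool when this is empty
    }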
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h index 9d0bb3b4d..f0b80ad59 100644 --- a/src/video_core/renderer_vulkan/vk_resource_pool.h +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h | |||
| @@ -18,8 +18,16 @@ class MasterSemaphore; | |||
| 18 | */ | 18 | */ |
| 19 | class ResourcePool { | 19 | class ResourcePool { |
| 20 | public: | 20 | public: |
| 21 | explicit ResourcePool() = default; | ||
| 21 | explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); | 22 | explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); |
| 22 | virtual ~ResourcePool(); | 23 | |
| 24 | virtual ~ResourcePool() = default; | ||
| 25 | |||
| 26 | ResourcePool& operator=(ResourcePool&&) noexcept = default; | ||
| 27 | ResourcePool(ResourcePool&&) noexcept = default; | ||
| 28 | |||
| 29 | ResourcePool& operator=(const ResourcePool&) = default; | ||
| 30 | ResourcePool(const ResourcePool&) = default; | ||
| 23 | 31 | ||
| 24 | protected: | 32 | protected: |
| 25 | size_t CommitResource(); | 33 | size_t CommitResource(); |
| @@ -34,7 +42,7 @@ private: | |||
| 34 | /// Allocates a new page of resources. | 42 | /// Allocates a new page of resources. |
| 35 | void Grow(); | 43 | void Grow(); |
| 36 | 44 | ||
| 37 | MasterSemaphore& master_semaphore; | 45 | MasterSemaphore* master_semaphore{}; |
| 38 | size_t grow_step = 0; ///< Number of new resources created after an overflow | 46 | size_t grow_step = 0; ///< Number of new resources created after an overflow |
| 39 | size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found | 47 |
| 40 | std::vector<u64> ticks; ///< Ticks for each resource | 48 | std::vector<u64> ticks; ///< Ticks for each resource |
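Storing MasterSemaphore through a pointer rather than a reference is what allows the defaulted move and copy assignment operators above: a class with a reference member gets its assignment operators implicitly deleted. A minimal illustration, unrelated to the yuzu types:

    struct Dep {};

    struct ByReference {
        Dep& dep; // reference member: assignment operators are implicitly deleted
    };

    struct ByPointer {
        Dep* dep = nullptr; // pointer member: all special members can stay defaulted
    };

    int main() {
        Dep d;
        ByPointer a{&d};
        ByPointer b;
        b = a; // fine
        // ByReference r1{d}, r2{d};
        // r2 = r1; // error: copy assignment is deleted
        return 0;
    }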
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..4840962de 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { | |||
| 31 | command->~Command(); | 31 | command->~Command(); |
| 32 | command = next; | 32 | command = next; |
| 33 | } | 33 | } |
| 34 | 34 | submit = false; | |
| 35 | command_offset = 0; | 35 | command_offset = 0; |
| 36 | first = nullptr; | 36 | first = nullptr; |
| 37 | last = nullptr; | 37 | last = nullptr; |
| @@ -42,13 +42,16 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) | |||
| 42 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, | 42 | master_semaphore{std::make_unique<MasterSemaphore>(device)}, |
| 43 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { | 43 | command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { |
| 44 | AcquireNewChunk(); | 44 | AcquireNewChunk(); |
| 45 | AllocateNewContext(); | 45 | AllocateWorkerCommandBuffer(); |
| 46 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); | 46 | worker_thread = std::thread(&VKScheduler::WorkerThread, this); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | VKScheduler::~VKScheduler() { | 49 | VKScheduler::~VKScheduler() { |
| 50 | quit = true; | 50 | { |
| 51 | cv.notify_all(); | 51 | std::lock_guard lock{work_mutex}; |
| 52 | quit = true; | ||
| 53 | } | ||
| 54 | work_cv.notify_all(); | ||
| 52 | worker_thread.join(); | 55 | worker_thread.join(); |
| 53 | } | 56 | } |
| 54 | 57 | ||
| @@ -60,6 +63,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) { | |||
| 60 | void VKScheduler::Finish(VkSemaphore semaphore) { | 63 | void VKScheduler::Finish(VkSemaphore semaphore) { |
| 61 | const u64 presubmit_tick = CurrentTick(); | 64 | const u64 presubmit_tick = CurrentTick(); |
| 62 | SubmitExecution(semaphore); | 65 | SubmitExecution(semaphore); |
| 66 | WaitWorker(); | ||
| 63 | Wait(presubmit_tick); | 67 | Wait(presubmit_tick); |
| 64 | AllocateNewContext(); | 68 | AllocateNewContext(); |
| 65 | } | 69 | } |
| @@ -68,20 +72,19 @@ void VKScheduler::WaitWorker() { | |||
| 68 | MICROPROFILE_SCOPE(Vulkan_WaitForWorker); | 72 | MICROPROFILE_SCOPE(Vulkan_WaitForWorker); |
| 69 | DispatchWork(); | 73 | DispatchWork(); |
| 70 | 74 | ||
| 71 | bool finished = false; | 75 | std::unique_lock lock{work_mutex}; |
| 72 | do { | 76 | wait_cv.wait(lock, [this] { return work_queue.empty(); }); |
| 73 | cv.notify_all(); | ||
| 74 | std::unique_lock lock{mutex}; | ||
| 75 | finished = chunk_queue.Empty(); | ||
| 76 | } while (!finished); | ||
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void VKScheduler::DispatchWork() { | 79 | void VKScheduler::DispatchWork() { |
| 80 | if (chunk->Empty()) { | 80 | if (chunk->Empty()) { |
| 81 | return; | 81 | return; |
| 82 | } | 82 | } |
| 83 | chunk_queue.Push(std::move(chunk)); | 83 | { |
| 84 | cv.notify_all(); | 84 | std::lock_guard lock{work_mutex}; |
| 85 | work_queue.push(std::move(chunk)); | ||
| 86 | } | ||
| 87 | work_cv.notify_one(); | ||
| 85 | AcquireNewChunk(); | 88 | AcquireNewChunk(); |
| 86 | } | 89 | } |
| 87 | 90 | ||
| @@ -124,93 +127,101 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { | |||
| 124 | EndRenderPass(); | 127 | EndRenderPass(); |
| 125 | } | 128 | } |
| 126 | 129 | ||
| 127 | void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { | 130 | bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { |
| 128 | if (state.graphics_pipeline == pipeline) { | 131 | if (state.graphics_pipeline == pipeline) { |
| 129 | return; | 132 | return false; |
| 130 | } | 133 | } |
| 131 | state.graphics_pipeline = pipeline; | 134 | state.graphics_pipeline = pipeline; |
| 132 | Record([pipeline](vk::CommandBuffer cmdbuf) { | 135 | return true; |
| 133 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 134 | }); | ||
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | void VKScheduler::WorkerThread() { | 138 | void VKScheduler::WorkerThread() { |
| 138 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | 139 | Common::SetCurrentThreadName("yuzu:VulkanWorker"); |
| 139 | std::unique_lock lock{mutex}; | ||
| 140 | do { | 140 | do { |
| 141 | cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); | 141 | if (work_queue.empty()) { |
| 142 | if (quit) { | 142 | wait_cv.notify_all(); |
| 143 | continue; | 143 | } |
| 144 | std::unique_ptr<CommandChunk> work; | ||
| 145 | { | ||
| 146 | std::unique_lock lock{work_mutex}; | ||
| 147 | work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); | ||
| 148 | if (quit) { | ||
| 149 | continue; | ||
| 150 | } | ||
| 151 | work = std::move(work_queue.front()); | ||
| 152 | work_queue.pop(); | ||
| 153 | } | ||
| 154 | const bool has_submit = work->HasSubmit(); | ||
| 155 | work->ExecuteAll(current_cmdbuf); | ||
| 156 | if (has_submit) { | ||
| 157 | AllocateWorkerCommandBuffer(); | ||
| 144 | } | 158 | } |
| 145 | auto extracted_chunk = std::move(chunk_queue.Front()); | 159 | std::lock_guard reserve_lock{reserve_mutex}; |
| 146 | chunk_queue.Pop(); | 160 | chunk_reserve.push_back(std::move(work)); |
| 147 | extracted_chunk->ExecuteAll(current_cmdbuf); | ||
| 148 | chunk_reserve.Push(std::move(extracted_chunk)); | ||
| 149 | } while (!quit); | 161 | } while (!quit); |
| 150 | } | 162 | } |
| 151 | 163 | ||
| 164 | void VKScheduler::AllocateWorkerCommandBuffer() { | ||
| 165 | current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 166 | current_cmdbuf.Begin({ | ||
| 167 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 168 | .pNext = nullptr, | ||
| 169 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 170 | .pInheritanceInfo = nullptr, | ||
| 171 | }); | ||
| 172 | } | ||
| 173 | |||
| 152 | void VKScheduler::SubmitExecution(VkSemaphore semaphore) { | 174 | void VKScheduler::SubmitExecution(VkSemaphore semaphore) { |
| 153 | EndPendingOperations(); | 175 | EndPendingOperations(); |
| 154 | InvalidateState(); | 176 | InvalidateState(); |
| 155 | WaitWorker(); | ||
| 156 | 177 | ||
| 157 | std::unique_lock lock{mutex}; | 178 | const u64 signal_value = master_semaphore->NextTick(); |
| 179 | Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) { | ||
| 180 | cmdbuf.End(); | ||
| 158 | 181 | ||
| 159 | current_cmdbuf.End(); | 182 | const u32 num_signal_semaphores = semaphore ? 2U : 1U; |
| 160 | 183 | ||
| 161 | const VkSemaphore timeline_semaphore = master_semaphore->Handle(); | 184 | const u64 wait_value = signal_value - 1; |
| 162 | const u32 num_signal_semaphores = semaphore ? 2U : 1U; | 185 | const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; |
| 163 | 186 | ||
| 164 | const u64 signal_value = master_semaphore->CurrentTick(); | 187 | const VkSemaphore timeline_semaphore = master_semaphore->Handle(); |
| 165 | const u64 wait_value = signal_value - 1; | 188 | const std::array signal_values{signal_value, u64(0)}; |
| 166 | const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; | 189 | const std::array signal_semaphores{timeline_semaphore, semaphore}; |
| 167 | 190 | ||
| 168 | master_semaphore->NextTick(); | 191 | const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ |
| 169 | 192 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, | |
| 170 | const std::array signal_values{signal_value, u64(0)}; | 193 | .pNext = nullptr, |
| 171 | const std::array signal_semaphores{timeline_semaphore, semaphore}; | 194 | .waitSemaphoreValueCount = 1, |
| 172 | 195 | .pWaitSemaphoreValues = &wait_value, | |
| 173 | const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ | 196 | .signalSemaphoreValueCount = num_signal_semaphores, |
| 174 | .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, | 197 | .pSignalSemaphoreValues = signal_values.data(), |
| 175 | .pNext = nullptr, | 198 | }; |
| 176 | .waitSemaphoreValueCount = 1, | 199 | const VkSubmitInfo submit_info{ |
| 177 | .pWaitSemaphoreValues = &wait_value, | 200 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, |
| 178 | .signalSemaphoreValueCount = num_signal_semaphores, | 201 | .pNext = &timeline_si, |
| 179 | .pSignalSemaphoreValues = signal_values.data(), | 202 | .waitSemaphoreCount = 1, |
| 180 | }; | 203 | .pWaitSemaphores = &timeline_semaphore, |
| 181 | const VkSubmitInfo submit_info{ | 204 | .pWaitDstStageMask = &wait_stage_mask, |
| 182 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | 205 | .commandBufferCount = 1, |
| 183 | .pNext = &timeline_si, | 206 | .pCommandBuffers = cmdbuf.address(), |
| 184 | .waitSemaphoreCount = 1, | 207 | .signalSemaphoreCount = num_signal_semaphores, |
| 185 | .pWaitSemaphores = &timeline_semaphore, | 208 | .pSignalSemaphores = signal_semaphores.data(), |
| 186 | .pWaitDstStageMask = &wait_stage_mask, | 209 | }; |
| 187 | .commandBufferCount = 1, | 210 | switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { |
| 188 | .pCommandBuffers = current_cmdbuf.address(), | 211 | case VK_SUCCESS: |
| 189 | .signalSemaphoreCount = num_signal_semaphores, | 212 | break; |
| 190 | .pSignalSemaphores = signal_semaphores.data(), | 213 | case VK_ERROR_DEVICE_LOST: |
| 191 | }; | 214 | device.ReportLoss(); |
| 192 | switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { | 215 | [[fallthrough]]; |
| 193 | case VK_SUCCESS: | 216 | default: |
| 194 | break; | 217 | vk::Check(result); |
| 195 | case VK_ERROR_DEVICE_LOST: | 218 | } |
| 196 | device.ReportLoss(); | 219 | }); |
| 197 | [[fallthrough]]; | 220 | chunk->MarkSubmit(); |
| 198 | default: | 221 | DispatchWork(); |
| 199 | vk::Check(result); | ||
| 200 | } | ||
| 201 | } | 222 | } |
| 202 | 223 | ||
| 203 | void VKScheduler::AllocateNewContext() { | 224 | void VKScheduler::AllocateNewContext() { |
| 204 | std::unique_lock lock{mutex}; | ||
| 205 | |||
| 206 | current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); | ||
| 207 | current_cmdbuf.Begin({ | ||
| 208 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 209 | .pNext = nullptr, | ||
| 210 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 211 | .pInheritanceInfo = nullptr, | ||
| 212 | }); | ||
| 213 | |||
| 214 | // Enable counters once again. These are disabled when a command buffer is finished. | 225 | // Enable counters once again. These are disabled when a command buffer is finished. |
| 215 | if (query_cache) { | 226 | if (query_cache) { |
| 216 | query_cache->UpdateCounters(); | 227 | query_cache->UpdateCounters(); |
| @@ -265,12 +276,13 @@ void VKScheduler::EndRenderPass() { | |||
| 265 | } | 276 | } |
| 266 | 277 | ||
| 267 | void VKScheduler::AcquireNewChunk() { | 278 | void VKScheduler::AcquireNewChunk() { |
| 268 | if (chunk_reserve.Empty()) { | 279 | std::lock_guard lock{reserve_mutex}; |
| 280 | if (chunk_reserve.empty()) { | ||
| 269 | chunk = std::make_unique<CommandChunk>(); | 281 | chunk = std::make_unique<CommandChunk>(); |
| 270 | return; | 282 | return; |
| 271 | } | 283 | } |
| 272 | chunk = std::move(chunk_reserve.Front()); | 284 | chunk = std::move(chunk_reserve.back()); |
| 273 | chunk_reserve.Pop(); | 285 | chunk_reserve.pop_back(); |
| 274 | } | 286 | } |
| 275 | 287 | ||
| 276 | } // namespace Vulkan | 288 | } // namespace Vulkan |
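The scheduler now feeds its worker thread through a plain std::queue guarded by work_mutex, with work_cv waking the worker and wait_cv letting WaitWorker block until the queue drains. A self-contained sketch of that producer/consumer shape, with the Vulkan specifics replaced by std::function jobs:

    #include <condition_variable>
    #include <functional>
    #include <mutex>
    #include <queue>
    #include <thread>

    class Worker {
    public:
        Worker() : thread{[this] { Run(); }} {}

        ~Worker() {
            {
                std::lock_guard lock{mutex};
                quit = true;
            }
            work_cv.notify_all();
            thread.join();
        }

        // Analogous to VKScheduler::DispatchWork: enqueue and wake the worker.
        void Dispatch(std::function<void()> job) {
            {
                std::lock_guard lock{mutex};
                queue.push(std::move(job));
            }
            work_cv.notify_one();
        }

        // Analogous to VKScheduler::WaitWorker: block until the queue is empty.
        void WaitIdle() {
            std::unique_lock lock{mutex};
            wait_cv.wait(lock, [this] { return queue.empty(); });
        }

    private:
        void Run() {
            for (;;) {
                std::function<void()> job;
                {
                    std::unique_lock lock{mutex};
                    if (queue.empty()) {
                        wait_cv.notify_all();
                    }
                    work_cv.wait(lock, [this] { return !queue.empty() || quit; });
                    if (quit) {
                        return;
                    }
                    job = std::move(queue.front());
                    queue.pop();
                }
                job(); // execute outside the lock, like ExecuteAll on the command buffer
            }
        }

        std::queue<std::function<void()>> queue;
        std::mutex mutex;
        std::condition_variable work_cv;
        std::condition_variable wait_cv;
        bool quit = false;
        std::thread thread;
    };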
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..cf39a2363 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -8,12 +8,12 @@ | |||
| 8 | #include <condition_variable> | 8 | #include <condition_variable> |
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <stack> | ||
| 12 | #include <thread> | 11 | #include <thread> |
| 13 | #include <utility> | 12 | #include <utility> |
| 13 | #include <queue> | ||
| 14 | |||
| 14 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 15 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 16 | #include "common/threadsafe_queue.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 19 | ||
| @@ -22,6 +22,7 @@ namespace Vulkan { | |||
| 22 | class CommandPool; | 22 | class CommandPool; |
| 23 | class Device; | 23 | class Device; |
| 24 | class Framebuffer; | 24 | class Framebuffer; |
| 25 | class GraphicsPipeline; | ||
| 25 | class StateTracker; | 26 | class StateTracker; |
| 26 | class VKQueryCache; | 27 | class VKQueryCache; |
| 27 | 28 | ||
| @@ -52,8 +53,8 @@ public: | |||
| 52 | /// of a renderpass. | 53 | /// of a renderpass. |
| 53 | void RequestOutsideRenderPassOperationContext(); | 54 | void RequestOutsideRenderPassOperationContext(); |
| 54 | 55 | ||
| 55 | /// Binds a pipeline to the current execution context. | 56 | /// Update the pipeline to the current execution context. |
| 56 | void BindGraphicsPipeline(VkPipeline pipeline); | 57 | bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); |
| 57 | 58 | ||
| 58 | /// Invalidates current command buffer state except for render passes | 59 | /// Invalidates current command buffer state except for render passes |
| 59 | void InvalidateState(); | 60 | void InvalidateState(); |
| @@ -85,6 +86,10 @@ public: | |||
| 85 | 86 | ||
| 86 | /// Waits for the given tick to trigger on the GPU. | 87 | /// Waits for the given tick to trigger on the GPU. |
| 87 | void Wait(u64 tick) { | 88 | void Wait(u64 tick) { |
| 89 | if (tick >= master_semaphore->CurrentTick()) { | ||
| 90 | // Make sure we are not waiting for the current tick without signalling | ||
| 91 | Flush(); | ||
| 92 | } | ||
| 88 | master_semaphore->Wait(tick); | 93 | master_semaphore->Wait(tick); |
| 89 | } | 94 | } |
| 90 | 95 | ||
| @@ -154,15 +159,24 @@ private: | |||
| 154 | return true; | 159 | return true; |
| 155 | } | 160 | } |
| 156 | 161 | ||
| 162 | void MarkSubmit() { | ||
| 163 | submit = true; | ||
| 164 | } | ||
| 165 | |||
| 157 | bool Empty() const { | 166 | bool Empty() const { |
| 158 | return command_offset == 0; | 167 | return command_offset == 0; |
| 159 | } | 168 | } |
| 160 | 169 | ||
| 170 | bool HasSubmit() const { | ||
| 171 | return submit; | ||
| 172 | } | ||
| 173 | |||
| 161 | private: | 174 | private: |
| 162 | Command* first = nullptr; | 175 | Command* first = nullptr; |
| 163 | Command* last = nullptr; | 176 | Command* last = nullptr; |
| 164 | 177 | ||
| 165 | size_t command_offset = 0; | 178 | size_t command_offset = 0; |
| 179 | bool submit = false; | ||
| 166 | alignas(std::max_align_t) std::array<u8, 0x8000> data{}; | 180 | alignas(std::max_align_t) std::array<u8, 0x8000> data{}; |
| 167 | }; | 181 | }; |
| 168 | 182 | ||
| @@ -170,11 +184,13 @@ private: | |||
| 170 | VkRenderPass renderpass = nullptr; | 184 | VkRenderPass renderpass = nullptr; |
| 171 | VkFramebuffer framebuffer = nullptr; | 185 | VkFramebuffer framebuffer = nullptr; |
| 172 | VkExtent2D render_area = {0, 0}; | 186 | VkExtent2D render_area = {0, 0}; |
| 173 | VkPipeline graphics_pipeline = nullptr; | 187 | GraphicsPipeline* graphics_pipeline = nullptr; |
| 174 | }; | 188 | }; |
| 175 | 189 | ||
| 176 | void WorkerThread(); | 190 | void WorkerThread(); |
| 177 | 191 | ||
| 192 | void AllocateWorkerCommandBuffer(); | ||
| 193 | |||
| 178 | void SubmitExecution(VkSemaphore semaphore); | 194 | void SubmitExecution(VkSemaphore semaphore); |
| 179 | 195 | ||
| 180 | void AllocateNewContext(); | 196 | void AllocateNewContext(); |
| @@ -204,11 +220,13 @@ private: | |||
| 204 | std::array<VkImage, 9> renderpass_images{}; | 220 | std::array<VkImage, 9> renderpass_images{}; |
| 205 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; | 221 | std::array<VkImageSubresourceRange, 9> renderpass_image_ranges{}; |
| 206 | 222 | ||
| 207 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; | 223 | std::queue<std::unique_ptr<CommandChunk>> work_queue; |
| 208 | Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | 224 | std::vector<std::unique_ptr<CommandChunk>> chunk_reserve; |
| 209 | std::mutex mutex; | 225 | std::mutex reserve_mutex; |
| 210 | std::condition_variable cv; | 226 | std::mutex work_mutex; |
| 211 | bool quit = false; | 227 | std::condition_variable work_cv; |
| 228 | std::condition_variable wait_cv; | ||
| 229 | std::atomic_bool quit{}; | ||
| 212 | }; | 230 | }; |
| 213 | 231 | ||
| 214 | } // namespace Vulkan | 232 | } // namespace Vulkan |
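CommandChunk defers recorded work by placement-constructing small type-erased command objects into its fixed 0x8000-byte buffer and replaying them later on the worker thread (see ExecuteAll in the .cpp diff above). The following is a generic, stand-alone sketch of that technique, not the yuzu implementation; the command-buffer argument is replaced by an int& for brevity:

    #include <array>
    #include <cstddef>
    #include <new>
    #include <type_traits>
    #include <utility>

    class Chunk {
    public:
        // Returns false when the closure does not fit; the caller would then hand
        // this chunk to the worker and retry the record on a fresh chunk.
        template <typename T>
        bool Record(T&& command) {
            using Func = std::decay_t<T>;
            struct Node final : Command {
                explicit Node(Func f) : func{std::move(f)} {}
                void Execute(int& target) const override {
                    func(target);
                }
                Func func;
            };
            // Align nodes to max_align_t (over-aligned closures are not supported here).
            const std::size_t aligned =
                (offset + alignof(std::max_align_t) - 1) & ~(alignof(std::max_align_t) - 1);
            if (aligned + sizeof(Node) > data.size()) {
                return false;
            }
            Command* const node = new (data.data() + aligned) Node{std::forward<T>(command)};
            if (last) {
                last->next = node;
            } else {
                first = node;
            }
            last = node;
            offset = aligned + sizeof(Node);
            return true;
        }

        void ExecuteAll(int& target) {
            for (Command* cmd = first; cmd != nullptr;) {
                Command* const next = cmd->next;
                cmd->Execute(target);
                cmd->~Command(); // virtual: also destroys the stored closure
                cmd = next;
            }
            offset = 0;
            first = nullptr;
            last = nullptr;
        }

    private:
        struct Command {
            virtual ~Command() = default;
            virtual void Execute(int& target) const = 0;
            Command* next = nullptr;
        };

        Command* first = nullptr;
        Command* last = nullptr;
        std::size_t offset = 0;
        alignas(std::max_align_t) std::array<unsigned char, 0x8000> data{};
    };

A caller records closures such as chunk.Record([](int& x) { ++x; }); and later runs chunk.ExecuteAll(counter); on the worker thread, which is the same shape as scheduler.Record(...) followed by ExecuteAll(current_cmdbuf) in the diff above.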
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp deleted file mode 100644 index c6846d886..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ /dev/null | |||
| @@ -1,3166 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <functional> | ||
| 6 | #include <limits> | ||
| 7 | #include <map> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include <sirit/sirit.h> | ||
| 16 | |||
| 17 | #include "common/alignment.h" | ||
| 18 | #include "common/assert.h" | ||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "common/logging/log.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/engines/shader_bytecode.h" | ||
| 23 | #include "video_core/engines/shader_header.h" | ||
| 24 | #include "video_core/engines/shader_type.h" | ||
| 25 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||
| 26 | #include "video_core/shader/node.h" | ||
| 27 | #include "video_core/shader/shader_ir.h" | ||
| 28 | #include "video_core/shader/transform_feedback.h" | ||
| 29 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 30 | |||
| 31 | namespace Vulkan { | ||
| 32 | |||
| 33 | namespace { | ||
| 34 | |||
| 35 | using Sirit::Id; | ||
| 36 | using Tegra::Engines::ShaderType; | ||
| 37 | using Tegra::Shader::Attribute; | ||
| 38 | using Tegra::Shader::PixelImap; | ||
| 39 | using Tegra::Shader::Register; | ||
| 40 | using namespace VideoCommon::Shader; | ||
| 41 | |||
| 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 43 | using Operation = const OperationNode&; | ||
| 44 | |||
| 45 | class ASTDecompiler; | ||
| 46 | class ExprDecompiler; | ||
| 47 | |||
| 48 | // TODO(Rodrigo): Use rasterizer's value | ||
| 49 | constexpr u32 MaxConstBufferFloats = 0x4000; | ||
| 50 | constexpr u32 MaxConstBufferElements = MaxConstBufferFloats / 4; | ||
| 51 | |||
| 52 | constexpr u32 NumInputPatches = 32; // This value seems to be the standard | ||
| 53 | |||
| 54 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 55 | |||
| 56 | class Expression final { | ||
| 57 | public: | ||
| 58 | Expression(Id id_, Type type_) : id{id_}, type{type_} { | ||
| 59 | ASSERT(type_ != Type::Void); | ||
| 60 | } | ||
| 61 | Expression() : type{Type::Void} {} | ||
| 62 | |||
| 63 | Id id{}; | ||
| 64 | Type type{}; | ||
| 65 | }; | ||
| 66 | static_assert(std::is_standard_layout_v<Expression>); | ||
| 67 | |||
| 68 | struct TexelBuffer { | ||
| 69 | Id image_type{}; | ||
| 70 | Id image{}; | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct SampledImage { | ||
| 74 | Id image_type{}; | ||
| 75 | Id sampler_type{}; | ||
| 76 | Id sampler_pointer_type{}; | ||
| 77 | Id variable{}; | ||
| 78 | }; | ||
| 79 | |||
| 80 | struct StorageImage { | ||
| 81 | Id image_type{}; | ||
| 82 | Id image{}; | ||
| 83 | }; | ||
| 84 | |||
| 85 | struct AttributeType { | ||
| 86 | Type type; | ||
| 87 | Id scalar; | ||
| 88 | Id vector; | ||
| 89 | }; | ||
| 90 | |||
| 91 | struct VertexIndices { | ||
| 92 | std::optional<u32> position; | ||
| 93 | std::optional<u32> layer; | ||
| 94 | std::optional<u32> viewport; | ||
| 95 | std::optional<u32> point_size; | ||
| 96 | std::optional<u32> clip_distances; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 105 | spv::Dim GetSamplerDim(const SamplerEntry& sampler) { | ||
| 106 | ASSERT(!sampler.is_buffer); | ||
| 107 | switch (sampler.type) { | ||
| 108 | case Tegra::Shader::TextureType::Texture1D: | ||
| 109 | return spv::Dim::Dim1D; | ||
| 110 | case Tegra::Shader::TextureType::Texture2D: | ||
| 111 | return spv::Dim::Dim2D; | ||
| 112 | case Tegra::Shader::TextureType::Texture3D: | ||
| 113 | return spv::Dim::Dim3D; | ||
| 114 | case Tegra::Shader::TextureType::TextureCube: | ||
| 115 | return spv::Dim::Cube; | ||
| 116 | default: | ||
| 117 | UNIMPLEMENTED_MSG("Unimplemented sampler type={}", sampler.type); | ||
| 118 | return spv::Dim::Dim2D; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | std::pair<spv::Dim, bool> GetImageDim(const ImageEntry& image) { | ||
| 123 | switch (image.type) { | ||
| 124 | case Tegra::Shader::ImageType::Texture1D: | ||
| 125 | return {spv::Dim::Dim1D, false}; | ||
| 126 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 127 | return {spv::Dim::Buffer, false}; | ||
| 128 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 129 | return {spv::Dim::Dim1D, true}; | ||
| 130 | case Tegra::Shader::ImageType::Texture2D: | ||
| 131 | return {spv::Dim::Dim2D, false}; | ||
| 132 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 133 | return {spv::Dim::Dim2D, true}; | ||
| 134 | case Tegra::Shader::ImageType::Texture3D: | ||
| 135 | return {spv::Dim::Dim3D, false}; | ||
| 136 | default: | ||
| 137 | UNIMPLEMENTED_MSG("Unimplemented image type={}", image.type); | ||
| 138 | return {spv::Dim::Dim2D, false}; | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | /// Returns the number of vertices present in a primitive topology. | ||
| 143 | u32 GetNumPrimitiveTopologyVertices(Maxwell::PrimitiveTopology primitive_topology) { | ||
| 144 | switch (primitive_topology) { | ||
| 145 | case Maxwell::PrimitiveTopology::Points: | ||
| 146 | return 1; | ||
| 147 | case Maxwell::PrimitiveTopology::Lines: | ||
| 148 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 149 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 150 | return 2; | ||
| 151 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 152 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 153 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 154 | return 3; | ||
| 155 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 156 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 157 | return 4; | ||
| 158 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 159 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 160 | return 6; | ||
| 161 | case Maxwell::PrimitiveTopology::Quads: | ||
| 162 | UNIMPLEMENTED_MSG("Quads"); | ||
| 163 | return 3; | ||
| 164 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 165 | UNIMPLEMENTED_MSG("QuadStrip"); | ||
| 166 | return 3; | ||
| 167 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 168 | UNIMPLEMENTED_MSG("Polygon"); | ||
| 169 | return 3; | ||
| 170 | case Maxwell::PrimitiveTopology::Patches: | ||
| 171 | UNIMPLEMENTED_MSG("Patches"); | ||
| 172 | return 3; | ||
| 173 | default: | ||
| 174 | UNREACHABLE(); | ||
| 175 | return 3; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
| 179 | spv::ExecutionMode GetExecutionMode(Maxwell::TessellationPrimitive primitive) { | ||
| 180 | switch (primitive) { | ||
| 181 | case Maxwell::TessellationPrimitive::Isolines: | ||
| 182 | return spv::ExecutionMode::Isolines; | ||
| 183 | case Maxwell::TessellationPrimitive::Triangles: | ||
| 184 | return spv::ExecutionMode::Triangles; | ||
| 185 | case Maxwell::TessellationPrimitive::Quads: | ||
| 186 | return spv::ExecutionMode::Quads; | ||
| 187 | } | ||
| 188 | UNREACHABLE(); | ||
| 189 | return spv::ExecutionMode::Triangles; | ||
| 190 | } | ||
| 191 | |||
| 192 | spv::ExecutionMode GetExecutionMode(Maxwell::TessellationSpacing spacing) { | ||
| 193 | switch (spacing) { | ||
| 194 | case Maxwell::TessellationSpacing::Equal: | ||
| 195 | return spv::ExecutionMode::SpacingEqual; | ||
| 196 | case Maxwell::TessellationSpacing::FractionalOdd: | ||
| 197 | return spv::ExecutionMode::SpacingFractionalOdd; | ||
| 198 | case Maxwell::TessellationSpacing::FractionalEven: | ||
| 199 | return spv::ExecutionMode::SpacingFractionalEven; | ||
| 200 | } | ||
| 201 | UNREACHABLE(); | ||
| 202 | return spv::ExecutionMode::SpacingEqual; | ||
| 203 | } | ||
| 204 | |||
| 205 | spv::ExecutionMode GetExecutionMode(Maxwell::PrimitiveTopology input_topology) { | ||
| 206 | switch (input_topology) { | ||
| 207 | case Maxwell::PrimitiveTopology::Points: | ||
| 208 | return spv::ExecutionMode::InputPoints; | ||
| 209 | case Maxwell::PrimitiveTopology::Lines: | ||
| 210 | case Maxwell::PrimitiveTopology::LineLoop: | ||
| 211 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 212 | return spv::ExecutionMode::InputLines; | ||
| 213 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 214 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 215 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 216 | return spv::ExecutionMode::Triangles; | ||
| 217 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 218 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 219 | return spv::ExecutionMode::InputLinesAdjacency; | ||
| 220 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 221 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 222 | return spv::ExecutionMode::InputTrianglesAdjacency; | ||
| 223 | case Maxwell::PrimitiveTopology::Quads: | ||
| 224 | UNIMPLEMENTED_MSG("Quads"); | ||
| 225 | return spv::ExecutionMode::Triangles; | ||
| 226 | case Maxwell::PrimitiveTopology::QuadStrip: | ||
| 227 | UNIMPLEMENTED_MSG("QuadStrip"); | ||
| 228 | return spv::ExecutionMode::Triangles; | ||
| 229 | case Maxwell::PrimitiveTopology::Polygon: | ||
| 230 | UNIMPLEMENTED_MSG("Polygon"); | ||
| 231 | return spv::ExecutionMode::Triangles; | ||
| 232 | case Maxwell::PrimitiveTopology::Patches: | ||
| 233 | UNIMPLEMENTED_MSG("Patches"); | ||
| 234 | return spv::ExecutionMode::Triangles; | ||
| 235 | } | ||
| 236 | UNREACHABLE(); | ||
| 237 | return spv::ExecutionMode::Triangles; | ||
| 238 | } | ||
| 239 | |||
| 240 | spv::ExecutionMode GetExecutionMode(Tegra::Shader::OutputTopology output_topology) { | ||
| 241 | switch (output_topology) { | ||
| 242 | case Tegra::Shader::OutputTopology::PointList: | ||
| 243 | return spv::ExecutionMode::OutputPoints; | ||
| 244 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 245 | return spv::ExecutionMode::OutputLineStrip; | ||
| 246 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 247 | return spv::ExecutionMode::OutputTriangleStrip; | ||
| 248 | default: | ||
| 249 | UNREACHABLE(); | ||
| 250 | return spv::ExecutionMode::OutputPoints; | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | /// Returns true if an attribute index is one of the 32 generic attributes | ||
| 255 | constexpr bool IsGenericAttribute(Attribute::Index attribute) { | ||
| 256 | return attribute >= Attribute::Index::Attribute_0 && | ||
| 257 | attribute <= Attribute::Index::Attribute_31; | ||
| 258 | } | ||
| 259 | |||
| 260 | /// Returns the location of a generic attribute | ||
| 261 | u32 GetGenericAttributeLocation(Attribute::Index attribute) { | ||
| 262 | ASSERT(IsGenericAttribute(attribute)); | ||
| 263 | return static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 264 | } | ||
| 265 | |||
| 266 | /// Returns true if an object has to be treated as precise | ||
| 267 | bool IsPrecise(Operation operand) { | ||
| 268 | const auto& meta{operand.GetMeta()}; | ||
| 269 | if (std::holds_alternative<MetaArithmetic>(meta)) { | ||
| 270 | return std::get<MetaArithmetic>(meta).precise; | ||
| 271 | } | ||
| 272 | return false; | ||
| 273 | } | ||
| 274 | |||
| 275 | class SPIRVDecompiler final : public Sirit::Module { | ||
| 276 | public: | ||
| 277 | explicit SPIRVDecompiler(const Device& device_, const ShaderIR& ir_, ShaderType stage_, | ||
| 278 | const Registry& registry_, const Specialization& specialization_) | ||
| 279 | : Module(0x00010300), device{device_}, ir{ir_}, stage{stage_}, header{ir_.GetHeader()}, | ||
| 280 | registry{registry_}, specialization{specialization_} { | ||
| 281 | if (stage_ != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry_.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 285 | AddCapability(spv::Capability::Shader); | ||
| 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | ||
| 287 | AddCapability(spv::Capability::ImageQuery); | ||
| 288 | AddCapability(spv::Capability::Image1D); | ||
| 289 | AddCapability(spv::Capability::ImageBuffer); | ||
| 290 | AddCapability(spv::Capability::ImageGatherExtended); | ||
| 291 | AddCapability(spv::Capability::SampledBuffer); | ||
| 292 | AddCapability(spv::Capability::StorageImageWriteWithoutFormat); | ||
| 293 | AddCapability(spv::Capability::DrawParameters); | ||
| 294 | AddCapability(spv::Capability::SubgroupBallotKHR); | ||
| 295 | AddCapability(spv::Capability::SubgroupVoteKHR); | ||
| 296 | AddExtension("SPV_KHR_16bit_storage"); | ||
| 297 | AddExtension("SPV_KHR_shader_ballot"); | ||
| 298 | AddExtension("SPV_KHR_subgroup_vote"); | ||
| 299 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | ||
| 300 | AddExtension("SPV_KHR_variable_pointers"); | ||
| 301 | AddExtension("SPV_KHR_shader_draw_parameters"); | ||
| 302 | |||
| 303 | if (!transform_feedback.empty()) { | ||
| 304 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 305 | AddCapability(spv::Capability::TransformFeedback); | ||
| 306 | } else { | ||
| 307 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedback, but it is not " | ||
| 308 | "supported on this device"); | ||
| 309 | } | ||
| 310 | } | ||
| 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | ||
| 312 | if (ir.UsesViewportIndex()) { | ||
| 313 | AddCapability(spv::Capability::MultiViewport); | ||
| 314 | } | ||
| 315 | if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 316 | AddExtension("SPV_EXT_shader_viewport_index_layer"); | ||
| 317 | AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); | ||
| 318 | } | ||
| 319 | } | ||
| 320 | if (device.IsFormatlessImageLoadSupported()) { | ||
| 321 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | ||
| 322 | } | ||
| 323 | if (device.IsFloat16Supported()) { | ||
| 324 | AddCapability(spv::Capability::Float16); | ||
| 325 | } | ||
| 326 | t_scalar_half = Name(TypeFloat(device_.IsFloat16Supported() ? 16 : 32), "scalar_half"); | ||
| 327 | t_half = Name(TypeVector(t_scalar_half, 2), "half"); | ||
| 328 | |||
| 329 | const Id main = Decompile(); | ||
| 330 | |||
| 331 | switch (stage) { | ||
| 332 | case ShaderType::Vertex: | ||
| 333 | AddEntryPoint(spv::ExecutionModel::Vertex, main, "main", interfaces); | ||
| 334 | break; | ||
| 335 | case ShaderType::TesselationControl: | ||
| 336 | AddCapability(spv::Capability::Tessellation); | ||
| 337 | AddEntryPoint(spv::ExecutionModel::TessellationControl, main, "main", interfaces); | ||
| 338 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | ||
| 339 | header.common2.threads_per_input_primitive); | ||
| 340 | break; | ||
| 341 | case ShaderType::TesselationEval: { | ||
| 342 | const auto& info = registry.GetGraphicsInfo(); | ||
| 343 | AddCapability(spv::Capability::Tessellation); | ||
| 344 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | ||
| 345 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); | ||
| 346 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); | ||
| 347 | AddExecutionMode(main, info.tessellation_clockwise | ||
| 348 | ? spv::ExecutionMode::VertexOrderCw | ||
| 349 | : spv::ExecutionMode::VertexOrderCcw); | ||
| 350 | break; | ||
| 351 | } | ||
| 352 | case ShaderType::Geometry: { | ||
| 353 | const auto& info = registry.GetGraphicsInfo(); | ||
| 354 | AddCapability(spv::Capability::Geometry); | ||
| 355 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | ||
| 356 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); | ||
| 357 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | ||
| 358 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | ||
| 359 | header.common4.max_output_vertices); | ||
| 360 | // TODO(Rodrigo): Where can we get this info from? | ||
| 361 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | ||
| 362 | break; | ||
| 363 | } | ||
| 364 | case ShaderType::Fragment: | ||
| 365 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | ||
| 366 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | ||
| 367 | if (header.ps.omap.depth) { | ||
| 368 | AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); | ||
| 369 | } | ||
| 370 | if (specialization.early_fragment_tests) { | ||
| 371 | AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); | ||
| 372 | } | ||
| 373 | break; | ||
| 374 | case ShaderType::Compute: | ||
| 375 | const auto workgroup_size = specialization.workgroup_size; | ||
| 376 | AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], | ||
| 377 | workgroup_size[1], workgroup_size[2]); | ||
| 378 | AddEntryPoint(spv::ExecutionModel::GLCompute, main, "main", interfaces); | ||
| 379 | break; | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | private: | ||
| 384 | Id Decompile() { | ||
| 385 | DeclareCommon(); | ||
| 386 | DeclareVertex(); | ||
| 387 | DeclareTessControl(); | ||
| 388 | DeclareTessEval(); | ||
| 389 | DeclareGeometry(); | ||
| 390 | DeclareFragment(); | ||
| 391 | DeclareCompute(); | ||
| 392 | DeclareRegisters(); | ||
| 393 | DeclareCustomVariables(); | ||
| 394 | DeclarePredicates(); | ||
| 395 | DeclareLocalMemory(); | ||
| 396 | DeclareSharedMemory(); | ||
| 397 | DeclareInternalFlags(); | ||
| 398 | DeclareInputAttributes(); | ||
| 399 | DeclareOutputAttributes(); | ||
| 400 | |||
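| | // Bindings are assigned sequentially from specialization.base_binding; the order of | ||
| | // the Declare* calls below (constant buffers, global buffers, uniform texels, | ||
| | // samplers, storage texels, images) presumably has to match the descriptor set | ||
| | // layout created by the pipeline cache. | ||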
| 401 | u32 binding = specialization.base_binding; | ||
| 402 | binding = DeclareConstantBuffers(binding); | ||
| 403 | binding = DeclareGlobalBuffers(binding); | ||
| 404 | binding = DeclareUniformTexels(binding); | ||
| 405 | binding = DeclareSamplers(binding); | ||
| 406 | binding = DeclareStorageTexels(binding); | ||
| 407 | binding = DeclareImages(binding); | ||
| 408 | |||
| 409 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); | ||
| 410 | AddLabel(); | ||
| 411 | |||
| 412 | if (ir.IsDecompiled()) { | ||
| 413 | DeclareFlowVariables(); | ||
| 414 | DecompileAST(); | ||
| 415 | } else { | ||
| 416 | AllocateLabels(); | ||
| 417 | DecompileBranchMode(); | ||
| 418 | } | ||
| 419 | |||
| 420 | OpReturn(); | ||
| 421 | OpFunctionEnd(); | ||
| 422 | |||
| 423 | return main; | ||
| 424 | } | ||
| 425 | |||
| 426 | void DefinePrologue() { | ||
| 427 | if (stage == ShaderType::Vertex) { | ||
| 428 | // Clear Position to avoid reading trash on the Z conversion. | ||
| 429 | const auto position_index = out_indices.position.value(); | ||
| 430 | const Id position = AccessElement(t_out_float4, out_vertex, position_index); | ||
| 431 | OpStore(position, v_varying_default); | ||
| 432 | |||
| 433 | if (specialization.point_size) { | ||
| 434 | const u32 point_size_index = out_indices.point_size.value(); | ||
| 435 | const Id out_point_size = AccessElement(t_out_float, out_vertex, point_size_index); | ||
| 436 | OpStore(out_point_size, Constant(t_float, *specialization.point_size)); | ||
| 437 | } | ||
| 438 | } | ||
| 439 | } | ||
| 440 | |||
| 441 | void DecompileAST(); | ||
| 442 | |||
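| | // A rough sketch of the SPIR-V emitted when the IR could not be structurally | ||
| | // decompiled (branch mode): | ||
| | //   jmp_to = first_address; | ||
| | //   loop { | ||
| | //     switch (jmp_to) { | ||
| | //       case <block address>: <basic block>; branch to the next block or default; | ||
| | //       default: return; | ||
| | //     } | ||
| | //   } | ||
| | // SSY/PBK targets are kept in small flow stacks with explicit top indices. | ||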
| 443 | void DecompileBranchMode() { | ||
| 444 | const u32 first_address = ir.GetBasicBlocks().begin()->first; | ||
| 445 | const Id loop_label = OpLabel("loop"); | ||
| 446 | const Id merge_label = OpLabel("merge"); | ||
| 447 | const Id dummy_label = OpLabel(); | ||
| 448 | const Id jump_label = OpLabel(); | ||
| 449 | continue_label = OpLabel("continue"); | ||
| 450 | |||
| 451 | std::vector<Sirit::Literal> literals; | ||
| 452 | std::vector<Id> branch_labels; | ||
| 453 | for (const auto& [literal, label] : labels) { | ||
| 454 | literals.push_back(literal); | ||
| 455 | branch_labels.push_back(label); | ||
| 456 | } | ||
| 457 | |||
| 458 | jmp_to = OpVariable(TypePointer(spv::StorageClass::Function, t_uint), | ||
| 459 | spv::StorageClass::Function, Constant(t_uint, first_address)); | ||
| 460 | AddLocalVariable(jmp_to); | ||
| 461 | |||
| 462 | std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); | ||
| 463 | std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); | ||
| 464 | |||
| 465 | Name(jmp_to, "jmp_to"); | ||
| 466 | Name(ssy_flow_stack, "ssy_flow_stack"); | ||
| 467 | Name(ssy_flow_stack_top, "ssy_flow_stack_top"); | ||
| 468 | Name(pbk_flow_stack, "pbk_flow_stack"); | ||
| 469 | Name(pbk_flow_stack_top, "pbk_flow_stack_top"); | ||
| 470 | |||
| 471 | DefinePrologue(); | ||
| 472 | |||
| 473 | OpBranch(loop_label); | ||
| 474 | AddLabel(loop_label); | ||
| 475 | OpLoopMerge(merge_label, continue_label, spv::LoopControlMask::MaskNone); | ||
| 476 | OpBranch(dummy_label); | ||
| 477 | |||
| 478 | AddLabel(dummy_label); | ||
| 479 | const Id default_branch = OpLabel(); | ||
| 480 | const Id jmp_to_load = OpLoad(t_uint, jmp_to); | ||
| 481 | OpSelectionMerge(jump_label, spv::SelectionControlMask::MaskNone); | ||
| 482 | OpSwitch(jmp_to_load, default_branch, literals, branch_labels); | ||
| 483 | |||
| 484 | AddLabel(default_branch); | ||
| 485 | OpReturn(); | ||
| 486 | |||
| 487 | for (const auto& [address, bb] : ir.GetBasicBlocks()) { | ||
| 488 | AddLabel(labels.at(address)); | ||
| 489 | |||
| 490 | VisitBasicBlock(bb); | ||
| 491 | |||
| 492 | const auto next_it = labels.lower_bound(address + 1); | ||
| 493 | const Id next_label = next_it != labels.end() ? next_it->second : default_branch; | ||
| 494 | OpBranch(next_label); | ||
| 495 | } | ||
| 496 | |||
| 497 | AddLabel(jump_label); | ||
| 498 | OpBranch(continue_label); | ||
| 499 | AddLabel(continue_label); | ||
| 500 | OpBranch(loop_label); | ||
| 501 | AddLabel(merge_label); | ||
| 502 | } | ||
| 503 | |||
| 504 | private: | ||
| 505 | friend class ASTDecompiler; | ||
| 506 | friend class ExprDecompiler; | ||
| 507 | |||
| 508 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | ||
| 509 | |||
| 510 | void AllocateLabels() { | ||
| 511 | for (const auto& pair : ir.GetBasicBlocks()) { | ||
| 512 | const u32 address = pair.first; | ||
| 513 | labels.emplace(address, OpLabel(fmt::format("label_0x{:x}", address))); | ||
| 514 | } | ||
| 515 | } | ||
| 516 | |||
| 517 | void DeclareCommon() { | ||
| 518 | thread_id = | ||
| 519 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); | ||
| 520 | thread_masks[0] = | ||
| 521 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); | ||
| 522 | thread_masks[1] = | ||
| 523 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); | ||
| 524 | thread_masks[2] = | ||
| 525 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); | ||
| 526 | thread_masks[3] = | ||
| 527 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); | ||
| 528 | thread_masks[4] = | ||
| 529 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); | ||
| 530 | } | ||
| 531 | |||
| 532 | void DeclareVertex() { | ||
| 533 | if (stage != ShaderType::Vertex) { | ||
| 534 | return; | ||
| 535 | } | ||
| 536 | Id out_vertex_struct; | ||
| 537 | std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); | ||
| 538 | const Id vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); | ||
| 539 | out_vertex = OpVariable(vertex_ptr, spv::StorageClass::Output); | ||
| 540 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); | ||
| 541 | |||
| 542 | // Declare input attributes | ||
| 543 | vertex_index = DeclareInputBuiltIn(spv::BuiltIn::VertexIndex, t_in_int, "vertex_index"); | ||
| 544 | instance_index = | ||
| 545 | DeclareInputBuiltIn(spv::BuiltIn::InstanceIndex, t_in_int, "instance_index"); | ||
| 546 | base_vertex = DeclareInputBuiltIn(spv::BuiltIn::BaseVertex, t_in_int, "base_vertex"); | ||
| 547 | base_instance = DeclareInputBuiltIn(spv::BuiltIn::BaseInstance, t_in_int, "base_instance"); | ||
| 548 | } | ||
| 549 | |||
| 550 | void DeclareTessControl() { | ||
| 551 | if (stage != ShaderType::TesselationControl) { | ||
| 552 | return; | ||
| 553 | } | ||
| 554 | DeclareInputVertexArray(NumInputPatches); | ||
| 555 | DeclareOutputVertexArray(header.common2.threads_per_input_primitive); | ||
| 556 | |||
| 557 | tess_level_outer = DeclareBuiltIn( | ||
| 558 | spv::BuiltIn::TessLevelOuter, spv::StorageClass::Output, | ||
| 559 | TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 4U))), | ||
| 560 | "tess_level_outer"); | ||
| 561 | Decorate(tess_level_outer, spv::Decoration::Patch); | ||
| 562 | |||
| 563 | tess_level_inner = DeclareBuiltIn( | ||
| 564 | spv::BuiltIn::TessLevelInner, spv::StorageClass::Output, | ||
| 565 | TypePointer(spv::StorageClass::Output, TypeArray(t_float, Constant(t_uint, 2U))), | ||
| 566 | "tess_level_inner"); | ||
| 567 | Decorate(tess_level_inner, spv::Decoration::Patch); | ||
| 568 | |||
| 569 | invocation_id = DeclareInputBuiltIn(spv::BuiltIn::InvocationId, t_in_int, "invocation_id"); | ||
| 570 | } | ||
| 571 | |||
| 572 | void DeclareTessEval() { | ||
| 573 | if (stage != ShaderType::TesselationEval) { | ||
| 574 | return; | ||
| 575 | } | ||
| 576 | DeclareInputVertexArray(NumInputPatches); | ||
| 577 | DeclareOutputVertex(); | ||
| 578 | |||
| 579 | tess_coord = DeclareInputBuiltIn(spv::BuiltIn::TessCoord, t_in_float3, "tess_coord"); | ||
| 580 | } | ||
| 581 | |||
| 582 | void DeclareGeometry() { | ||
| 583 | if (stage != ShaderType::Geometry) { | ||
| 584 | return; | ||
| 585 | } | ||
| 586 | const auto& info = registry.GetGraphicsInfo(); | ||
| 587 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 588 | DeclareInputVertexArray(num_input); | ||
| 589 | DeclareOutputVertex(); | ||
| 590 | } | ||
| 591 | |||
| 592 | void DeclareFragment() { | ||
| 593 | if (stage != ShaderType::Fragment) { | ||
| 594 | return; | ||
| 595 | } | ||
| 596 | |||
| 597 | for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { | ||
| 598 | if (!IsRenderTargetEnabled(rt)) { | ||
| 599 | continue; | ||
| 600 | } | ||
| 601 | const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); | ||
| 602 | Name(id, fmt::format("frag_color{}", rt)); | ||
| 603 | Decorate(id, spv::Decoration::Location, rt); | ||
| 604 | |||
| 605 | frag_colors[rt] = id; | ||
| 606 | interfaces.push_back(id); | ||
| 607 | } | ||
| 608 | |||
| 609 | if (header.ps.omap.depth) { | ||
| 610 | frag_depth = AddGlobalVariable(OpVariable(t_out_float, spv::StorageClass::Output)); | ||
| 611 | Name(frag_depth, "frag_depth"); | ||
| 612 | Decorate(frag_depth, spv::Decoration::BuiltIn, | ||
| 613 | static_cast<u32>(spv::BuiltIn::FragDepth)); | ||
| 614 | |||
| 615 | interfaces.push_back(frag_depth); | ||
| 616 | } | ||
| 617 | |||
| 618 | frag_coord = DeclareInputBuiltIn(spv::BuiltIn::FragCoord, t_in_float4, "frag_coord"); | ||
| 619 | front_facing = DeclareInputBuiltIn(spv::BuiltIn::FrontFacing, t_in_bool, "front_facing"); | ||
| 620 | point_coord = DeclareInputBuiltIn(spv::BuiltIn::PointCoord, t_in_float2, "point_coord"); | ||
| 621 | } | ||
| 622 | |||
| 623 | void DeclareCompute() { | ||
| 624 | if (stage != ShaderType::Compute) { | ||
| 625 | return; | ||
| 626 | } | ||
| 627 | |||
| 628 | workgroup_id = DeclareInputBuiltIn(spv::BuiltIn::WorkgroupId, t_in_uint3, "workgroup_id"); | ||
| 629 | local_invocation_id = | ||
| 630 | DeclareInputBuiltIn(spv::BuiltIn::LocalInvocationId, t_in_uint3, "local_invocation_id"); | ||
| 631 | } | ||
| 632 | |||
| 633 | void DeclareRegisters() { | ||
| 634 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 635 | const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||
| 636 | Name(id, fmt::format("gpr_{}", gpr)); | ||
| 637 | registers.emplace(gpr, AddGlobalVariable(id)); | ||
| 638 | } | ||
| 639 | } | ||
| 640 | |||
| 641 | void DeclareCustomVariables() { | ||
| 642 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 643 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 644 | const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||
| 645 | Name(id, fmt::format("custom_var_{}", i)); | ||
| 646 | custom_variables.emplace(i, AddGlobalVariable(id)); | ||
| 647 | } | ||
| 648 | } | ||
| 649 | |||
| 650 | void DeclarePredicates() { | ||
| 651 | for (const auto pred : ir.GetPredicates()) { | ||
| 652 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 653 | Name(id, fmt::format("pred_{}", static_cast<u32>(pred))); | ||
| 654 | predicates.emplace(pred, AddGlobalVariable(id)); | ||
| 655 | } | ||
| 656 | } | ||
| 657 | |||
| 658 | void DeclareFlowVariables() { | ||
| 659 | for (u32 i = 0; i < ir.GetASTNumVariables(); i++) { | ||
| 660 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 661 | Name(id, fmt::format("flow_var_{}", static_cast<u32>(i))); | ||
| 662 | flow_variables.emplace(i, AddGlobalVariable(id)); | ||
| 663 | } | ||
| 664 | } | ||
| 665 | |||
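| | // Local memory is modeled as a Private array of 32-bit floats; loads and stores | ||
| | // later address it in 4-byte words (the LmemNode path shifts the address right by 2). | ||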
| 666 | void DeclareLocalMemory() { | ||
| 667 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at | ||
| 668 | // specialization time. | ||
| 669 | const u64 lmem_size = stage == ShaderType::Compute ? 0x400 : header.GetLocalMemorySize(); | ||
| 670 | if (lmem_size == 0) { | ||
| 671 | return; | ||
| 672 | } | ||
| 673 | const auto element_count = static_cast<u32>(Common::AlignUp(lmem_size, 4) / 4); | ||
| 674 | const Id type_array = TypeArray(t_float, Constant(t_uint, element_count)); | ||
| 675 | const Id type_pointer = TypePointer(spv::StorageClass::Private, type_array); | ||
| 676 | Name(type_pointer, "LocalMemory"); | ||
| 677 | |||
| 678 | local_memory = | ||
| 679 | OpVariable(type_pointer, spv::StorageClass::Private, ConstantNull(type_array)); | ||
| 680 | AddGlobalVariable(Name(local_memory, "local_memory")); | ||
| 681 | } | ||
| 682 | |||
| 683 | void DeclareSharedMemory() { | ||
| 684 | if (stage != ShaderType::Compute) { | ||
| 685 | return; | ||
| 686 | } | ||
| 687 | t_smem_uint = TypePointer(spv::StorageClass::Workgroup, t_uint); | ||
| 688 | |||
| 689 | u32 smem_size = specialization.shared_memory_size * 4; | ||
| 690 | if (smem_size == 0) { | ||
| 691 | // Avoid declaring an empty array. | ||
| 692 | return; | ||
| 693 | } | ||
| 694 | const u32 limit = device.GetMaxComputeSharedMemorySize(); | ||
| 695 | if (smem_size > limit) { | ||
| 696 | LOG_ERROR(Render_Vulkan, "Shared memory size {} is clamped to host's limit {}", | ||
| 697 | smem_size, limit); | ||
| 698 | smem_size = limit; | ||
| 699 | } | ||
| 700 | |||
| 701 | const Id type_array = TypeArray(t_uint, Constant(t_uint, smem_size / 4)); | ||
| 702 | const Id type_pointer = TypePointer(spv::StorageClass::Workgroup, type_array); | ||
| 703 | Name(type_pointer, "SharedMemory"); | ||
| 704 | |||
| 705 | shared_memory = OpVariable(type_pointer, spv::StorageClass::Workgroup); | ||
| 706 | AddGlobalVariable(Name(shared_memory, "shared_memory")); | ||
| 707 | } | ||
| 708 | |||
| 709 | void DeclareInternalFlags() { | ||
| 710 | static constexpr std::array names{"zero", "sign", "carry", "overflow"}; | ||
| 711 | |||
| 712 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | ||
| 713 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 714 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | void DeclareInputVertexArray(u32 length) { | ||
| 719 | constexpr auto storage = spv::StorageClass::Input; | ||
| 720 | std::tie(in_indices, in_vertex) = DeclareVertexArray(storage, "in_indices", length); | ||
| 721 | } | ||
| 722 | |||
| 723 | void DeclareOutputVertexArray(u32 length) { | ||
| 724 | constexpr auto storage = spv::StorageClass::Output; | ||
| 725 | std::tie(out_indices, out_vertex) = DeclareVertexArray(storage, "out_indices", length); | ||
| 726 | } | ||
| 727 | |||
| 728 | std::tuple<VertexIndices, Id> DeclareVertexArray(spv::StorageClass storage_class, | ||
| 729 | std::string name, u32 length) { | ||
| 730 | const auto [struct_id, indices] = DeclareVertexStruct(); | ||
| 731 | const Id vertex_array = TypeArray(struct_id, Constant(t_uint, length)); | ||
| 732 | const Id vertex_ptr = TypePointer(storage_class, vertex_array); | ||
| 733 | const Id vertex = OpVariable(vertex_ptr, storage_class); | ||
| 734 | AddGlobalVariable(Name(vertex, std::move(name))); | ||
| 735 | interfaces.push_back(vertex); | ||
| 736 | return {indices, vertex}; | ||
| 737 | } | ||
| 738 | |||
| 739 | void DeclareOutputVertex() { | ||
| 740 | Id out_vertex_struct; | ||
| 741 | std::tie(out_vertex_struct, out_indices) = DeclareVertexStruct(); | ||
| 742 | const Id out_vertex_ptr = TypePointer(spv::StorageClass::Output, out_vertex_struct); | ||
| 743 | out_vertex = OpVariable(out_vertex_ptr, spv::StorageClass::Output); | ||
| 744 | interfaces.push_back(AddGlobalVariable(Name(out_vertex, "out_vertex"))); | ||
| 745 | } | ||
| 746 | |||
| 747 | void DeclareInputAttributes() { | ||
| 748 | for (const auto index : ir.GetInputAttributes()) { | ||
| 749 | if (!IsGenericAttribute(index)) { | ||
| 750 | continue; | ||
| 751 | } | ||
| 752 | const u32 location = GetGenericAttributeLocation(index); | ||
| 753 | if (!IsAttributeEnabled(location)) { | ||
| 754 | continue; | ||
| 755 | } | ||
| 756 | const auto type_descriptor = GetAttributeType(location); | ||
| 757 | Id type; | ||
| 758 | if (IsInputAttributeArray()) { | ||
| 759 | type = GetTypeVectorDefinitionLut(type_descriptor.type).at(3); | ||
| 760 | type = TypeArray(type, Constant(t_uint, GetNumInputVertices())); | ||
| 761 | type = TypePointer(spv::StorageClass::Input, type); | ||
| 762 | } else { | ||
| 763 | type = type_descriptor.vector; | ||
| 764 | } | ||
| 765 | const Id id = OpVariable(type, spv::StorageClass::Input); | ||
| 766 | AddGlobalVariable(Name(id, fmt::format("in_attr{}", location))); | ||
| 767 | input_attributes.emplace(index, id); | ||
| 768 | interfaces.push_back(id); | ||
| 769 | |||
| 770 | Decorate(id, spv::Decoration::Location, location); | ||
| 771 | |||
| 772 | if (stage != ShaderType::Fragment) { | ||
| 773 | continue; | ||
| 774 | } | ||
| 775 | switch (header.ps.GetPixelImap(location)) { | ||
| 776 | case PixelImap::Constant: | ||
| 777 | Decorate(id, spv::Decoration::Flat); | ||
| 778 | break; | ||
| 779 | case PixelImap::Perspective: | ||
| 780 | // Default | ||
| 781 | break; | ||
| 782 | case PixelImap::ScreenLinear: | ||
| 783 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 784 | break; | ||
| 785 | default: | ||
| 786 | UNREACHABLE_MSG("Unused attribute being fetched"); | ||
| 787 | } | ||
| 788 | } | ||
| 789 | } | ||
| 790 | |||
| 791 | void DeclareOutputAttributes() { | ||
| 792 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 793 | return; | ||
| 794 | } | ||
| 795 | |||
| 796 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 797 | for (const auto index : ir.GetOutputAttributes()) { | ||
| 798 | if (!IsGenericAttribute(index)) { | ||
| 799 | continue; | ||
| 800 | } | ||
| 801 | DeclareOutputAttribute(index); | ||
| 802 | } | ||
| 803 | } | ||
| 804 | |||
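| | // Splits a generic output attribute into one or more contiguous sub-vectors so that | ||
| | // each piece can carry its own Location/Component (and, when available, Xfb*) | ||
| | // decorations; 'element' advances by the number of components consumed. | ||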
| 805 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 806 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 807 | |||
| 808 | const u32 location = GetGenericAttributeLocation(index); | ||
| 809 | u8 element = 0; | ||
| 810 | while (element < 4) { | ||
| 811 | const std::size_t remainder = 4 - element; | ||
| 812 | |||
| 813 | std::size_t num_components = remainder; | ||
| 814 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 815 | if (tfb) { | ||
| 816 | num_components = tfb->components; | ||
| 817 | } | ||
| 818 | |||
| 819 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 820 | Id varying_default = v_varying_default; | ||
| 821 | if (IsOutputAttributeArray()) { | ||
| 822 | const u32 num = GetNumOutputVertices(); | ||
| 823 | type = TypeArray(type, Constant(t_uint, num)); | ||
| 824 | if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { | ||
| 825 | // Intel's proprietary driver fails to set up defaults for arrayed output | ||
| 826 | // attributes. | ||
| 827 | varying_default = ConstantComposite(type, std::vector(num, varying_default)); | ||
| 828 | } | ||
| 829 | } | ||
| 830 | type = TypePointer(spv::StorageClass::Output, type); | ||
| 831 | |||
| 832 | std::string name = fmt::format("out_attr{}", location); | ||
| 833 | if (num_components < 4 || element > 0) { | ||
| 834 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 835 | } | ||
| 836 | |||
| 837 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | ||
| 838 | Name(AddGlobalVariable(id), name); | ||
| 839 | |||
| 840 | GenericVaryingDescription description; | ||
| 841 | description.id = id; | ||
| 842 | description.first_element = element; | ||
| 843 | description.is_scalar = num_components == 1; | ||
| 844 | for (u32 i = 0; i < num_components; ++i) { | ||
| 845 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 846 | output_attributes.emplace(offset, description); | ||
| 847 | } | ||
| 848 | interfaces.push_back(id); | ||
| 849 | |||
| 850 | Decorate(id, spv::Decoration::Location, location); | ||
| 851 | if (element > 0) { | ||
| 852 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 853 | } | ||
| 854 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 855 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 856 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 857 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 858 | } | ||
| 859 | |||
| 860 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 861 | } | ||
| 862 | } | ||
| 863 | |||
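| | // transform_feedback is keyed per component: location = attribute index * 4 + element, | ||
| | // the same encoding used for output_attributes above. | ||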
| 864 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 865 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 866 | const auto it = transform_feedback.find(location); | ||
| 867 | if (it == transform_feedback.end()) { | ||
| 868 | return {}; | ||
| 869 | } | ||
| 870 | return it->second; | ||
| 871 | } | ||
| 872 | |||
| 873 | u32 DeclareConstantBuffers(u32 binding) { | ||
| 874 | for (const auto& [index, size] : ir.GetConstantBuffers()) { | ||
| 875 | const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo | ||
| 876 | : t_cbuf_std140_ubo; | ||
| 877 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||
| 878 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | ||
| 879 | |||
| 880 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 881 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 882 | constant_buffers.emplace(index, id); | ||
| 883 | } | ||
| 884 | return binding; | ||
| 885 | } | ||
| 886 | |||
| 887 | u32 DeclareGlobalBuffers(u32 binding) { | ||
| 888 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 889 | const Id id = OpVariable(t_gmem_ssbo, spv::StorageClass::StorageBuffer); | ||
| 890 | AddGlobalVariable( | ||
| 891 | Name(id, fmt::format("gmem_{}_{}", base.cbuf_index, base.cbuf_offset))); | ||
| 892 | |||
| 893 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 894 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 895 | global_buffers.emplace(base, id); | ||
| 896 | } | ||
| 897 | return binding; | ||
| 898 | } | ||
| 899 | |||
| 900 | u32 DeclareUniformTexels(u32 binding) { | ||
| 901 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 902 | if (!sampler.is_buffer) { | ||
| 903 | continue; | ||
| 904 | } | ||
| 905 | ASSERT(!sampler.is_array); | ||
| 906 | ASSERT(!sampler.is_shadow); | ||
| 907 | |||
| 908 | constexpr auto dim = spv::Dim::Buffer; | ||
| 909 | constexpr int depth = 0; | ||
| 910 | constexpr int arrayed = 0; | ||
| 911 | constexpr bool ms = false; | ||
| 912 | constexpr int sampled = 1; | ||
| 913 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 914 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); | ||
| 915 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 916 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 917 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); | ||
| 918 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 919 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 920 | |||
| 921 | uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); | ||
| 922 | } | ||
| 923 | return binding; | ||
| 924 | } | ||
| 925 | |||
| 926 | u32 DeclareSamplers(u32 binding) { | ||
| 927 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 928 | if (sampler.is_buffer) { | ||
| 929 | continue; | ||
| 930 | } | ||
| 931 | const auto dim = GetSamplerDim(sampler); | ||
| 932 | const int depth = sampler.is_shadow ? 1 : 0; | ||
| 933 | const int arrayed = sampler.is_array ? 1 : 0; | ||
| 934 | constexpr bool ms = false; | ||
| 935 | constexpr int sampled = 1; | ||
| 936 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 937 | const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); | ||
| 938 | const Id sampler_type = TypeSampledImage(image_type); | ||
| 939 | const Id sampler_pointer_type = | ||
| 940 | TypePointer(spv::StorageClass::UniformConstant, sampler_type); | ||
| 941 | const Id type = sampler.is_indexed | ||
| 942 | ? TypeArray(sampler_type, Constant(t_uint, sampler.size)) | ||
| 943 | : sampler_type; | ||
| 944 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type); | ||
| 945 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 946 | AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.index))); | ||
| 947 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 948 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 949 | |||
| 950 | sampled_images.emplace( | ||
| 951 | sampler.index, SampledImage{image_type, sampler_type, sampler_pointer_type, id}); | ||
| 952 | } | ||
| 953 | return binding; | ||
| 954 | } | ||
| 955 | |||
| 956 | u32 DeclareStorageTexels(u32 binding) { | ||
| 957 | for (const auto& image : ir.GetImages()) { | ||
| 958 | if (image.type != Tegra::Shader::ImageType::TextureBuffer) { | ||
| 959 | continue; | ||
| 960 | } | ||
| 961 | DeclareImage(image, binding); | ||
| 962 | } | ||
| 963 | return binding; | ||
| 964 | } | ||
| 965 | |||
| 966 | u32 DeclareImages(u32 binding) { | ||
| 967 | for (const auto& image : ir.GetImages()) { | ||
| 968 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 969 | continue; | ||
| 970 | } | ||
| 971 | DeclareImage(image, binding); | ||
| 972 | } | ||
| 973 | return binding; | ||
| 974 | } | ||
| 975 | |||
| 976 | void DeclareImage(const ImageEntry& image, u32& binding) { | ||
| 977 | const auto [dim, arrayed] = GetImageDim(image); | ||
| 978 | constexpr int depth = 0; | ||
| 979 | constexpr bool ms = false; | ||
| 980 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 981 | const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; | ||
| 982 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 983 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 984 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 985 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 986 | |||
| 987 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 988 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 989 | if (image.is_read && !image.is_written) { | ||
| 990 | Decorate(id, spv::Decoration::NonWritable); | ||
| 991 | } else if (image.is_written && !image.is_read) { | ||
| 992 | Decorate(id, spv::Decoration::NonReadable); | ||
| 993 | } | ||
| 994 | |||
| 995 | images.emplace(image.index, StorageImage{image_type, id}); | ||
| 996 | } | ||
| 997 | |||
| 998 | bool IsRenderTargetEnabled(u32 rt) const { | ||
| 999 | for (u32 component = 0; component < 4; ++component) { | ||
| 1000 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1001 | return true; | ||
| 1002 | } | ||
| 1003 | } | ||
| 1004 | return false; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | bool IsInputAttributeArray() const { | ||
| 1008 | return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || | ||
| 1009 | stage == ShaderType::Geometry; | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | bool IsOutputAttributeArray() const { | ||
| 1013 | return stage == ShaderType::TesselationControl; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | bool IsAttributeEnabled(u32 location) const { | ||
| 1017 | return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; | ||
| 1018 | } | ||
| 1019 | |||
| 1020 | u32 GetNumInputVertices() const { | ||
| 1021 | switch (stage) { | ||
| 1022 | case ShaderType::Geometry: | ||
| 1023 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); | ||
| 1024 | case ShaderType::TesselationControl: | ||
| 1025 | case ShaderType::TesselationEval: | ||
| 1026 | return NumInputPatches; | ||
| 1027 | default: | ||
| 1028 | UNREACHABLE(); | ||
| 1029 | return 1; | ||
| 1030 | } | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | u32 GetNumOutputVertices() const { | ||
| 1034 | switch (stage) { | ||
| 1035 | case ShaderType::TesselationControl: | ||
| 1036 | return header.common2.threads_per_input_primitive; | ||
| 1037 | default: | ||
| 1038 | UNREACHABLE(); | ||
| 1039 | return 1; | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
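| | // Builds the Block-decorated per-vertex struct (Position plus optional Layer, | ||
| | // ViewportIndex, PointSize and ClipDistance members) and returns the member indices | ||
| | // used to access it later. | ||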
| 1043 | std::tuple<Id, VertexIndices> DeclareVertexStruct() { | ||
| 1044 | struct BuiltIn { | ||
| 1045 | Id type; | ||
| 1046 | spv::BuiltIn builtin; | ||
| 1047 | const char* name; | ||
| 1048 | }; | ||
| 1049 | std::vector<BuiltIn> members; | ||
| 1050 | members.reserve(4); | ||
| 1051 | |||
| 1052 | const auto AddBuiltIn = [&](Id type, spv::BuiltIn builtin, const char* name) { | ||
| 1053 | const auto index = static_cast<u32>(members.size()); | ||
| 1054 | members.push_back(BuiltIn{type, builtin, name}); | ||
| 1055 | return index; | ||
| 1056 | }; | ||
| 1057 | |||
| 1058 | VertexIndices indices; | ||
| 1059 | indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); | ||
| 1060 | |||
| 1061 | if (ir.UsesLayer()) { | ||
| 1062 | if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 1063 | indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); | ||
| 1064 | } else { | ||
| 1065 | LOG_ERROR( | ||
| 1066 | Render_Vulkan, | ||
| 1067 | "Shader requires Layer but it's not supported on this stage with this device."); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | if (ir.UsesViewportIndex()) { | ||
| 1072 | if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { | ||
| 1073 | indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); | ||
| 1074 | } else { | ||
| 1075 | LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " | ||
| 1076 | "this stage with this device."); | ||
| 1077 | } | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | if (ir.UsesPointSize() || specialization.point_size) { | ||
| 1081 | indices.point_size = AddBuiltIn(t_float, spv::BuiltIn::PointSize, "point_size"); | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | const auto& ir_output_attributes = ir.GetOutputAttributes(); | ||
| 1085 | const bool declare_clip_distances = std::any_of( | ||
| 1086 | ir_output_attributes.begin(), ir_output_attributes.end(), [](const auto& index) { | ||
| 1087 | return index == Attribute::Index::ClipDistances0123 || | ||
| 1088 | index == Attribute::Index::ClipDistances4567; | ||
| 1089 | }); | ||
| 1090 | if (declare_clip_distances) { | ||
| 1091 | indices.clip_distances = AddBuiltIn(TypeArray(t_float, Constant(t_uint, 8)), | ||
| 1092 | spv::BuiltIn::ClipDistance, "clip_distances"); | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | std::vector<Id> member_types; | ||
| 1096 | member_types.reserve(members.size()); | ||
| 1097 | for (std::size_t i = 0; i < members.size(); ++i) { | ||
| 1098 | member_types.push_back(members[i].type); | ||
| 1099 | } | ||
| 1100 | const Id per_vertex_struct = Name(TypeStruct(member_types), "PerVertex"); | ||
| 1101 | Decorate(per_vertex_struct, spv::Decoration::Block); | ||
| 1102 | |||
| 1103 | for (std::size_t index = 0; index < members.size(); ++index) { | ||
| 1104 | const auto& member = members[index]; | ||
| 1105 | MemberName(per_vertex_struct, static_cast<u32>(index), member.name); | ||
| 1106 | MemberDecorate(per_vertex_struct, static_cast<u32>(index), spv::Decoration::BuiltIn, | ||
| 1107 | static_cast<u32>(member.builtin)); | ||
| 1108 | } | ||
| 1109 | |||
| 1110 | return {per_vertex_struct, indices}; | ||
| 1111 | } | ||
| 1112 | |||
| 1113 | void VisitBasicBlock(const NodeBlock& bb) { | ||
| 1114 | for (const auto& node : bb) { | ||
| 1115 | Visit(node); | ||
| 1116 | } | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | Expression Visit(const Node& node) { | ||
| 1120 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1121 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1122 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 1123 | ASSERT(type == Type::Void); | ||
| 1124 | } | ||
| 1125 | const auto operation_index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1126 | const auto decompiler = operation_decompilers[operation_index]; | ||
| 1127 | if (decompiler == nullptr) { | ||
| 1128 | UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); | ||
| 1129 | } | ||
| 1130 | return (this->*decompiler)(*operation); | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1134 | const u32 index = gpr->GetIndex(); | ||
| 1135 | if (index == Register::ZeroIndex) { | ||
| 1136 | return {v_float_zero, Type::Float}; | ||
| 1137 | } | ||
| 1138 | return {OpLoad(t_float, registers.at(index)), Type::Float}; | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1142 | const u32 index = cv->GetIndex(); | ||
| 1143 | return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1147 | return {Constant(t_uint, immediate->GetValue()), Type::Uint}; | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1151 | const auto value = [&]() -> Id { | ||
| 1152 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1153 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1154 | return v_true; | ||
| 1155 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1156 | return v_false; | ||
| 1157 | default: | ||
| 1158 | return OpLoad(t_bool, predicates.at(index)); | ||
| 1159 | } | ||
| 1160 | }(); | ||
| 1161 | if (predicate->IsNegated()) { | ||
| 1162 | return {OpLogicalNot(t_bool, value), Type::Bool}; | ||
| 1163 | } | ||
| 1164 | return {value, Type::Bool}; | ||
| 1165 | } | ||
| 1166 | |||
| 1167 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1168 | const auto attribute = abuf->GetIndex(); | ||
| 1169 | const u32 element = abuf->GetElement(); | ||
| 1170 | const auto& buffer = abuf->GetBuffer(); | ||
| 1171 | |||
| 1172 | const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { | ||
| 1173 | std::vector<Id> members; | ||
| 1174 | members.reserve(std::size(indices) + 1); | ||
| 1175 | |||
| 1176 | if (buffer && IsInputAttributeArray()) { | ||
| 1177 | members.push_back(AsUint(Visit(buffer))); | ||
| 1178 | } | ||
| 1179 | for (const u32 index : indices) { | ||
| 1180 | members.push_back(Constant(t_uint, index)); | ||
| 1181 | } | ||
| 1182 | return OpAccessChain(pointer_type, composite, members); | ||
| 1183 | }; | ||
| 1184 | |||
| 1185 | switch (attribute) { | ||
| 1186 | case Attribute::Index::Position: { | ||
| 1187 | if (stage == ShaderType::Fragment) { | ||
| 1188 | return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), | ||
| 1189 | Type::Float}; | ||
| 1190 | } | ||
| 1191 | const std::vector elements = {in_indices.position.value(), element}; | ||
| 1192 | return {OpLoad(t_float, ArrayPass(t_in_float, in_vertex, elements)), Type::Float}; | ||
| 1193 | } | ||
| 1194 | case Attribute::Index::PointCoord: { | ||
| 1195 | switch (element) { | ||
| 1196 | case 0: | ||
| 1197 | case 1: | ||
| 1198 | return {OpCompositeExtract(t_float, OpLoad(t_float2, point_coord), element), | ||
| 1199 | Type::Float}; | ||
| 1200 | } | ||
| 1201 | UNIMPLEMENTED_MSG("Unimplemented point coord element={}", element); | ||
| 1202 | return {v_float_zero, Type::Float}; | ||
| 1203 | } | ||
| 1204 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1205 | // TODO(Subv): Find out what the values are for the first two elements when inside a | ||
| 1206 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | ||
| 1207 | // shader. | ||
| 1208 | switch (element) { | ||
| 1209 | case 0: | ||
| 1210 | case 1: | ||
| 1211 | return {OpLoad(t_float, AccessElement(t_in_float, tess_coord, element)), | ||
| 1212 | Type::Float}; | ||
| 1213 | case 2: | ||
| 1214 | return { | ||
| 1215 | OpISub(t_int, OpLoad(t_int, instance_index), OpLoad(t_int, base_instance)), | ||
| 1216 | Type::Int}; | ||
| 1217 | case 3: | ||
| 1218 | return {OpISub(t_int, OpLoad(t_int, vertex_index), OpLoad(t_int, base_vertex)), | ||
| 1219 | Type::Int}; | ||
| 1220 | } | ||
| 1221 | UNIMPLEMENTED_MSG("Unhandled TessCoordInstanceIDVertexID element={}", element); | ||
| 1222 | return {Constant(t_uint, 0U), Type::Uint}; | ||
| 1223 | case Attribute::Index::FrontFacing: | ||
| 1224 | // TODO(Subv): Find out what the values are for the other elements. | ||
| 1225 | ASSERT(stage == ShaderType::Fragment); | ||
| 1226 | if (element == 3) { | ||
| 1227 | const Id is_front_facing = OpLoad(t_bool, front_facing); | ||
| 1228 | const Id true_value = Constant(t_int, static_cast<s32>(-1)); | ||
| 1229 | const Id false_value = Constant(t_int, 0); | ||
| 1230 | return {OpSelect(t_int, is_front_facing, true_value, false_value), Type::Int}; | ||
| 1231 | } | ||
| 1232 | UNIMPLEMENTED_MSG("Unhandled FrontFacing element={}", element); | ||
| 1233 | return {v_float_zero, Type::Float}; | ||
| 1234 | default: | ||
| 1235 | if (!IsGenericAttribute(attribute)) { | ||
| 1236 | break; | ||
| 1237 | } | ||
| 1238 | const u32 location = GetGenericAttributeLocation(attribute); | ||
| 1239 | if (!IsAttributeEnabled(location)) { | ||
| 1240 | // Disabled attributes (also known as constant attributes) always return zero. | ||
| 1241 | return {v_float_zero, Type::Float}; | ||
| 1242 | } | ||
| 1243 | const auto type_descriptor = GetAttributeType(location); | ||
| 1244 | const Type type = type_descriptor.type; | ||
| 1245 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1246 | const std::vector elements = {element}; | ||
| 1247 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1248 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1249 | } | ||
| 1250 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", attribute); | ||
| 1251 | return {v_float_zero, Type::Float}; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1255 | const Node& offset = cbuf->GetOffset(); | ||
| 1256 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | ||
| 1257 | |||
| 1258 | Id pointer{}; | ||
| 1259 | if (device.IsKhrUniformBufferStandardLayoutSupported()) { | ||
| 1260 | const Id buffer_offset = | ||
| 1261 | OpShiftRightLogical(t_uint, AsUint(Visit(offset)), Constant(t_uint, 2U)); | ||
| 1262 | pointer = | ||
| 1263 | OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0U), buffer_offset); | ||
| 1264 | } else { | ||
| 1265 | Id buffer_index{}; | ||
| 1266 | Id buffer_element{}; | ||
| 1267 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1268 | // Direct access | ||
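| | // std140 path: the byte offset maps to vec4 index (offset / 16) and component | ||
| | // ((offset / 4) % 4); e.g. offset 0x24 selects vec4 2, component 1. | ||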
| 1269 | const u32 offset_imm = immediate->GetValue(); | ||
| 1270 | ASSERT(offset_imm % 4 == 0); | ||
| 1271 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 1272 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 1273 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 1274 | // Indirect access | ||
| 1275 | const Id offset_id = AsUint(Visit(offset)); | ||
| 1276 | const Id unsafe_offset = OpUDiv(t_uint, offset_id, Constant(t_uint, 4)); | ||
| 1277 | const Id final_offset = | ||
| 1278 | OpUMod(t_uint, unsafe_offset, Constant(t_uint, MaxConstBufferElements - 1)); | ||
| 1279 | buffer_index = OpUDiv(t_uint, final_offset, Constant(t_uint, 4)); | ||
| 1280 | buffer_element = OpUMod(t_uint, final_offset, Constant(t_uint, 4)); | ||
| 1281 | } else { | ||
| 1282 | UNREACHABLE_MSG("Unhandled offset node type"); | ||
| 1283 | } | ||
| 1284 | pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, | ||
| 1285 | buffer_element); | ||
| 1286 | } | ||
| 1287 | return {OpLoad(t_float, pointer), Type::Float}; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1291 | return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1295 | Id address = AsUint(Visit(lmem->GetAddress())); | ||
| 1296 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1297 | const Id pointer = OpAccessChain(t_prv_float, local_memory, address); | ||
| 1298 | return {OpLoad(t_float, pointer), Type::Float}; | ||
| 1299 | } | ||
| 1300 | |||
| 1301 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1302 | return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1306 | const Id flag = internal_flags.at(static_cast<std::size_t>(internal_flag->GetFlag())); | ||
| 1307 | return {OpLoad(t_bool, flag), Type::Bool}; | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1311 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1312 | [[maybe_unused]] const Type type = Visit(ir.GetAmendNode(*amend_index)).type; | ||
| 1313 | ASSERT(type == Type::Void); | ||
| 1314 | } | ||
| 1315 | // It's invalid to call a conditional on nested nodes; use an operation instead | ||
| 1316 | const Id true_label = OpLabel(); | ||
| 1317 | const Id skip_label = OpLabel(); | ||
| 1318 | const Id condition = AsBool(Visit(conditional->GetCondition())); | ||
| 1319 | OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone); | ||
| 1320 | OpBranchConditional(condition, true_label, skip_label); | ||
| 1321 | AddLabel(true_label); | ||
| 1322 | |||
| 1323 | conditional_branch_set = true; | ||
| 1324 | inside_branch = false; | ||
| 1325 | VisitBasicBlock(conditional->GetCode()); | ||
| 1326 | conditional_branch_set = false; | ||
| 1327 | if (!inside_branch) { | ||
| 1328 | OpBranch(skip_label); | ||
| 1329 | } else { | ||
| 1330 | inside_branch = false; | ||
| 1331 | } | ||
| 1332 | AddLabel(skip_label); | ||
| 1333 | return {}; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | if (const auto comment = std::get_if<CommentNode>(&*node)) { | ||
| 1337 | if (device.HasDebuggingToolAttached()) { | ||
| 1338 | // We should insert comments with OpString instead of using named variables | ||
| 1339 | Name(OpUndef(t_int), comment->GetText()); | ||
| 1340 | } | ||
| 1341 | return {}; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | UNREACHABLE(); | ||
| 1345 | return {}; | ||
| 1346 | } | ||
| 1347 | |||
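| | // The templated helpers below wrap Sirit member-function pointers: each operand is | ||
| | // converted to the requested Type with As(Visit(...)), the instruction is emitted, | ||
| | // and NoContraction is added when the IR marks the operation as precise. | ||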
| 1348 | template <Id (Module::*func)(Id, Id), Type result_type, Type type_a = result_type> | ||
| 1349 | Expression Unary(Operation operation) { | ||
| 1350 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1351 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1352 | |||
| 1353 | const Id value = (this->*func)(type_def, op_a); | ||
| 1354 | if (IsPrecise(operation)) { | ||
| 1355 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1356 | } | ||
| 1357 | return {value, result_type}; | ||
| 1358 | } | ||
| 1359 | |||
| 1360 | template <Id (Module::*func)(Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1361 | Type type_b = type_a> | ||
| 1362 | Expression Binary(Operation operation) { | ||
| 1363 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1364 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1365 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1366 | |||
| 1367 | const Id value = (this->*func)(type_def, op_a, op_b); | ||
| 1368 | if (IsPrecise(operation)) { | ||
| 1369 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1370 | } | ||
| 1371 | return {value, result_type}; | ||
| 1372 | } | ||
| 1373 | |||
| 1374 | template <Id (Module::*func)(Id, Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1375 | Type type_b = type_a, Type type_c = type_b> | ||
| 1376 | Expression Ternary(Operation operation) { | ||
| 1377 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1378 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1379 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1380 | const Id op_c = As(Visit(operation[2]), type_c); | ||
| 1381 | |||
| 1382 | const Id value = (this->*func)(type_def, op_a, op_b, op_c); | ||
| 1383 | if (IsPrecise(operation)) { | ||
| 1384 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1385 | } | ||
| 1386 | return {value, result_type}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, Type type_a = result_type, | ||
| 1390 | Type type_b = type_a, Type type_c = type_b, Type type_d = type_c> | ||
| 1391 | Expression Quaternary(Operation operation) { | ||
| 1392 | const Id type_def = GetTypeDefinition(result_type); | ||
| 1393 | const Id op_a = As(Visit(operation[0]), type_a); | ||
| 1394 | const Id op_b = As(Visit(operation[1]), type_b); | ||
| 1395 | const Id op_c = As(Visit(operation[2]), type_c); | ||
| 1396 | const Id op_d = As(Visit(operation[3]), type_d); | ||
| 1397 | |||
| 1398 | const Id value = (this->*func)(type_def, op_a, op_b, op_c, op_d); | ||
| 1399 | if (IsPrecise(operation)) { | ||
| 1400 | Decorate(value, spv::Decoration::NoContraction); | ||
| 1401 | } | ||
| 1402 | return {value, result_type}; | ||
| 1403 | } | ||
| 1404 | |||
| 1405 | Expression Assign(Operation operation) { | ||
| 1406 | const Node& dest = operation[0]; | ||
| 1407 | const Node& src = operation[1]; | ||
| 1408 | |||
| 1409 | Expression target{}; | ||
| 1410 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1411 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1412 | // Writing to Register::ZeroIndex is a no-op, but we still have to visit its source | ||
| 1413 | // because it might have side effects. | ||
| 1414 | Visit(src); | ||
| 1415 | return {}; | ||
| 1416 | } | ||
| 1417 | target = {registers.at(gpr->GetIndex()), Type::Float}; | ||
| 1418 | |||
| 1419 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1420 | const auto& buffer = abuf->GetBuffer(); | ||
| 1421 | const auto ArrayPass = [&](Id pointer_type, Id composite, std::vector<u32> indices) { | ||
| 1422 | std::vector<Id> members; | ||
| 1423 | members.reserve(std::size(indices) + 1); | ||
| 1424 | |||
| 1425 | if (buffer && IsOutputAttributeArray()) { | ||
| 1426 | members.push_back(AsUint(Visit(buffer))); | ||
| 1427 | } | ||
| 1428 | for (const u32 index : indices) { | ||
| 1429 | members.push_back(Constant(t_uint, index)); | ||
| 1430 | } | ||
| 1431 | return OpAccessChain(pointer_type, composite, members); | ||
| 1432 | }; | ||
| 1433 | |||
| 1434 | target = [&]() -> Expression { | ||
| 1435 | const u32 element = abuf->GetElement(); | ||
| 1436 | switch (const auto attribute = abuf->GetIndex(); attribute) { | ||
| 1437 | case Attribute::Index::Position: { | ||
| 1438 | const u32 index = out_indices.position.value(); | ||
| 1439 | return {ArrayPass(t_out_float, out_vertex, {index, element}), Type::Float}; | ||
| 1440 | } | ||
| 1441 | case Attribute::Index::LayerViewportPointSize: | ||
| 1442 | switch (element) { | ||
| 1443 | case 1: { | ||
| 1444 | if (!out_indices.layer) { | ||
| 1445 | return {}; | ||
| 1446 | } | ||
| 1447 | const u32 index = out_indices.layer.value(); | ||
| 1448 | return {AccessElement(t_out_int, out_vertex, index), Type::Int}; | ||
| 1449 | } | ||
| 1450 | case 2: { | ||
| 1451 | if (!out_indices.viewport) { | ||
| 1452 | return {}; | ||
| 1453 | } | ||
| 1454 | const u32 index = out_indices.viewport.value(); | ||
| 1455 | return {AccessElement(t_out_int, out_vertex, index), Type::Int}; | ||
| 1456 | } | ||
| 1457 | case 3: { | ||
| 1458 | const auto index = out_indices.point_size.value(); | ||
| 1459 | return {AccessElement(t_out_float, out_vertex, index), Type::Float}; | ||
| 1460 | } | ||
| 1461 | default: | ||
| 1462 | UNIMPLEMENTED_MSG("LayerViewportPoint element={}", abuf->GetElement()); | ||
| 1463 | return {}; | ||
| 1464 | } | ||
| 1465 | case Attribute::Index::ClipDistances0123: { | ||
| 1466 | const u32 index = out_indices.clip_distances.value(); | ||
| 1467 | return {AccessElement(t_out_float, out_vertex, index, element), Type::Float}; | ||
| 1468 | } | ||
| 1469 | case Attribute::Index::ClipDistances4567: { | ||
| 1470 | const u32 index = out_indices.clip_distances.value(); | ||
| 1471 | return {AccessElement(t_out_float, out_vertex, index, element + 4), | ||
| 1472 | Type::Float}; | ||
| 1473 | } | ||
| 1474 | default: | ||
| 1475 | if (IsGenericAttribute(attribute)) { | ||
| 1476 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); | ||
| 1477 | const GenericVaryingDescription description = output_attributes.at(offset); | ||
| 1478 | const Id composite = description.id; | ||
| 1479 | std::vector<u32> indices; | ||
| 1480 | if (!description.is_scalar) { | ||
| 1481 | indices.push_back(element - description.first_element); | ||
| 1482 | } | ||
| 1483 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1484 | } | ||
| 1485 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 1486 | static_cast<u32>(attribute)); | ||
| 1487 | return {}; | ||
| 1488 | } | ||
| 1489 | }(); | ||
| 1490 | |||
| 1491 | } else if (const auto patch = std::get_if<PatchNode>(&*dest)) { | ||
| 1492 | target = [&]() -> Expression { | ||
| 1493 | const u32 offset = patch->GetOffset(); | ||
| 1494 | switch (offset) { | ||
| 1495 | case 0: | ||
| 1496 | case 1: | ||
| 1497 | case 2: | ||
| 1498 | case 3: | ||
| 1499 | return {AccessElement(t_out_float, tess_level_outer, offset % 4), Type::Float}; | ||
| 1500 | case 4: | ||
| 1501 | case 5: | ||
| 1502 | return {AccessElement(t_out_float, tess_level_inner, offset % 4), Type::Float}; | ||
| 1503 | } | ||
| 1504 | UNIMPLEMENTED_MSG("Unhandled patch output offset: {}", offset); | ||
| 1505 | return {}; | ||
| 1506 | }(); | ||
| 1507 | |||
| 1508 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1509 | Id address = AsUint(Visit(lmem->GetAddress())); | ||
| 1510 | address = OpUDiv(t_uint, address, Constant(t_uint, 4)); | ||
| 1511 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | ||
| 1512 | |||
| 1513 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1514 | target = {GetSharedMemoryPointer(*smem), Type::Uint}; | ||
| 1515 | |||
| 1516 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1517 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | ||
| 1518 | |||
| 1519 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1520 | target = {custom_variables.at(cv->GetIndex()), Type::Float}; | ||
| 1521 | |||
| 1522 | } else { | ||
| 1523 | UNIMPLEMENTED(); | ||
| 1524 | } | ||
| 1525 | |||
| 1526 | if (!target.id) { | ||
| 1527 | // On failure target.id is null; skip these stores. | ||
| 1528 | return {}; | ||
| 1529 | } | ||
| 1530 | |||
| 1531 | OpStore(target.id, As(Visit(src), target.type)); | ||
| 1532 | return {}; | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | template <u32 offset> | ||
| 1536 | Expression FCastHalf(Operation operation) { | ||
| 1537 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1538 | return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, offset)), | ||
| 1539 | Type::Float}; | ||
| 1540 | } | ||
| 1541 | |||
| 1542 | Expression FSwizzleAdd(Operation operation) { | ||
| 1543 | const Id minus = Constant(t_float, -1.0f); | ||
| 1544 | const Id plus = v_float_one; | ||
| 1545 | const Id zero = v_float_zero; | ||
| 1546 | const Id lut_a = ConstantComposite(t_float4, minus, plus, minus, zero); | ||
| 1547 | const Id lut_b = ConstantComposite(t_float4, minus, minus, plus, minus); | ||
| 1548 | |||
| 1549 | Id mask = OpLoad(t_uint, thread_id); | ||
| 1550 | mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); | ||
| 1551 | mask = OpShiftLeftLogical(t_uint, mask, Constant(t_uint, 1)); | ||
| 1552 | mask = OpShiftRightLogical(t_uint, AsUint(Visit(operation[2])), mask); | ||
| 1553 | mask = OpBitwiseAnd(t_uint, mask, Constant(t_uint, 3)); | ||
| 1554 | |||
| 1555 | const Id modifier_a = OpVectorExtractDynamic(t_float, lut_a, mask); | ||
| 1556 | const Id modifier_b = OpVectorExtractDynamic(t_float, lut_b, mask); | ||
| 1557 | |||
| 1558 | const Id op_a = OpFMul(t_float, AsFloat(Visit(operation[0])), modifier_a); | ||
| 1559 | const Id op_b = OpFMul(t_float, AsFloat(Visit(operation[1])), modifier_b); | ||
| 1560 | return {OpFAdd(t_float, op_a, op_b), Type::Float}; | ||
| 1561 | } | ||
| 1562 | |||
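FSwizzleAdd above derives a per-thread 2-bit selector from the swizzle immediate (operation[2]) and uses it to pick one of four sign combinations for the two operands. A scalar sketch of the assumed FSWZADD semantics, with illustrative names:

```cpp
#include <array>
#include <cstdint>

// Per-thread selection mirroring the lut_a/lut_b constants built above.
float FSwizzleAddScalar(float a, float b, std::uint32_t swizzle, std::uint32_t thread_id) {
    constexpr std::array<float, 4> lut_a{-1.0f, 1.0f, -1.0f, 0.0f};
    constexpr std::array<float, 4> lut_b{-1.0f, -1.0f, 1.0f, -1.0f};
    const std::uint32_t selector = (swizzle >> ((thread_id & 3) * 2)) & 3;
    return a * lut_a[selector] + b * lut_b[selector];
}
```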
| 1563 | Expression HNegate(Operation operation) { | ||
| 1564 | const bool is_f16 = device.IsFloat16Supported(); | ||
| 1565 | const Id minus_one = Constant(t_scalar_half, is_f16 ? 0xbc00 : 0xbf800000); | ||
| 1566 | const Id one = Constant(t_scalar_half, is_f16 ? 0x3c00 : 0x3f800000); | ||
| 1567 | const auto GetNegate = [&](std::size_t index) { | ||
| 1568 | return OpSelect(t_scalar_half, AsBool(Visit(operation[index])), minus_one, one); | ||
| 1569 | }; | ||
| 1570 | const Id negation = OpCompositeConstruct(t_half, GetNegate(1), GetNegate(2)); | ||
| 1571 | return {OpFMul(t_half, AsHalfFloat(Visit(operation[0])), negation), Type::HalfFloat}; | ||
| 1572 | } | ||
| 1573 | |||
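The literals in HNegate are raw IEEE bit patterns: 0x3c00/0xbc00 are +1.0/-1.0 in binary16, and 0x3f800000/0xbf800000 are the same values in binary32 (used when the device lacks native FP16). A quick compile-time check of the 32-bit case, assuming C++20's std::bit_cast:

```cpp
#include <bit>
#include <cstdint>

static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000u);
static_assert(std::bit_cast<std::uint32_t>(-1.0f) == 0xbf800000u);
// binary16: sign(1) | exponent(5) | mantissa(10); 0x3c00 encodes +1.0 and
// flipping the sign bit yields 0xbc00 == -1.0.
```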
| 1574 | Expression HClamp(Operation operation) { | ||
| 1575 | const auto Pack = [&](std::size_t index) { | ||
| 1576 | const Id scalar = GetHalfScalarFromFloat(AsFloat(Visit(operation[index]))); | ||
| 1577 | return OpCompositeConstruct(t_half, scalar, scalar); | ||
| 1578 | }; | ||
| 1579 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1580 | const Id min = Pack(1); | ||
| 1581 | const Id max = Pack(2); | ||
| 1582 | |||
| 1583 | const Id clamped = OpFClamp(t_half, value, min, max); | ||
| 1584 | if (IsPrecise(operation)) { | ||
| 1585 | Decorate(clamped, spv::Decoration::NoContraction); | ||
| 1586 | } | ||
| 1587 | return {clamped, Type::HalfFloat}; | ||
| 1588 | } | ||
| 1589 | |||
| 1590 | Expression HCastFloat(Operation operation) { | ||
| 1591 | const Id value = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); | ||
| 1592 | return {OpCompositeConstruct(t_half, value, Constant(t_scalar_half, 0)), Type::HalfFloat}; | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | Expression HUnpack(Operation operation) { | ||
| 1596 | Expression operand = Visit(operation[0]); | ||
| 1597 | const auto type = std::get<Tegra::Shader::HalfType>(operation.GetMeta()); | ||
| 1598 | if (type == Tegra::Shader::HalfType::H0_H1) { | ||
| 1599 | return operand; | ||
| 1600 | } | ||
| 1601 | const auto value = [&] { | ||
| 1602 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1603 | case Tegra::Shader::HalfType::F32: | ||
| 1604 | return GetHalfScalarFromFloat(AsFloat(operand)); | ||
| 1605 | case Tegra::Shader::HalfType::H0_H0: | ||
| 1606 | return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 0); | ||
| 1607 | case Tegra::Shader::HalfType::H1_H1: | ||
| 1608 | return OpCompositeExtract(t_scalar_half, AsHalfFloat(operand), 1); | ||
| 1609 | default: | ||
| 1610 | UNREACHABLE(); | ||
| 1611 | return ConstantNull(t_half); | ||
| 1612 | } | ||
| 1613 | }(); | ||
| 1614 | return {OpCompositeConstruct(t_half, value, value), Type::HalfFloat}; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | Expression HMergeF32(Operation operation) { | ||
| 1618 | const Id value = AsHalfFloat(Visit(operation[0])); | ||
| 1619 | return {GetFloatFromHalfScalar(OpCompositeExtract(t_scalar_half, value, 0)), Type::Float}; | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | template <u32 offset> | ||
| 1623 | Expression HMergeHN(Operation operation) { | ||
| 1624 | const Id target = AsHalfFloat(Visit(operation[0])); | ||
| 1625 | const Id source = AsHalfFloat(Visit(operation[1])); | ||
| 1626 | const Id object = OpCompositeExtract(t_scalar_half, source, offset); | ||
| 1627 | return {OpCompositeInsert(t_half, object, target, offset), Type::HalfFloat}; | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | Expression HPack2(Operation operation) { | ||
| 1631 | const Id low = GetHalfScalarFromFloat(AsFloat(Visit(operation[0]))); | ||
| 1632 | const Id high = GetHalfScalarFromFloat(AsFloat(Visit(operation[1]))); | ||
| 1633 | return {OpCompositeConstruct(t_half, low, high), Type::HalfFloat}; | ||
| 1634 | } | ||
| 1635 | |||
| 1636 | Expression LogicalAddCarry(Operation operation) { | ||
| 1637 | const Id op_a = AsUint(Visit(operation[0])); | ||
| 1638 | const Id op_b = AsUint(Visit(operation[1])); | ||
| 1639 | |||
| 1640 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); | ||
| 1641 | const Id carry = OpCompositeExtract(t_uint, result, 1); | ||
| 1642 | return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; | ||
| 1643 | } | ||
| 1644 | |||
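LogicalAddCarry only needs the carry-out of OpIAddCarry, not the sum. The equivalent host-side test, assuming 32-bit unsigned wraparound:

```cpp
#include <cstdint>

// A carry out of bit 31 occurred exactly when the wrapped sum is smaller
// than either operand.
bool AddProducesCarry(std::uint32_t a, std::uint32_t b) {
    return static_cast<std::uint32_t>(a + b) < a;
}
```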
| 1645 | Expression LogicalAssign(Operation operation) { | ||
| 1646 | const Node& dest = operation[0]; | ||
| 1647 | const Node& src = operation[1]; | ||
| 1648 | |||
| 1649 | Id target{}; | ||
| 1650 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1651 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1652 | |||
| 1653 | const auto index = pred->GetIndex(); | ||
| 1654 | switch (index) { | ||
| 1655 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1656 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1657 | // Writing to these predicates is a no-op | ||
| 1658 | return {}; | ||
| 1659 | } | ||
| 1660 | target = predicates.at(index); | ||
| 1661 | |||
| 1662 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1663 | target = internal_flags.at(static_cast<u32>(flag->GetFlag())); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | OpStore(target, AsBool(Visit(src))); | ||
| 1667 | return {}; | ||
| 1668 | } | ||
| 1669 | |||
| 1670 | Expression LogicalFOrdered(Operation operation) { | ||
| 1671 | // Emulate SPIR-V's OpOrdered | ||
| 1672 | const Id op_a = AsFloat(Visit(operation[0])); | ||
| 1673 | const Id op_b = AsFloat(Visit(operation[1])); | ||
| 1674 | const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a); | ||
| 1675 | const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b); | ||
| 1676 | return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool}; | ||
| 1677 | } | ||
| 1678 | |||
| 1679 | Expression LogicalFUnordered(Operation operation) { | ||
| 1680 | // Emulate SPIR-V's OpUnordered | ||
| 1681 | const Id op_a = AsFloat(Visit(operation[0])); | ||
| 1682 | const Id op_b = AsFloat(Visit(operation[1])); | ||
| 1683 | const Id is_nan_a = OpIsNan(t_bool, op_a); | ||
| 1684 | const Id is_nan_b = OpIsNan(t_bool, op_b); | ||
| 1685 | return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool}; | ||
| 1686 | } | ||
| 1687 | |||
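Both predicates above reduce to NaN checks: a pair is ordered when neither operand is NaN and unordered when either one is. A scalar reference, assuming IEEE-754 semantics:

```cpp
#include <cmath>

bool FOrdered(float a, float b)   { return !std::isnan(a) && !std::isnan(b); }
bool FUnordered(float a, float b) { return  std::isnan(a) ||  std::isnan(b); }
```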
| 1688 | Id GetTextureSampler(Operation operation) { | ||
| 1689 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1690 | ASSERT(!meta.sampler.is_buffer); | ||
| 1691 | |||
| 1692 | const auto& entry = sampled_images.at(meta.sampler.index); | ||
| 1693 | Id sampler = entry.variable; | ||
| 1694 | if (meta.sampler.is_indexed) { | ||
| 1695 | const Id index = AsInt(Visit(meta.index)); | ||
| 1696 | sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index); | ||
| 1697 | } | ||
| 1698 | return OpLoad(entry.sampler_type, sampler); | ||
| 1699 | } | ||
| 1700 | |||
| 1701 | Id GetTextureImage(Operation operation) { | ||
| 1702 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1703 | const u32 index = meta.sampler.index; | ||
| 1704 | if (meta.sampler.is_buffer) { | ||
| 1705 | const auto& entry = uniform_texels.at(index); | ||
| 1706 | return OpLoad(entry.image_type, entry.image); | ||
| 1707 | } else { | ||
| 1708 | const auto& entry = sampled_images.at(index); | ||
| 1709 | return OpImage(entry.image_type, GetTextureSampler(operation)); | ||
| 1710 | } | ||
| 1711 | } | ||
| 1712 | |||
| 1713 | Id GetImage(Operation operation) { | ||
| 1714 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1715 | const auto entry = images.at(meta.image.index); | ||
| 1716 | return OpLoad(entry.image_type, entry.image); | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | Id AssembleVector(const std::vector<Id>& coords, Type type) { | ||
| 1720 | const Id coords_type = GetTypeVectorDefinitionLut(type).at(coords.size() - 1); | ||
| 1721 | return coords.size() == 1 ? coords[0] : OpCompositeConstruct(coords_type, coords); | ||
| 1722 | } | ||
| 1723 | |||
| 1724 | Id GetCoordinates(Operation operation, Type type) { | ||
| 1725 | std::vector<Id> coords; | ||
| 1726 | for (std::size_t i = 0; i < operation.GetOperandsCount(); ++i) { | ||
| 1727 | coords.push_back(As(Visit(operation[i]), type)); | ||
| 1728 | } | ||
| 1729 | if (const auto meta = std::get_if<MetaTexture>(&operation.GetMeta())) { | ||
| 1730 | // Add array coordinate for textures | ||
| 1731 | if (meta->sampler.is_array) { | ||
| 1732 | Id array = AsInt(Visit(meta->array)); | ||
| 1733 | if (type == Type::Float) { | ||
| 1734 | array = OpConvertSToF(t_float, array); | ||
| 1735 | } | ||
| 1736 | coords.push_back(array); | ||
| 1737 | } | ||
| 1738 | } | ||
| 1739 | return AssembleVector(coords, type); | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | Id GetOffsetCoordinates(Operation operation) { | ||
| 1743 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1744 | std::vector<Id> coords; | ||
| 1745 | coords.reserve(meta.aoffi.size()); | ||
| 1746 | for (const auto& coord : meta.aoffi) { | ||
| 1747 | coords.push_back(AsInt(Visit(coord))); | ||
| 1748 | } | ||
| 1749 | return AssembleVector(coords, Type::Int); | ||
| 1750 | } | ||
| 1751 | |||
| 1752 | std::pair<Id, Id> GetDerivatives(Operation operation) { | ||
| 1753 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1754 | const auto& derivatives = meta.derivates; | ||
| 1755 | ASSERT(derivatives.size() % 2 == 0); | ||
| 1756 | |||
| 1757 | const std::size_t components = derivatives.size() / 2; | ||
| 1758 | std::vector<Id> dx, dy; | ||
| 1759 | dx.reserve(components); | ||
| 1760 | dy.reserve(components); | ||
| 1761 | for (std::size_t index = 0; index < components; ++index) { | ||
| 1762 | dx.push_back(AsFloat(Visit(derivatives.at(index * 2 + 0)))); | ||
| 1763 | dy.push_back(AsFloat(Visit(derivatives.at(index * 2 + 1)))); | ||
| 1764 | } | ||
| 1765 | return {AssembleVector(dx, Type::Float), AssembleVector(dy, Type::Float)}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | Expression GetTextureElement(Operation operation, Id sample_value, Type type) { | ||
| 1769 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1770 | const auto type_def = GetTypeDefinition(type); | ||
| 1771 | return {OpCompositeExtract(type_def, sample_value, meta.element), type}; | ||
| 1772 | } | ||
| 1773 | |||
| 1774 | Expression Texture(Operation operation) { | ||
| 1775 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1776 | |||
| 1777 | const bool can_implicit = stage == ShaderType::Fragment; | ||
| 1778 | const Id sampler = GetTextureSampler(operation); | ||
| 1779 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1780 | |||
| 1781 | std::vector<Id> operands; | ||
| 1782 | spv::ImageOperandsMask mask{}; | ||
| 1783 | if (meta.bias) { | ||
| 1784 | mask = mask | spv::ImageOperandsMask::Bias; | ||
| 1785 | operands.push_back(AsFloat(Visit(meta.bias))); | ||
| 1786 | } | ||
| 1787 | |||
| 1788 | if (!can_implicit) { | ||
| 1789 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1790 | operands.push_back(v_float_zero); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | if (!meta.aoffi.empty()) { | ||
| 1794 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1795 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | if (meta.depth_compare) { | ||
| 1799 | // Depth sampling | ||
| 1800 | UNIMPLEMENTED_IF(meta.bias); | ||
| 1801 | const Id dref = AsFloat(Visit(meta.depth_compare)); | ||
| 1802 | if (can_implicit) { | ||
| 1803 | return { | ||
| 1804 | OpImageSampleDrefImplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1805 | Type::Float}; | ||
| 1806 | } else { | ||
| 1807 | return { | ||
| 1808 | OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1809 | Type::Float}; | ||
| 1810 | } | ||
| 1811 | } | ||
| 1812 | |||
| 1813 | Id texture; | ||
| 1814 | if (can_implicit) { | ||
| 1815 | texture = OpImageSampleImplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1816 | } else { | ||
| 1817 | texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1818 | } | ||
| 1819 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | Expression TextureLod(Operation operation) { | ||
| 1823 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1824 | |||
| 1825 | const Id sampler = GetTextureSampler(operation); | ||
| 1826 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1827 | const Id lod = AsFloat(Visit(meta.lod)); | ||
| 1828 | |||
| 1829 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::Lod; | ||
| 1830 | std::vector<Id> operands{lod}; | ||
| 1831 | |||
| 1832 | if (!meta.aoffi.empty()) { | ||
| 1833 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1834 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | if (meta.sampler.is_shadow) { | ||
| 1838 | const Id dref = AsFloat(Visit(meta.depth_compare)); | ||
| 1839 | return {OpImageSampleDrefExplicitLod(t_float, sampler, coords, dref, mask, operands), | ||
| 1840 | Type::Float}; | ||
| 1841 | } | ||
| 1842 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, operands); | ||
| 1843 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | Expression TextureGather(Operation operation) { | ||
| 1847 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1848 | |||
| 1849 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1850 | |||
| 1851 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; | ||
| 1852 | std::vector<Id> operands; | ||
| 1853 | Id texture{}; | ||
| 1854 | |||
| 1855 | if (!meta.aoffi.empty()) { | ||
| 1856 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1857 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1858 | } | ||
| 1859 | |||
| 1860 | if (meta.sampler.is_shadow) { | ||
| 1861 | texture = OpImageDrefGather(t_float4, GetTextureSampler(operation), coords, | ||
| 1862 | AsFloat(Visit(meta.depth_compare)), mask, operands); | ||
| 1863 | } else { | ||
| 1864 | u32 component_value = 0; | ||
| 1865 | if (meta.component) { | ||
| 1866 | const auto component = std::get_if<ImmediateNode>(&*meta.component); | ||
| 1867 | ASSERT_MSG(component, "Component is not an immediate value"); | ||
| 1868 | component_value = component->GetValue(); | ||
| 1869 | } | ||
| 1870 | texture = OpImageGather(t_float4, GetTextureSampler(operation), coords, | ||
| 1871 | Constant(t_uint, component_value), mask, operands); | ||
| 1872 | } | ||
| 1873 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1874 | } | ||
| 1875 | |||
| 1876 | Expression TextureQueryDimensions(Operation operation) { | ||
| 1877 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1878 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1879 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1880 | |||
| 1881 | const auto image_id = GetTextureImage(operation); | ||
| 1882 | if (meta.element == 3) { | ||
| 1883 | return {OpImageQueryLevels(t_int, image_id), Type::Int}; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | const Id lod = AsUint(Visit(operation[0])); | ||
| 1887 | const std::size_t coords_count = [&meta] { | ||
| 1888 | switch (const auto type = meta.sampler.type) { | ||
| 1889 | case Tegra::Shader::TextureType::Texture1D: | ||
| 1890 | return 1; | ||
| 1891 | case Tegra::Shader::TextureType::Texture2D: | ||
| 1892 | case Tegra::Shader::TextureType::TextureCube: | ||
| 1893 | return 2; | ||
| 1894 | case Tegra::Shader::TextureType::Texture3D: | ||
| 1895 | return 3; | ||
| 1896 | default: | ||
| 1897 | UNREACHABLE_MSG("Invalid texture type={}", type); | ||
| 1898 | return 2; | ||
| 1899 | } | ||
| 1900 | }(); | ||
| 1901 | |||
| 1902 | if (meta.element >= coords_count) { | ||
| 1903 | return {v_float_zero, Type::Float}; | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | const std::array<Id, 3> types = {t_int, t_int2, t_int3}; | ||
| 1907 | const Id sizes = OpImageQuerySizeLod(types.at(coords_count - 1), image_id, lod); | ||
| 1908 | const Id size = OpCompositeExtract(t_int, sizes, meta.element); | ||
| 1909 | return {size, Type::Int}; | ||
| 1910 | } | ||
| 1911 | |||
| 1912 | Expression TextureQueryLod(Operation operation) { | ||
| 1913 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1914 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1915 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1916 | |||
| 1917 | if (meta.element >= 2) { | ||
| 1918 | UNREACHABLE_MSG("Invalid element"); | ||
| 1919 | return {v_float_zero, Type::Float}; | ||
| 1920 | } | ||
| 1921 | const auto sampler_id = GetTextureSampler(operation); | ||
| 1922 | |||
| 1923 | const Id multiplier = Constant(t_float, 256.0f); | ||
| 1924 | const Id multipliers = ConstantComposite(t_float2, multiplier, multiplier); | ||
| 1925 | |||
| 1926 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1927 | Id size = OpImageQueryLod(t_float2, sampler_id, coords); | ||
| 1928 | size = OpFMul(t_float2, size, multipliers); | ||
| 1929 | size = OpConvertFToS(t_int2, size); | ||
| 1930 | return GetTextureElement(operation, size, Type::Int); | ||
| 1931 | } | ||
| 1932 | |||
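The 256.0f multiplier converts the floating-point LOD returned by OpImageQueryLod into what is assumed to be the guest's 8.8 fixed-point encoding before the float-to-int conversion. The same conversion on the CPU, with an illustrative helper name:

```cpp
#include <cstdint>

// Integer LOD in the upper bits, fractional LOD in the low 8 bits (8.8 fixed point).
std::int32_t LodToFixedPoint(float lod) {
    return static_cast<std::int32_t>(lod * 256.0f);
}
```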
| 1933 | Expression TexelFetch(Operation operation) { | ||
| 1934 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1935 | UNIMPLEMENTED_IF(meta.depth_compare); | ||
| 1936 | |||
| 1937 | const Id image = GetTextureImage(operation); | ||
| 1938 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1939 | |||
| 1940 | spv::ImageOperandsMask mask = spv::ImageOperandsMask::MaskNone; | ||
| 1941 | std::vector<Id> operands; | ||
| 1942 | Id fetch; | ||
| 1943 | |||
| 1944 | if (meta.lod && !meta.sampler.is_buffer) { | ||
| 1945 | mask = mask | spv::ImageOperandsMask::Lod; | ||
| 1946 | operands.push_back(AsInt(Visit(meta.lod))); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | if (!meta.aoffi.empty()) { | ||
| 1950 | mask = mask | spv::ImageOperandsMask::Offset; | ||
| 1951 | operands.push_back(GetOffsetCoordinates(operation)); | ||
| 1952 | } | ||
| 1953 | |||
| 1954 | fetch = OpImageFetch(t_float4, image, coords, mask, operands); | ||
| 1955 | return GetTextureElement(operation, fetch, Type::Float); | ||
| 1956 | } | ||
| 1957 | |||
| 1958 | Expression TextureGradient(Operation operation) { | ||
| 1959 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1960 | UNIMPLEMENTED_IF(!meta.aoffi.empty()); | ||
| 1961 | |||
| 1962 | const Id sampler = GetTextureSampler(operation); | ||
| 1963 | const Id coords = GetCoordinates(operation, Type::Float); | ||
| 1964 | const auto [dx, dy] = GetDerivatives(operation); | ||
| 1965 | const std::vector grad = {dx, dy}; | ||
| 1966 | |||
| 1967 | static constexpr auto mask = spv::ImageOperandsMask::Grad; | ||
| 1968 | const Id texture = OpImageSampleExplicitLod(t_float4, sampler, coords, mask, grad); | ||
| 1969 | return GetTextureElement(operation, texture, Type::Float); | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | Expression ImageLoad(Operation operation) { | ||
| 1973 | if (!device.IsFormatlessImageLoadSupported()) { | ||
| 1974 | return {v_float_zero, Type::Float}; | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1978 | |||
| 1979 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1980 | const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); | ||
| 1981 | |||
| 1982 | return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; | ||
| 1983 | } | ||
| 1984 | |||
| 1985 | Expression ImageStore(Operation operation) { | ||
| 1986 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1987 | std::vector<Id> colors; | ||
| 1988 | for (const auto& value : meta.values) { | ||
| 1989 | colors.push_back(AsUint(Visit(value))); | ||
| 1990 | } | ||
| 1991 | |||
| 1992 | const Id coords = GetCoordinates(operation, Type::Int); | ||
| 1993 | const Id texel = OpCompositeConstruct(t_uint4, colors); | ||
| 1994 | |||
| 1995 | OpImageWrite(GetImage(operation), coords, texel, {}); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
| 1999 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2000 | Expression AtomicImage(Operation operation) { | ||
| 2001 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 2002 | ASSERT(meta.values.size() == 1); | ||
| 2003 | |||
| 2004 | const Id coordinate = GetCoordinates(operation, Type::Int); | ||
| 2005 | const Id image = images.at(meta.image.index).image; | ||
| 2006 | const Id sample = v_uint_zero; | ||
| 2007 | const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); | ||
| 2008 | |||
| 2009 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||
| 2010 | const Id semantics = v_uint_zero; | ||
| 2011 | const Id value = AsUint(Visit(meta.values[0])); | ||
| 2012 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 2013 | } | ||
| 2014 | |||
| 2015 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2016 | Expression Atomic(Operation operation) { | ||
| 2017 | Id pointer; | ||
| 2018 | if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 2019 | pointer = GetSharedMemoryPointer(*smem); | ||
| 2020 | } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 2021 | pointer = GetGlobalMemoryPointer(*gmem); | ||
| 2022 | } else { | ||
| 2023 | UNREACHABLE(); | ||
| 2024 | return {v_float_zero, Type::Float}; | ||
| 2025 | } | ||
| 2026 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||
| 2027 | const Id semantics = v_uint_zero; | ||
| 2028 | const Id value = AsUint(Visit(operation[1])); | ||
| 2029 | |||
| 2030 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 2031 | } | ||
| 2032 | |||
| 2033 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||
| 2034 | Expression Reduce(Operation operation) { | ||
| 2035 | Atomic<func>(operation); | ||
| 2036 | return {}; | ||
| 2037 | } | ||
| 2038 | |||
| 2039 | Expression Branch(Operation operation) { | ||
| 2040 | const auto& target = std::get<ImmediateNode>(*operation[0]); | ||
| 2041 | OpStore(jmp_to, Constant(t_uint, target.GetValue())); | ||
| 2042 | OpBranch(continue_label); | ||
| 2043 | inside_branch = true; | ||
| 2044 | if (!conditional_branch_set) { | ||
| 2045 | AddLabel(); | ||
| 2046 | } | ||
| 2047 | return {}; | ||
| 2048 | } | ||
| 2049 | |||
| 2050 | Expression BranchIndirect(Operation operation) { | ||
| 2051 | const Id op_a = AsUint(Visit(operation[0])); | ||
| 2052 | |||
| 2053 | OpStore(jmp_to, op_a); | ||
| 2054 | OpBranch(continue_label); | ||
| 2055 | inside_branch = true; | ||
| 2056 | if (!conditional_branch_set) { | ||
| 2057 | AddLabel(); | ||
| 2058 | } | ||
| 2059 | return {}; | ||
| 2060 | } | ||
| 2061 | |||
| 2062 | Expression PushFlowStack(Operation operation) { | ||
| 2063 | const auto& target = std::get<ImmediateNode>(*operation[0]); | ||
| 2064 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 2065 | const Id current = OpLoad(t_uint, flow_stack_top); | ||
| 2066 | const Id next = OpIAdd(t_uint, current, Constant(t_uint, 1)); | ||
| 2067 | const Id access = OpAccessChain(t_func_uint, flow_stack, current); | ||
| 2068 | |||
| 2069 | OpStore(access, Constant(t_uint, target.GetValue())); | ||
| 2070 | OpStore(flow_stack_top, next); | ||
| 2071 | return {}; | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | Expression PopFlowStack(Operation operation) { | ||
| 2075 | const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||
| 2076 | const Id current = OpLoad(t_uint, flow_stack_top); | ||
| 2077 | const Id previous = OpISub(t_uint, current, Constant(t_uint, 1)); | ||
| 2078 | const Id access = OpAccessChain(t_func_uint, flow_stack, previous); | ||
| 2079 | const Id target = OpLoad(t_uint, access); | ||
| 2080 | |||
| 2081 | OpStore(flow_stack_top, previous); | ||
| 2082 | OpStore(jmp_to, target); | ||
| 2083 | OpBranch(continue_label); | ||
| 2084 | inside_branch = true; | ||
| 2085 | if (!conditional_branch_set) { | ||
| 2086 | AddLabel(); | ||
| 2087 | } | ||
| 2088 | return {}; | ||
| 2089 | } | ||
| 2090 | |||
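PushFlowStack and PopFlowStack model the hardware SSY/PBK stacks with a fixed-size array plus a top index (see CreateFlowStack further down). A CPU-side sketch of the same structure, names illustrative:

```cpp
#include <array>
#include <cstdint>

struct FlowStack {
    std::array<std::uint32_t, 20> targets{}; // depth matches FLOW_STACK_SIZE below
    std::uint32_t top = 0;

    void Push(std::uint32_t target) { targets[top++] = target; }
    std::uint32_t Pop() { return targets[--top]; }
};
```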
| 2091 | Id MaxwellToSpirvComparison(Maxwell::ComparisonOp compare_op, Id operand_1, Id operand_2) { | ||
| 2092 | using Compare = Maxwell::ComparisonOp; | ||
| 2093 | switch (compare_op) { | ||
| 2094 | case Compare::NeverOld: | ||
| 2095 | return v_false; // Never let the test pass | ||
| 2096 | case Compare::LessOld: | ||
| 2097 | return OpFOrdLessThan(t_bool, operand_1, operand_2); | ||
| 2098 | case Compare::EqualOld: | ||
| 2099 | return OpFOrdEqual(t_bool, operand_1, operand_2); | ||
| 2100 | case Compare::LessEqualOld: | ||
| 2101 | return OpFOrdLessThanEqual(t_bool, operand_1, operand_2); | ||
| 2102 | case Compare::GreaterOld: | ||
| 2103 | return OpFOrdGreaterThan(t_bool, operand_1, operand_2); | ||
| 2104 | case Compare::NotEqualOld: | ||
| 2105 | return OpFOrdNotEqual(t_bool, operand_1, operand_2); | ||
| 2106 | case Compare::GreaterEqualOld: | ||
| 2107 | return OpFOrdGreaterThanEqual(t_bool, operand_1, operand_2); | ||
| 2108 | default: | ||
| 2109 | UNREACHABLE(); | ||
| 2110 | return v_true; | ||
| 2111 | } | ||
| 2112 | } | ||
| 2113 | |||
| 2114 | void AlphaTest(Id pointer) { | ||
| 2115 | if (specialization.alpha_test_func == Maxwell::ComparisonOp::AlwaysOld) { | ||
| 2116 | return; | ||
| 2117 | } | ||
| 2118 | const Id true_label = OpLabel(); | ||
| 2119 | const Id discard_label = OpLabel(); | ||
| 2120 | const Id alpha_reference = Constant(t_float, specialization.alpha_test_ref); | ||
| 2121 | const Id alpha_value = OpLoad(t_float, pointer); | ||
| 2122 | const Id condition = | ||
| 2123 | MaxwellToSpirvComparison(specialization.alpha_test_func, alpha_value, alpha_reference); | ||
| 2124 | |||
| 2125 | OpBranchConditional(condition, true_label, discard_label); | ||
| 2126 | AddLabel(discard_label); | ||
| 2127 | OpKill(); | ||
| 2128 | AddLabel(true_label); | ||
| 2129 | } | ||
| 2130 | |||
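AlphaTest emits a conditional OpKill: the fragment is discarded unless the selected comparison between the written alpha and the reference value passes. A host-side sketch of that test; the enum is illustrative and only mirrors the comparisons handled by MaxwellToSpirvComparison above:

```cpp
enum class AlphaFunc { Never, Less, Equal, LessEqual, Greater, NotEqual, GreaterEqual, Always };

bool PassesAlphaTest(AlphaFunc func, float alpha, float ref) {
    switch (func) {
    case AlphaFunc::Never:        return false;
    case AlphaFunc::Less:         return alpha < ref;
    case AlphaFunc::Equal:        return alpha == ref;
    case AlphaFunc::LessEqual:    return alpha <= ref;
    case AlphaFunc::Greater:      return alpha > ref;
    case AlphaFunc::NotEqual:     return alpha != ref;
    case AlphaFunc::GreaterEqual: return alpha >= ref;
    case AlphaFunc::Always:       return true;
    }
    return true;
}
```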
| 2131 | void PreExit() { | ||
| 2132 | if (stage == ShaderType::Vertex && specialization.ndc_minus_one_to_one) { | ||
| 2133 | const u32 position_index = out_indices.position.value(); | ||
| 2134 | const Id z_pointer = AccessElement(t_out_float, out_vertex, position_index, 2U); | ||
| 2135 | const Id w_pointer = AccessElement(t_out_float, out_vertex, position_index, 3U); | ||
| 2136 | Id depth = OpLoad(t_float, z_pointer); | ||
| 2137 | depth = OpFAdd(t_float, depth, OpLoad(t_float, w_pointer)); | ||
| 2138 | depth = OpFMul(t_float, depth, Constant(t_float, 0.5f)); | ||
| 2139 | OpStore(z_pointer, depth); | ||
| 2140 | } | ||
| 2141 | if (stage == ShaderType::Fragment) { | ||
| 2142 | const auto SafeGetRegister = [this](u32 reg) { | ||
| 2143 | if (const auto it = registers.find(reg); it != registers.end()) { | ||
| 2144 | return OpLoad(t_float, it->second); | ||
| 2145 | } | ||
| 2146 | return v_float_zero; | ||
| 2147 | }; | ||
| 2148 | |||
| 2149 | UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, | ||
| 2150 | "Sample mask write is unimplemented"); | ||
| 2151 | |||
| 2152 | // Write the color outputs using the data in the shader registers; disabled | ||
| 2153 | // render targets/components are skipped in the register assignment. | ||
| 2154 | u32 current_reg = 0; | ||
| 2155 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 2156 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | ||
| 2157 | for (u32 component = 0; component < 4; ++component) { | ||
| 2158 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 2159 | continue; | ||
| 2160 | } | ||
| 2161 | const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); | ||
| 2162 | OpStore(pointer, SafeGetRegister(current_reg)); | ||
| 2163 | if (rt == 0 && component == 3) { | ||
| 2164 | AlphaTest(pointer); | ||
| 2165 | } | ||
| 2166 | ++current_reg; | ||
| 2167 | } | ||
| 2168 | } | ||
| 2169 | if (header.ps.omap.depth) { | ||
| 2170 | // The depth output is always 2 registers after the last color output, and | ||
| 2171 | // current_reg already contains one past the last color register. | ||
| 2172 | OpStore(frag_depth, SafeGetRegister(current_reg + 1)); | ||
| 2173 | } | ||
| 2174 | } | ||
| 2175 | } | ||
| 2176 | |||
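The vertex-stage block in PreExit rescales clip-space depth when the guest uses an OpenGL-style [-1, 1] range: storing z' = (z + w) / 2 makes the post-divide NDC depth (z/w + 1) / 2, which lands in Vulkan's [0, 1]. A minimal sketch of the fixup, assuming a plain xyzw position:

```cpp
struct ClipPosition { float x, y, z, w; };

// After the perspective divide, z'/w == (z/w + 1) / 2, i.e. [-1, 1] -> [0, 1].
void ConvertNdcMinusOneToOne(ClipPosition& pos) {
    pos.z = (pos.z + pos.w) * 0.5f;
}
```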
| 2177 | Expression Exit(Operation operation) { | ||
| 2178 | PreExit(); | ||
| 2179 | inside_branch = true; | ||
| 2180 | if (conditional_branch_set) { | ||
| 2181 | OpReturn(); | ||
| 2182 | } else { | ||
| 2183 | const Id dummy = OpLabel(); | ||
| 2184 | OpBranch(dummy); | ||
| 2185 | AddLabel(dummy); | ||
| 2186 | OpReturn(); | ||
| 2187 | AddLabel(); | ||
| 2188 | } | ||
| 2189 | return {}; | ||
| 2190 | } | ||
| 2191 | |||
| 2192 | Expression Discard(Operation operation) { | ||
| 2193 | inside_branch = true; | ||
| 2194 | if (conditional_branch_set) { | ||
| 2195 | OpKill(); | ||
| 2196 | } else { | ||
| 2197 | const Id dummy = OpLabel(); | ||
| 2198 | OpBranch(dummy); | ||
| 2199 | AddLabel(dummy); | ||
| 2200 | OpKill(); | ||
| 2201 | AddLabel(); | ||
| 2202 | } | ||
| 2203 | return {}; | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | Expression EmitVertex(Operation) { | ||
| 2207 | OpEmitVertex(); | ||
| 2208 | return {}; | ||
| 2209 | } | ||
| 2210 | |||
| 2211 | Expression EndPrimitive(Operation operation) { | ||
| 2212 | OpEndPrimitive(); | ||
| 2213 | return {}; | ||
| 2214 | } | ||
| 2215 | |||
| 2216 | Expression InvocationId(Operation) { | ||
| 2217 | return {OpLoad(t_int, invocation_id), Type::Int}; | ||
| 2218 | } | ||
| 2219 | |||
| 2220 | Expression YNegate(Operation) { | ||
| 2221 | LOG_WARNING(Render_Vulkan, "(STUBBED)"); | ||
| 2222 | return {Constant(t_float, 1.0f), Type::Float}; | ||
| 2223 | } | ||
| 2224 | |||
| 2225 | template <u32 element> | ||
| 2226 | Expression LocalInvocationId(Operation) { | ||
| 2227 | const Id id = OpLoad(t_uint3, local_invocation_id); | ||
| 2228 | return {OpCompositeExtract(t_uint, id, element), Type::Uint}; | ||
| 2229 | } | ||
| 2230 | |||
| 2231 | template <u32 element> | ||
| 2232 | Expression WorkGroupId(Operation operation) { | ||
| 2233 | const Id id = OpLoad(t_uint3, workgroup_id); | ||
| 2234 | return {OpCompositeExtract(t_uint, id, element), Type::Uint}; | ||
| 2235 | } | ||
| 2236 | |||
| 2237 | Expression BallotThread(Operation operation) { | ||
| 2238 | const Id predicate = AsBool(Visit(operation[0])); | ||
| 2239 | const Id ballot = OpSubgroupBallotKHR(t_uint4, predicate); | ||
| 2240 | |||
| 2241 | if (!device.IsWarpSizePotentiallyBiggerThanGuest()) { | ||
| 2242 | // Guest-like devices can just return the first index. | ||
| 2243 | return {OpCompositeExtract(t_uint, ballot, 0U), Type::Uint}; | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | // Wider warps have to return the ballot word local to the current thread. | ||
| 2247 | // For instance a device with a warp size of 64 will return the upper uint when the current | ||
| 2248 | // thread is 38. | ||
| 2249 | const Id tid = OpLoad(t_uint, thread_id); | ||
| 2250 | const Id thread_index = OpShiftRightLogical(t_uint, tid, Constant(t_uint, 5)); | ||
| 2251 | return {OpVectorExtractDynamic(t_uint, ballot, thread_index), Type::Uint}; | ||
| 2252 | } | ||
| 2253 | |||
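On devices whose warp is wider than the guest's 32 threads, BallotThread picks the 32-bit word of the ballot that covers the calling thread: word index = thread_id / 32. A host-side sketch, assuming the ballot is packed least-significant lane first:

```cpp
#include <array>
#include <cstdint>

std::uint32_t LocalBallotWord(const std::array<std::uint32_t, 4>& ballot, std::uint32_t thread_id) {
    return ballot[thread_id >> 5]; // e.g. thread 38 on a 64-wide warp reads word 1
}
```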
| 2254 | template <Id (Module::*func)(Id, Id)> | ||
| 2255 | Expression Vote(Operation operation) { | ||
| 2256 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2257 | const Id predicate = AsBool(Visit(operation[0])); | ||
| 2258 | return {(this->*func)(t_bool, predicate), Type::Bool}; | ||
| 2259 | } | ||
| 2260 | |||
| 2261 | Expression ThreadId(Operation) { | ||
| 2262 | return {OpLoad(t_uint, thread_id), Type::Uint}; | ||
| 2263 | } | ||
| 2264 | |||
| 2265 | template <std::size_t index> | ||
| 2266 | Expression ThreadMask(Operation) { | ||
| 2267 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2268 | const Id mask = thread_masks[index]; | ||
| 2269 | return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; | ||
| 2270 | } | ||
| 2271 | |||
| 2272 | Expression ShuffleIndexed(Operation operation) { | ||
| 2273 | const Id value = AsFloat(Visit(operation[0])); | ||
| 2274 | const Id index = AsUint(Visit(operation[1])); | ||
| 2275 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; | ||
| 2276 | } | ||
| 2277 | |||
| 2278 | Expression Barrier(Operation) { | ||
| 2279 | if (!ir.IsDecompiled()) { | ||
| 2280 | LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); | ||
| 2281 | return {}; | ||
| 2282 | } | ||
| 2283 | |||
| 2284 | const auto scope = spv::Scope::Workgroup; | ||
| 2285 | const auto memory = spv::Scope::Workgroup; | ||
| 2286 | const auto semantics = | ||
| 2287 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; | ||
| 2288 | OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2289 | Constant(t_uint, static_cast<u32>(memory)), | ||
| 2290 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2291 | return {}; | ||
| 2292 | } | ||
| 2293 | |||
| 2294 | template <spv::Scope scope> | ||
| 2295 | Expression MemoryBarrier(Operation) { | ||
| 2296 | const auto semantics = | ||
| 2297 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | ||
| 2298 | spv::MemorySemanticsMask::WorkgroupMemory | | ||
| 2299 | spv::MemorySemanticsMask::AtomicCounterMemory | spv::MemorySemanticsMask::ImageMemory; | ||
| 2300 | |||
| 2301 | OpMemoryBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2302 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2303 | return {}; | ||
| 2304 | } | ||
| 2305 | |||
| 2306 | Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, std::string name) { | ||
| 2307 | const Id id = OpVariable(type, storage); | ||
| 2308 | Decorate(id, spv::Decoration::BuiltIn, static_cast<u32>(builtin)); | ||
| 2309 | AddGlobalVariable(Name(id, std::move(name))); | ||
| 2310 | interfaces.push_back(id); | ||
| 2311 | return id; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | Id DeclareInputBuiltIn(spv::BuiltIn builtin, Id type, std::string name) { | ||
| 2315 | return DeclareBuiltIn(builtin, spv::StorageClass::Input, type, std::move(name)); | ||
| 2316 | } | ||
| 2317 | |||
| 2318 | template <typename... Args> | ||
| 2319 | Id AccessElement(Id pointer_type, Id composite, Args... elements_) { | ||
| 2320 | std::vector<Id> members; | ||
| 2321 | auto elements = {elements_...}; | ||
| 2322 | for (const auto element : elements) { | ||
| 2323 | members.push_back(Constant(t_uint, element)); | ||
| 2324 | } | ||
| 2325 | |||
| 2326 | return OpAccessChain(pointer_type, composite, members); | ||
| 2327 | } | ||
| 2328 | |||
| 2329 | Id As(Expression expr, Type wanted_type) { | ||
| 2330 | switch (wanted_type) { | ||
| 2331 | case Type::Bool: | ||
| 2332 | return AsBool(expr); | ||
| 2333 | case Type::Bool2: | ||
| 2334 | return AsBool2(expr); | ||
| 2335 | case Type::Float: | ||
| 2336 | return AsFloat(expr); | ||
| 2337 | case Type::Int: | ||
| 2338 | return AsInt(expr); | ||
| 2339 | case Type::Uint: | ||
| 2340 | return AsUint(expr); | ||
| 2341 | case Type::HalfFloat: | ||
| 2342 | return AsHalfFloat(expr); | ||
| 2343 | default: | ||
| 2344 | UNREACHABLE(); | ||
| 2345 | return expr.id; | ||
| 2346 | } | ||
| 2347 | } | ||
| 2348 | |||
| 2349 | Id AsBool(Expression expr) { | ||
| 2350 | ASSERT(expr.type == Type::Bool); | ||
| 2351 | return expr.id; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | Id AsBool2(Expression expr) { | ||
| 2355 | ASSERT(expr.type == Type::Bool2); | ||
| 2356 | return expr.id; | ||
| 2357 | } | ||
| 2358 | |||
| 2359 | Id AsFloat(Expression expr) { | ||
| 2360 | switch (expr.type) { | ||
| 2361 | case Type::Float: | ||
| 2362 | return expr.id; | ||
| 2363 | case Type::Int: | ||
| 2364 | case Type::Uint: | ||
| 2365 | return OpBitcast(t_float, expr.id); | ||
| 2366 | case Type::HalfFloat: | ||
| 2367 | if (device.IsFloat16Supported()) { | ||
| 2368 | return OpBitcast(t_float, expr.id); | ||
| 2369 | } | ||
| 2370 | return OpBitcast(t_float, OpPackHalf2x16(t_uint, expr.id)); | ||
| 2371 | default: | ||
| 2372 | UNREACHABLE(); | ||
| 2373 | return expr.id; | ||
| 2374 | } | ||
| 2375 | } | ||
| 2376 | |||
| 2377 | Id AsInt(Expression expr) { | ||
| 2378 | switch (expr.type) { | ||
| 2379 | case Type::Int: | ||
| 2380 | return expr.id; | ||
| 2381 | case Type::Float: | ||
| 2382 | case Type::Uint: | ||
| 2383 | return OpBitcast(t_int, expr.id); | ||
| 2384 | case Type::HalfFloat: | ||
| 2385 | if (device.IsFloat16Supported()) { | ||
| 2386 | return OpBitcast(t_int, expr.id); | ||
| 2387 | } | ||
| 2388 | return OpPackHalf2x16(t_int, expr.id); | ||
| 2389 | default: | ||
| 2390 | UNREACHABLE(); | ||
| 2391 | return expr.id; | ||
| 2392 | } | ||
| 2393 | } | ||
| 2394 | |||
| 2395 | Id AsUint(Expression expr) { | ||
| 2396 | switch (expr.type) { | ||
| 2397 | case Type::Uint: | ||
| 2398 | return expr.id; | ||
| 2399 | case Type::Float: | ||
| 2400 | case Type::Int: | ||
| 2401 | return OpBitcast(t_uint, expr.id); | ||
| 2402 | case Type::HalfFloat: | ||
| 2403 | if (device.IsFloat16Supported()) { | ||
| 2404 | return OpBitcast(t_uint, expr.id); | ||
| 2405 | } | ||
| 2406 | return OpPackHalf2x16(t_uint, expr.id); | ||
| 2407 | default: | ||
| 2408 | UNREACHABLE(); | ||
| 2409 | return expr.id; | ||
| 2410 | } | ||
| 2411 | } | ||
| 2412 | |||
| 2413 | Id AsHalfFloat(Expression expr) { | ||
| 2414 | switch (expr.type) { | ||
| 2415 | case Type::HalfFloat: | ||
| 2416 | return expr.id; | ||
| 2417 | case Type::Float: | ||
| 2418 | case Type::Int: | ||
| 2419 | case Type::Uint: | ||
| 2420 | if (device.IsFloat16Supported()) { | ||
| 2421 | return OpBitcast(t_half, expr.id); | ||
| 2422 | } | ||
| 2423 | return OpUnpackHalf2x16(t_half, AsUint(expr)); | ||
| 2424 | default: | ||
| 2425 | UNREACHABLE(); | ||
| 2426 | return expr.id; | ||
| 2427 | } | ||
| 2428 | } | ||
| 2429 | |||
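When the device lacks native FP16, the helpers above carry half2 values as two 32-bit floats and round-trip to scalar types through OpPackHalf2x16/OpUnpackHalf2x16. A sketch of that packed intermediate, assuming the standard layout with the first component in the low 16 bits:

```cpp
#include <cstdint>

// Combine two raw binary16 bit patterns into the packed u32 intermediate.
std::uint32_t PackTwoHalfBitPatterns(std::uint16_t low, std::uint16_t high) {
    return static_cast<std::uint32_t>(low) | (static_cast<std::uint32_t>(high) << 16);
}
```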
| 2430 | Id GetHalfScalarFromFloat(Id value) { | ||
| 2431 | if (device.IsFloat16Supported()) { | ||
| 2432 | return OpFConvert(t_scalar_half, value); | ||
| 2433 | } | ||
| 2434 | return value; | ||
| 2435 | } | ||
| 2436 | |||
| 2437 | Id GetFloatFromHalfScalar(Id value) { | ||
| 2438 | if (device.IsFloat16Supported()) { | ||
| 2439 | return OpFConvert(t_float, value); | ||
| 2440 | } | ||
| 2441 | return value; | ||
| 2442 | } | ||
| 2443 | |||
| 2444 | AttributeType GetAttributeType(u32 location) const { | ||
| 2445 | if (stage != ShaderType::Vertex) { | ||
| 2446 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2447 | } | ||
| 2448 | switch (specialization.attribute_types.at(location)) { | ||
| 2449 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 2450 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 2451 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 2452 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 2453 | case Maxwell::VertexAttribute::Type::Float: | ||
| 2454 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2455 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 2456 | return {Type::Int, t_in_int, t_in_int4}; | ||
| 2457 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 2458 | return {Type::Uint, t_in_uint, t_in_uint4}; | ||
| 2459 | default: | ||
| 2460 | UNREACHABLE(); | ||
| 2461 | return {Type::Float, t_in_float, t_in_float4}; | ||
| 2462 | } | ||
| 2463 | } | ||
| 2464 | |||
| 2465 | Id GetTypeDefinition(Type type) const { | ||
| 2466 | switch (type) { | ||
| 2467 | case Type::Bool: | ||
| 2468 | return t_bool; | ||
| 2469 | case Type::Bool2: | ||
| 2470 | return t_bool2; | ||
| 2471 | case Type::Float: | ||
| 2472 | return t_float; | ||
| 2473 | case Type::Int: | ||
| 2474 | return t_int; | ||
| 2475 | case Type::Uint: | ||
| 2476 | return t_uint; | ||
| 2477 | case Type::HalfFloat: | ||
| 2478 | return t_half; | ||
| 2479 | default: | ||
| 2480 | UNREACHABLE(); | ||
| 2481 | return {}; | ||
| 2482 | } | ||
| 2483 | } | ||
| 2484 | |||
| 2485 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | ||
| 2486 | switch (type) { | ||
| 2487 | case Type::Float: | ||
| 2488 | return {t_float, t_float2, t_float3, t_float4}; | ||
| 2489 | case Type::Int: | ||
| 2490 | return {t_int, t_int2, t_int3, t_int4}; | ||
| 2491 | case Type::Uint: | ||
| 2492 | return {t_uint, t_uint2, t_uint3, t_uint4}; | ||
| 2493 | default: | ||
| 2494 | UNIMPLEMENTED(); | ||
| 2495 | return {}; | ||
| 2496 | } | ||
| 2497 | } | ||
| 2498 | |||
| 2499 | std::tuple<Id, Id> CreateFlowStack() { | ||
| 2500 | // TODO(Rodrigo): Figure out the actual depth of the flow stack; for now it seems unlikely | ||
| 2501 | // that shaders will use 20 nested SSYs and PBKs. | ||
| 2502 | constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 2503 | constexpr auto storage_class = spv::StorageClass::Function; | ||
| 2504 | |||
| 2505 | const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||
| 2506 | const Id stack = OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, | ||
| 2507 | ConstantNull(flow_stack_type)); | ||
| 2508 | const Id top = OpVariable(t_func_uint, storage_class, Constant(t_uint, 0)); | ||
| 2509 | AddLocalVariable(stack); | ||
| 2510 | AddLocalVariable(top); | ||
| 2511 | return std::tie(stack, top); | ||
| 2512 | } | ||
| 2513 | |||
| 2514 | std::pair<Id, Id> GetFlowStack(Operation operation) { | ||
| 2515 | const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 2516 | switch (stack_class) { | ||
| 2517 | case MetaStackClass::Ssy: | ||
| 2518 | return {ssy_flow_stack, ssy_flow_stack_top}; | ||
| 2519 | case MetaStackClass::Pbk: | ||
| 2520 | return {pbk_flow_stack, pbk_flow_stack_top}; | ||
| 2521 | } | ||
| 2522 | UNREACHABLE(); | ||
| 2523 | return {}; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||
| 2527 | const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||
| 2528 | const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||
| 2529 | const Id diff = OpISub(t_uint, real, base); | ||
| 2530 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 2531 | const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||
| 2532 | return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||
| 2533 | } | ||
| 2534 | |||
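GetGlobalMemoryPointer subtracts the descriptor's base address from the real address and shifts right by two because global memory is bound as an array of u32. The same index computation on the CPU:

```cpp
#include <cstdint>

std::uint32_t GmemElementIndex(std::uint32_t real_address, std::uint32_t base_address) {
    return (real_address - base_address) >> 2; // byte offset / sizeof(u32)
}
```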
| 2535 | Id GetSharedMemoryPointer(const SmemNode& smem) { | ||
| 2536 | ASSERT(stage == ShaderType::Compute); | ||
| 2537 | Id address = AsUint(Visit(smem.GetAddress())); | ||
| 2538 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 2539 | return OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 2540 | } | ||
| 2541 | |||
| 2542 | static constexpr std::array operation_decompilers = { | ||
| 2543 | &SPIRVDecompiler::Assign, | ||
| 2544 | |||
| 2545 | &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, | ||
| 2546 | Type::Float>, | ||
| 2547 | |||
| 2548 | &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::Float>, | ||
| 2549 | &SPIRVDecompiler::Binary<&Module::OpFMul, Type::Float>, | ||
| 2550 | &SPIRVDecompiler::Binary<&Module::OpFDiv, Type::Float>, | ||
| 2551 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::Float>, | ||
| 2552 | &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | ||
| 2553 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | ||
| 2554 | &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | ||
| 2555 | &SPIRVDecompiler::FCastHalf<0>, | ||
| 2556 | &SPIRVDecompiler::FCastHalf<1>, | ||
| 2557 | &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | ||
| 2558 | &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | ||
| 2559 | &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | ||
| 2560 | &SPIRVDecompiler::Unary<&Module::OpSin, Type::Float>, | ||
| 2561 | &SPIRVDecompiler::Unary<&Module::OpExp2, Type::Float>, | ||
| 2562 | &SPIRVDecompiler::Unary<&Module::OpLog2, Type::Float>, | ||
| 2563 | &SPIRVDecompiler::Unary<&Module::OpInverseSqrt, Type::Float>, | ||
| 2564 | &SPIRVDecompiler::Unary<&Module::OpSqrt, Type::Float>, | ||
| 2565 | &SPIRVDecompiler::Unary<&Module::OpRoundEven, Type::Float>, | ||
| 2566 | &SPIRVDecompiler::Unary<&Module::OpFloor, Type::Float>, | ||
| 2567 | &SPIRVDecompiler::Unary<&Module::OpCeil, Type::Float>, | ||
| 2568 | &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, | ||
| 2569 | &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, | ||
| 2570 | &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, | ||
| 2571 | &SPIRVDecompiler::FSwizzleAdd, | ||
| 2572 | |||
| 2573 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, | ||
| 2574 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, | ||
| 2575 | &SPIRVDecompiler::Binary<&Module::OpSDiv, Type::Int>, | ||
| 2576 | &SPIRVDecompiler::Unary<&Module::OpSNegate, Type::Int>, | ||
| 2577 | &SPIRVDecompiler::Unary<&Module::OpSAbs, Type::Int>, | ||
| 2578 | &SPIRVDecompiler::Binary<&Module::OpSMin, Type::Int>, | ||
| 2579 | &SPIRVDecompiler::Binary<&Module::OpSMax, Type::Int>, | ||
| 2580 | |||
| 2581 | &SPIRVDecompiler::Unary<&Module::OpConvertFToS, Type::Int, Type::Float>, | ||
| 2582 | &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Int, Type::Uint>, | ||
| 2583 | &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Int, Type::Int, Type::Uint>, | ||
| 2584 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Int, Type::Int, Type::Uint>, | ||
| 2585 | &SPIRVDecompiler::Binary<&Module::OpShiftRightArithmetic, Type::Int, Type::Int, Type::Uint>, | ||
| 2586 | &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Int>, | ||
| 2587 | &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Int>, | ||
| 2588 | &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Int>, | ||
| 2589 | &SPIRVDecompiler::Unary<&Module::OpNot, Type::Int>, | ||
| 2590 | &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Int>, | ||
| 2591 | &SPIRVDecompiler::Ternary<&Module::OpBitFieldSExtract, Type::Int>, | ||
| 2592 | &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Int>, | ||
| 2593 | &SPIRVDecompiler::Unary<&Module::OpFindSMsb, Type::Int>, | ||
| 2594 | |||
| 2595 | &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Uint>, | ||
| 2596 | &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Uint>, | ||
| 2597 | &SPIRVDecompiler::Binary<&Module::OpUDiv, Type::Uint>, | ||
| 2598 | &SPIRVDecompiler::Binary<&Module::OpUMin, Type::Uint>, | ||
| 2599 | &SPIRVDecompiler::Binary<&Module::OpUMax, Type::Uint>, | ||
| 2600 | &SPIRVDecompiler::Unary<&Module::OpConvertFToU, Type::Uint, Type::Float>, | ||
| 2601 | &SPIRVDecompiler::Unary<&Module::OpBitcast, Type::Uint, Type::Int>, | ||
| 2602 | &SPIRVDecompiler::Binary<&Module::OpShiftLeftLogical, Type::Uint>, | ||
| 2603 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, | ||
| 2604 | &SPIRVDecompiler::Binary<&Module::OpShiftRightLogical, Type::Uint>, | ||
| 2605 | &SPIRVDecompiler::Binary<&Module::OpBitwiseAnd, Type::Uint>, | ||
| 2606 | &SPIRVDecompiler::Binary<&Module::OpBitwiseOr, Type::Uint>, | ||
| 2607 | &SPIRVDecompiler::Binary<&Module::OpBitwiseXor, Type::Uint>, | ||
| 2608 | &SPIRVDecompiler::Unary<&Module::OpNot, Type::Uint>, | ||
| 2609 | &SPIRVDecompiler::Quaternary<&Module::OpBitFieldInsert, Type::Uint>, | ||
| 2610 | &SPIRVDecompiler::Ternary<&Module::OpBitFieldUExtract, Type::Uint>, | ||
| 2611 | &SPIRVDecompiler::Unary<&Module::OpBitCount, Type::Uint>, | ||
| 2612 | &SPIRVDecompiler::Unary<&Module::OpFindUMsb, Type::Uint>, | ||
| 2613 | |||
| 2614 | &SPIRVDecompiler::Binary<&Module::OpFAdd, Type::HalfFloat>, | ||
| 2615 | &SPIRVDecompiler::Binary<&Module::OpFMul, Type::HalfFloat>, | ||
| 2616 | &SPIRVDecompiler::Ternary<&Module::OpFma, Type::HalfFloat>, | ||
| 2617 | &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | ||
| 2618 | &SPIRVDecompiler::HNegate, | ||
| 2619 | &SPIRVDecompiler::HClamp, | ||
| 2620 | &SPIRVDecompiler::HCastFloat, | ||
| 2621 | &SPIRVDecompiler::HUnpack, | ||
| 2622 | &SPIRVDecompiler::HMergeF32, | ||
| 2623 | &SPIRVDecompiler::HMergeHN<0>, | ||
| 2624 | &SPIRVDecompiler::HMergeHN<1>, | ||
| 2625 | &SPIRVDecompiler::HPack2, | ||
| 2626 | |||
| 2627 | &SPIRVDecompiler::LogicalAssign, | ||
| 2628 | &SPIRVDecompiler::Binary<&Module::OpLogicalAnd, Type::Bool>, | ||
| 2629 | &SPIRVDecompiler::Binary<&Module::OpLogicalOr, Type::Bool>, | ||
| 2630 | &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, | ||
| 2631 | &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, | ||
| 2632 | &SPIRVDecompiler::Binary<&Module::OpVectorExtractDynamic, Type::Bool, Type::Bool2, | ||
| 2633 | Type::Uint>, | ||
| 2634 | &SPIRVDecompiler::Unary<&Module::OpAll, Type::Bool, Type::Bool2>, | ||
| 2635 | |||
| 2636 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, | ||
| 2637 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, | ||
| 2638 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool, Type::Float>, | ||
| 2639 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, | ||
| 2640 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, | ||
| 2641 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, | ||
| 2642 | &SPIRVDecompiler::LogicalFOrdered, | ||
| 2643 | &SPIRVDecompiler::LogicalFUnordered, | ||
| 2644 | &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>, | ||
| 2645 | &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>, | ||
| 2646 | &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>, | ||
| 2647 | &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>, | ||
| 2648 | &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>, | ||
| 2649 | &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>, | ||
| 2650 | |||
| 2651 | &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, | ||
| 2652 | &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, | ||
| 2653 | &SPIRVDecompiler::Binary<&Module::OpSLessThanEqual, Type::Bool, Type::Int>, | ||
| 2654 | &SPIRVDecompiler::Binary<&Module::OpSGreaterThan, Type::Bool, Type::Int>, | ||
| 2655 | &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Int>, | ||
| 2656 | &SPIRVDecompiler::Binary<&Module::OpSGreaterThanEqual, Type::Bool, Type::Int>, | ||
| 2657 | |||
| 2658 | &SPIRVDecompiler::Binary<&Module::OpULessThan, Type::Bool, Type::Uint>, | ||
| 2659 | &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Uint>, | ||
| 2660 | &SPIRVDecompiler::Binary<&Module::OpULessThanEqual, Type::Bool, Type::Uint>, | ||
| 2661 | &SPIRVDecompiler::Binary<&Module::OpUGreaterThan, Type::Bool, Type::Uint>, | ||
| 2662 | &SPIRVDecompiler::Binary<&Module::OpINotEqual, Type::Bool, Type::Uint>, | ||
| 2663 | &SPIRVDecompiler::Binary<&Module::OpUGreaterThanEqual, Type::Bool, Type::Uint>, | ||
| 2664 | |||
| 2665 | &SPIRVDecompiler::LogicalAddCarry, | ||
| 2666 | |||
| 2667 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, | ||
| 2668 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2669 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2670 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, | ||
| 2671 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2672 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2673 | // TODO(Rodrigo): Should these use the OpFUnord* variants? | ||
| 2674 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool2, Type::HalfFloat>, | ||
| 2675 | &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2676 | &SPIRVDecompiler::Binary<&Module::OpFOrdLessThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2677 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool2, Type::HalfFloat>, | ||
| 2678 | &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2679 | &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool2, Type::HalfFloat>, | ||
| 2680 | |||
| 2681 | &SPIRVDecompiler::Texture, | ||
| 2682 | &SPIRVDecompiler::TextureLod, | ||
| 2683 | &SPIRVDecompiler::TextureGather, | ||
| 2684 | &SPIRVDecompiler::TextureQueryDimensions, | ||
| 2685 | &SPIRVDecompiler::TextureQueryLod, | ||
| 2686 | &SPIRVDecompiler::TexelFetch, | ||
| 2687 | &SPIRVDecompiler::TextureGradient, | ||
| 2688 | |||
| 2689 | &SPIRVDecompiler::ImageLoad, | ||
| 2690 | &SPIRVDecompiler::ImageStore, | ||
| 2691 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, | ||
| 2692 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, | ||
| 2693 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, | ||
| 2694 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, | ||
| 2695 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, | ||
| 2696 | |||
| 2697 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||
| 2698 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||
| 2699 | &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, | ||
| 2700 | &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, | ||
| 2701 | &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||
| 2702 | &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||
| 2703 | &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||
| 2704 | |||
| 2705 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||
| 2706 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||
| 2707 | &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, | ||
| 2708 | &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, | ||
| 2709 | &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||
| 2710 | &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||
| 2711 | &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||
| 2712 | |||
| 2713 | &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||
| 2714 | &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, | ||
| 2715 | &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, | ||
| 2716 | &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||
| 2717 | &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||
| 2718 | &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||
| 2719 | |||
| 2720 | &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||
| 2721 | &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, | ||
| 2722 | &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, | ||
| 2723 | &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||
| 2724 | &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||
| 2725 | &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||
| 2726 | |||
| 2727 | &SPIRVDecompiler::Branch, | ||
| 2728 | &SPIRVDecompiler::BranchIndirect, | ||
| 2729 | &SPIRVDecompiler::PushFlowStack, | ||
| 2730 | &SPIRVDecompiler::PopFlowStack, | ||
| 2731 | &SPIRVDecompiler::Exit, | ||
| 2732 | &SPIRVDecompiler::Discard, | ||
| 2733 | |||
| 2734 | &SPIRVDecompiler::EmitVertex, | ||
| 2735 | &SPIRVDecompiler::EndPrimitive, | ||
| 2736 | |||
| 2737 | &SPIRVDecompiler::InvocationId, | ||
| 2738 | &SPIRVDecompiler::YNegate, | ||
| 2739 | &SPIRVDecompiler::LocalInvocationId<0>, | ||
| 2740 | &SPIRVDecompiler::LocalInvocationId<1>, | ||
| 2741 | &SPIRVDecompiler::LocalInvocationId<2>, | ||
| 2742 | &SPIRVDecompiler::WorkGroupId<0>, | ||
| 2743 | &SPIRVDecompiler::WorkGroupId<1>, | ||
| 2744 | &SPIRVDecompiler::WorkGroupId<2>, | ||
| 2745 | |||
| 2746 | &SPIRVDecompiler::BallotThread, | ||
| 2747 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllKHR>, | ||
| 2748 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAnyKHR>, | ||
| 2749 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, | ||
| 2750 | |||
| 2751 | &SPIRVDecompiler::ThreadId, | ||
| 2752 | &SPIRVDecompiler::ThreadMask<0>, // Eq | ||
| 2753 | &SPIRVDecompiler::ThreadMask<1>, // Ge | ||
| 2754 | &SPIRVDecompiler::ThreadMask<2>, // Gt | ||
| 2755 | &SPIRVDecompiler::ThreadMask<3>, // Le | ||
| 2756 | &SPIRVDecompiler::ThreadMask<4>, // Lt | ||
| 2757 | &SPIRVDecompiler::ShuffleIndexed, | ||
| 2758 | |||
| 2759 | &SPIRVDecompiler::Barrier, | ||
| 2760 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, | ||
| 2761 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, | ||
| 2762 | }; | ||
| 2763 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | ||
| 2764 | |||
| 2765 | const Device& device; | ||
| 2766 | const ShaderIR& ir; | ||
| 2767 | const ShaderType stage; | ||
| 2768 | const Tegra::Shader::Header header; | ||
| 2769 | const Registry& registry; | ||
| 2770 | const Specialization& specialization; | ||
| 2771 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2772 | |||
| 2773 | const Id t_void = Name(TypeVoid(), "void"); | ||
| 2774 | |||
| 2775 | const Id t_bool = Name(TypeBool(), "bool"); | ||
| 2776 | const Id t_bool2 = Name(TypeVector(t_bool, 2), "bool2"); | ||
| 2777 | |||
| 2778 | const Id t_int = Name(TypeInt(32, true), "int"); | ||
| 2779 | const Id t_int2 = Name(TypeVector(t_int, 2), "int2"); | ||
| 2780 | const Id t_int3 = Name(TypeVector(t_int, 3), "int3"); | ||
| 2781 | const Id t_int4 = Name(TypeVector(t_int, 4), "int4"); | ||
| 2782 | |||
| 2783 | const Id t_uint = Name(TypeInt(32, false), "uint"); | ||
| 2784 | const Id t_uint2 = Name(TypeVector(t_uint, 2), "uint2"); | ||
| 2785 | const Id t_uint3 = Name(TypeVector(t_uint, 3), "uint3"); | ||
| 2786 | const Id t_uint4 = Name(TypeVector(t_uint, 4), "uint4"); | ||
| 2787 | |||
| 2788 | const Id t_float = Name(TypeFloat(32), "float"); | ||
| 2789 | const Id t_float2 = Name(TypeVector(t_float, 2), "float2"); | ||
| 2790 | const Id t_float3 = Name(TypeVector(t_float, 3), "float3"); | ||
| 2791 | const Id t_float4 = Name(TypeVector(t_float, 4), "float4"); | ||
| 2792 | |||
| 2793 | const Id t_prv_bool = Name(TypePointer(spv::StorageClass::Private, t_bool), "prv_bool"); | ||
| 2794 | const Id t_prv_float = Name(TypePointer(spv::StorageClass::Private, t_float), "prv_float"); | ||
| 2795 | |||
| 2796 | const Id t_func_uint = Name(TypePointer(spv::StorageClass::Function, t_uint), "func_uint"); | ||
| 2797 | |||
| 2798 | const Id t_in_bool = Name(TypePointer(spv::StorageClass::Input, t_bool), "in_bool"); | ||
| 2799 | const Id t_in_int = Name(TypePointer(spv::StorageClass::Input, t_int), "in_int"); | ||
| 2800 | const Id t_in_int4 = Name(TypePointer(spv::StorageClass::Input, t_int4), "in_int4"); | ||
| 2801 | const Id t_in_uint = Name(TypePointer(spv::StorageClass::Input, t_uint), "in_uint"); | ||
| 2802 | const Id t_in_uint3 = Name(TypePointer(spv::StorageClass::Input, t_uint3), "in_uint3"); | ||
| 2803 | const Id t_in_uint4 = Name(TypePointer(spv::StorageClass::Input, t_uint4), "in_uint4"); | ||
| 2804 | const Id t_in_float = Name(TypePointer(spv::StorageClass::Input, t_float), "in_float"); | ||
| 2805 | const Id t_in_float2 = Name(TypePointer(spv::StorageClass::Input, t_float2), "in_float2"); | ||
| 2806 | const Id t_in_float3 = Name(TypePointer(spv::StorageClass::Input, t_float3), "in_float3"); | ||
| 2807 | const Id t_in_float4 = Name(TypePointer(spv::StorageClass::Input, t_float4), "in_float4"); | ||
| 2808 | |||
| 2809 | const Id t_out_int = Name(TypePointer(spv::StorageClass::Output, t_int), "out_int"); | ||
| 2810 | |||
| 2811 | const Id t_out_float = Name(TypePointer(spv::StorageClass::Output, t_float), "out_float"); | ||
| 2812 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | ||
| 2813 | |||
| 2814 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | ||
| 2815 | const Id t_cbuf_std140 = Decorate( | ||
| 2816 | Name(TypeArray(t_float4, Constant(t_uint, MaxConstBufferElements)), "CbufStd140Array"), | ||
| 2817 | spv::Decoration::ArrayStride, 16U); | ||
| 2818 | const Id t_cbuf_scalar = Decorate( | ||
| 2819 | Name(TypeArray(t_float, Constant(t_uint, MaxConstBufferFloats)), "CbufScalarArray"), | ||
| 2820 | spv::Decoration::ArrayStride, 4U); | ||
| 2821 | const Id t_cbuf_std140_struct = MemberDecorate( | ||
| 2822 | Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2823 | const Id t_cbuf_scalar_struct = MemberDecorate( | ||
| 2824 | Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2825 | const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||
| 2826 | const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||
| 2827 | |||
| 2828 | Id t_smem_uint{}; | ||
| 2829 | |||
| 2830 | const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); | ||
| 2831 | const Id t_gmem_array = | ||
| 2832 | Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); | ||
| 2833 | const Id t_gmem_struct = MemberDecorate( | ||
| 2834 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 2835 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | ||
| 2836 | |||
| 2837 | const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); | ||
| 2838 | |||
| 2839 | const Id v_float_zero = Constant(t_float, 0.0f); | ||
| 2840 | const Id v_float_one = Constant(t_float, 1.0f); | ||
| 2841 | const Id v_uint_zero = Constant(t_uint, 0); | ||
| 2842 | |||
| 2843 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) | ||
| 2844 | const Id v_varying_default = | ||
| 2845 | ConstantComposite(t_float4, v_float_zero, v_float_zero, v_float_zero, v_float_one); | ||
| 2846 | |||
| 2847 | const Id v_true = ConstantTrue(t_bool); | ||
| 2848 | const Id v_false = ConstantFalse(t_bool); | ||
| 2849 | |||
| 2850 | Id t_scalar_half{}; | ||
| 2851 | Id t_half{}; | ||
| 2852 | |||
| 2853 | Id out_vertex{}; | ||
| 2854 | Id in_vertex{}; | ||
| 2855 | std::map<u32, Id> registers; | ||
| 2856 | std::map<u32, Id> custom_variables; | ||
| 2857 | std::map<Tegra::Shader::Pred, Id> predicates; | ||
| 2858 | std::map<u32, Id> flow_variables; | ||
| 2859 | Id local_memory{}; | ||
| 2860 | Id shared_memory{}; | ||
| 2861 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | ||
| 2862 | std::map<Attribute::Index, Id> input_attributes; | ||
| 2863 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; | ||
| 2864 | std::map<u32, Id> constant_buffers; | ||
| 2865 | std::map<GlobalMemoryBase, Id> global_buffers; | ||
| 2866 | std::map<u32, TexelBuffer> uniform_texels; | ||
| 2867 | std::map<u32, SampledImage> sampled_images; | ||
| 2868 | std::map<u32, StorageImage> images; | ||
| 2869 | |||
| 2870 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2871 | Id instance_index{}; | ||
| 2872 | Id vertex_index{}; | ||
| 2873 | Id base_instance{}; | ||
| 2874 | Id base_vertex{}; | ||
| 2875 | Id frag_depth{}; | ||
| 2876 | Id frag_coord{}; | ||
| 2877 | Id front_facing{}; | ||
| 2878 | Id point_coord{}; | ||
| 2879 | Id tess_level_outer{}; | ||
| 2880 | Id tess_level_inner{}; | ||
| 2881 | Id tess_coord{}; | ||
| 2882 | Id invocation_id{}; | ||
| 2883 | Id workgroup_id{}; | ||
| 2884 | Id local_invocation_id{}; | ||
| 2885 | Id thread_id{}; | ||
| 2886 | std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt | ||
| 2887 | |||
| 2888 | VertexIndices in_indices; | ||
| 2889 | VertexIndices out_indices; | ||
| 2890 | |||
| 2891 | std::vector<Id> interfaces; | ||
| 2892 | |||
| 2893 | Id jmp_to{}; | ||
| 2894 | Id ssy_flow_stack_top{}; | ||
| 2895 | Id pbk_flow_stack_top{}; | ||
| 2896 | Id ssy_flow_stack{}; | ||
| 2897 | Id pbk_flow_stack{}; | ||
| 2898 | Id continue_label{}; | ||
| 2899 | std::map<u32, Id> labels; | ||
| 2900 | |||
| 2901 | bool conditional_branch_set{}; | ||
| 2902 | bool inside_branch{}; | ||
| 2903 | }; | ||
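
The operation_decompilers table inside the class above pairs one handler with every OperationCode, and the trailing static_assert keeps the table and the enum in lockstep. A minimal, self-contained sketch of the same pointer-to-member dispatch pattern (class and handler names here are illustrative, not the decompiler's real API):

    #include <array>
    #include <cstddef>

    class MiniDecompiler {
    public:
        enum class Operation : std::size_t { Add, Mul, Neg, Amount };

        int Visit(Operation op) {
            // Index the table with the enum value and call the bound member function.
            return (this->*operation_table[static_cast<std::size_t>(op)])();
        }

    private:
        int EmitAdd() { return 1; }
        int EmitMul() { return 2; }
        int EmitNeg() { return 3; }

        using Handler = int (MiniDecompiler::*)();
        static constexpr std::array<Handler, 3> operation_table{
            &MiniDecompiler::EmitAdd,
            &MiniDecompiler::EmitMul,
            &MiniDecompiler::EmitNeg,
        };
        // Mirrors the assertion above: the table must cover every enum value.
        static_assert(operation_table.size() == static_cast<std::size_t>(Operation::Amount));
    };
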
| 2904 | |||
| 2905 | class ExprDecompiler { | ||
| 2906 | public: | ||
| 2907 | explicit ExprDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2908 | |||
| 2909 | Id operator()(const ExprAnd& expr) { | ||
| 2910 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2911 | const Id op1 = Visit(expr.operand1); | ||
| 2912 | const Id op2 = Visit(expr.operand2); | ||
| 2913 | return decomp.OpLogicalAnd(type_def, op1, op2); | ||
| 2914 | } | ||
| 2915 | |||
| 2916 | Id operator()(const ExprOr& expr) { | ||
| 2917 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2918 | const Id op1 = Visit(expr.operand1); | ||
| 2919 | const Id op2 = Visit(expr.operand2); | ||
| 2920 | return decomp.OpLogicalOr(type_def, op1, op2); | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | Id operator()(const ExprNot& expr) { | ||
| 2924 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | ||
| 2925 | const Id op1 = Visit(expr.operand1); | ||
| 2926 | return decomp.OpLogicalNot(type_def, op1); | ||
| 2927 | } | ||
| 2928 | |||
| 2929 | Id operator()(const ExprPredicate& expr) { | ||
| 2930 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | ||
| 2931 | return decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)); | ||
| 2932 | } | ||
| 2933 | |||
| 2934 | Id operator()(const ExprCondCode& expr) { | ||
| 2935 | return decomp.AsBool(decomp.Visit(decomp.ir.GetConditionCode(expr.cc))); | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | Id operator()(const ExprVar& expr) { | ||
| 2939 | return decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)); | ||
| 2940 | } | ||
| 2941 | |||
| 2942 | Id operator()(const ExprBoolean& expr) { | ||
| 2943 | return expr.value ? decomp.v_true : decomp.v_false; | ||
| 2944 | } | ||
| 2945 | |||
| 2946 | Id operator()(const ExprGprEqual& expr) { | ||
| 2947 | const Id target = decomp.Constant(decomp.t_uint, expr.value); | ||
| 2948 | Id gpr = decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)); | ||
| 2949 | gpr = decomp.OpBitcast(decomp.t_uint, gpr); | ||
| 2950 | return decomp.OpIEqual(decomp.t_bool, gpr, target); | ||
| 2951 | } | ||
| 2952 | |||
| 2953 | Id Visit(const Expr& node) { | ||
| 2954 | return std::visit(*this, *node); | ||
| 2955 | } | ||
| 2956 | |||
| 2957 | private: | ||
| 2958 | SPIRVDecompiler& decomp; | ||
| 2959 | }; | ||
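
ExprDecompiler is a classic std::visit visitor: one call operator per alternative of the Expr variant, with Visit dispatching on the node's runtime type. A small standalone sketch of that pattern, using simplified leaf nodes rather than yuzu's expression types:

    #include <array>
    #include <cstddef>
    #include <variant>

    // Simplified stand-ins for the expression variant (illustrative only).
    struct ExprBoolean { bool value; };
    struct ExprPredicate { std::size_t index; };
    using Expr = std::variant<ExprBoolean, ExprPredicate>;

    struct ExprEvaluator {
        const std::array<bool, 8>& predicates;

        // One overload per alternative; std::visit picks the right one at runtime.
        bool operator()(const ExprBoolean& expr) const { return expr.value; }
        bool operator()(const ExprPredicate& expr) const { return predicates[expr.index]; }

        bool Visit(const Expr& node) const { return std::visit(*this, node); }
    };

    // Usage:
    //   std::array<bool, 8> preds{true, false};
    //   ExprEvaluator eval{preds};
    //   bool a = eval.Visit(Expr{ExprBoolean{false}});   // false
    //   bool b = eval.Visit(Expr{ExprPredicate{0}});     // true
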
| 2960 | |||
| 2961 | class ASTDecompiler { | ||
| 2962 | public: | ||
| 2963 | explicit ASTDecompiler(SPIRVDecompiler& decomp_) : decomp{decomp_} {} | ||
| 2964 | |||
| 2965 | void operator()(const ASTProgram& ast) { | ||
| 2966 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2967 | while (current) { | ||
| 2968 | Visit(current); | ||
| 2969 | current = current->GetNext(); | ||
| 2970 | } | ||
| 2971 | } | ||
| 2972 | |||
| 2973 | void operator()(const ASTIfThen& ast) { | ||
| 2974 | ExprDecompiler expr_parser{decomp}; | ||
| 2975 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 2976 | const Id then_label = decomp.OpLabel(); | ||
| 2977 | const Id endif_label = decomp.OpLabel(); | ||
| 2978 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 2979 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 2980 | decomp.AddLabel(then_label); | ||
| 2981 | ASTNode current = ast.nodes.GetFirst(); | ||
| 2982 | while (current) { | ||
| 2983 | Visit(current); | ||
| 2984 | current = current->GetNext(); | ||
| 2985 | } | ||
| 2986 | decomp.OpBranch(endif_label); | ||
| 2987 | decomp.AddLabel(endif_label); | ||
| 2988 | } | ||
| 2989 | |||
| 2990 | void operator()([[maybe_unused]] const ASTIfElse& ast) { | ||
| 2991 | UNREACHABLE(); | ||
| 2992 | } | ||
| 2993 | |||
| 2994 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { | ||
| 2995 | UNREACHABLE(); | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | void operator()(const ASTBlockDecoded& ast) { | ||
| 2999 | decomp.VisitBasicBlock(ast.nodes); | ||
| 3000 | } | ||
| 3001 | |||
| 3002 | void operator()(const ASTVarSet& ast) { | ||
| 3003 | ExprDecompiler expr_parser{decomp}; | ||
| 3004 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3005 | decomp.OpStore(decomp.flow_variables.at(ast.index), condition); | ||
| 3006 | } | ||
| 3007 | |||
| 3008 | void operator()([[maybe_unused]] const ASTLabel& ast) { | ||
| 3009 | // Do nothing | ||
| 3010 | } | ||
| 3011 | |||
| 3012 | void operator()([[maybe_unused]] const ASTGoto& ast) { | ||
| 3013 | UNREACHABLE(); | ||
| 3014 | } | ||
| 3015 | |||
| 3016 | void operator()(const ASTDoWhile& ast) { | ||
| 3017 | const Id loop_label = decomp.OpLabel(); | ||
| 3018 | const Id endloop_label = decomp.OpLabel(); | ||
| 3019 | const Id loop_start_block = decomp.OpLabel(); | ||
| 3020 | const Id loop_continue_block = decomp.OpLabel(); | ||
| 3021 | current_loop_exit = endloop_label; | ||
| 3022 | decomp.OpBranch(loop_label); | ||
| 3023 | decomp.AddLabel(loop_label); | ||
| 3024 | decomp.OpLoopMerge(endloop_label, loop_continue_block, spv::LoopControlMask::MaskNone); | ||
| 3025 | decomp.OpBranch(loop_start_block); | ||
| 3026 | decomp.AddLabel(loop_start_block); | ||
| 3027 | ASTNode current = ast.nodes.GetFirst(); | ||
| 3028 | while (current) { | ||
| 3029 | Visit(current); | ||
| 3030 | current = current->GetNext(); | ||
| 3031 | } | ||
| 3032 | decomp.OpBranch(loop_continue_block); | ||
| 3033 | decomp.AddLabel(loop_continue_block); | ||
| 3034 | ExprDecompiler expr_parser{decomp}; | ||
| 3035 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3036 | decomp.OpBranchConditional(condition, loop_label, endloop_label); | ||
| 3037 | decomp.AddLabel(endloop_label); | ||
| 3038 | } | ||
| 3039 | |||
| 3040 | void operator()(const ASTReturn& ast) { | ||
| 3041 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | ||
| 3042 | ExprDecompiler expr_parser{decomp}; | ||
| 3043 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3044 | const Id then_label = decomp.OpLabel(); | ||
| 3045 | const Id endif_label = decomp.OpLabel(); | ||
| 3046 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 3047 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 3048 | decomp.AddLabel(then_label); | ||
| 3049 | if (ast.kills) { | ||
| 3050 | decomp.OpKill(); | ||
| 3051 | } else { | ||
| 3052 | decomp.PreExit(); | ||
| 3053 | decomp.OpReturn(); | ||
| 3054 | } | ||
| 3055 | decomp.AddLabel(endif_label); | ||
| 3056 | } else { | ||
| 3057 | const Id next_block = decomp.OpLabel(); | ||
| 3058 | decomp.OpBranch(next_block); | ||
| 3059 | decomp.AddLabel(next_block); | ||
| 3060 | if (ast.kills) { | ||
| 3061 | decomp.OpKill(); | ||
| 3062 | } else { | ||
| 3063 | decomp.PreExit(); | ||
| 3064 | decomp.OpReturn(); | ||
| 3065 | } | ||
| 3066 | decomp.AddLabel(decomp.OpLabel()); | ||
| 3067 | } | ||
| 3068 | } | ||
| 3069 | |||
| 3070 | void operator()(const ASTBreak& ast) { | ||
| 3071 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | ||
| 3072 | ExprDecompiler expr_parser{decomp}; | ||
| 3073 | const Id condition = expr_parser.Visit(ast.condition); | ||
| 3074 | const Id then_label = decomp.OpLabel(); | ||
| 3075 | const Id endif_label = decomp.OpLabel(); | ||
| 3076 | decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 3077 | decomp.OpBranchConditional(condition, then_label, endif_label); | ||
| 3078 | decomp.AddLabel(then_label); | ||
| 3079 | decomp.OpBranch(current_loop_exit); | ||
| 3080 | decomp.AddLabel(endif_label); | ||
| 3081 | } else { | ||
| 3082 | const Id next_block = decomp.OpLabel(); | ||
| 3083 | decomp.OpBranch(next_block); | ||
| 3084 | decomp.AddLabel(next_block); | ||
| 3085 | decomp.OpBranch(current_loop_exit); | ||
| 3086 | decomp.AddLabel(decomp.OpLabel()); | ||
| 3087 | } | ||
| 3088 | } | ||
| 3089 | |||
| 3090 | void Visit(const ASTNode& node) { | ||
| 3091 | std::visit(*this, *node->GetInnerData()); | ||
| 3092 | } | ||
| 3093 | |||
| 3094 | private: | ||
| 3095 | SPIRVDecompiler& decomp; | ||
| 3096 | Id current_loop_exit{}; | ||
| 3097 | }; | ||
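
The ASTDoWhile visitor above shows the block layout that SPIR-V structured control flow requires: OpLoopMerge sits in the loop header and names both the merge block and the continue block before the body is emitted, and the back-edge is the conditional branch at the end of the continue block. The stub below is purely illustrative (it is not sirit's API); it only prints the instruction sequence so the ordering is easy to see:

    #include <cstdio>

    // A fake "module builder" that just prints; the real code emits SPIR-V.
    struct StubModule {
        int next_id = 1;
        int OpLabel() { return next_id++; }
        void AddLabel(int id) { std::printf("%%%d = OpLabel\n", id); }
        void OpBranch(int target) { std::printf("  OpBranch %%%d\n", target); }
        void OpLoopMerge(int merge, int cont) {
            std::printf("  OpLoopMerge %%%d %%%d None\n", merge, cont);
        }
        void OpBranchConditional(const char* cond, int t, int f) {
            std::printf("  OpBranchConditional %s %%%d %%%d\n", cond, t, f);
        }
    };

    int main() {
        StubModule m;
        const int header = m.OpLabel();
        const int body = m.OpLabel();
        const int cont = m.OpLabel();
        const int merge = m.OpLabel();

        m.OpBranch(header);
        m.AddLabel(header);
        m.OpLoopMerge(merge, cont); // declared in the header, before the body
        m.OpBranch(body);
        m.AddLabel(body);
        //   ... loop body is emitted here ...
        m.OpBranch(cont);
        m.AddLabel(cont);
        //   ... loop condition is evaluated here ...
        m.OpBranchConditional("%cond", header, merge); // back-edge or exit
        m.AddLabel(merge);
        return 0;
    }
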
| 3098 | |||
| 3099 | void SPIRVDecompiler::DecompileAST() { | ||
| 3100 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 3101 | for (u32 i = 0; i < num_flow_variables; i++) { | ||
| 3102 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | ||
| 3103 | Name(id, fmt::format("flow_var_{}", i)); | ||
| 3104 | flow_variables.emplace(i, AddGlobalVariable(id)); | ||
| 3105 | } | ||
| 3106 | |||
| 3107 | DefinePrologue(); | ||
| 3108 | |||
| 3109 | const ASTNode program = ir.GetASTProgram(); | ||
| 3110 | ASTDecompiler decompiler{*this}; | ||
| 3111 | decompiler.Visit(program); | ||
| 3112 | |||
| 3113 | const Id next_block = OpLabel(); | ||
| 3114 | OpBranch(next_block); | ||
| 3115 | AddLabel(next_block); | ||
| 3116 | } | ||
| 3117 | |||
| 3118 | } // Anonymous namespace | ||
| 3119 | |||
| 3120 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||
| 3121 | ShaderEntries entries; | ||
| 3122 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 3123 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); | ||
| 3124 | } | ||
| 3125 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 3126 | entries.global_buffers.emplace_back(GlobalBufferEntry{ | ||
| 3127 | .cbuf_index = base.cbuf_index, | ||
| 3128 | .cbuf_offset = base.cbuf_offset, | ||
| 3129 | .is_written = usage.is_written, | ||
| 3130 | }); | ||
| 3131 | } | ||
| 3132 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 3133 | if (sampler.is_buffer) { | ||
| 3134 | entries.uniform_texels.emplace_back(sampler); | ||
| 3135 | } else { | ||
| 3136 | entries.samplers.emplace_back(sampler); | ||
| 3137 | } | ||
| 3138 | } | ||
| 3139 | for (const auto& image : ir.GetImages()) { | ||
| 3140 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 3141 | entries.storage_texels.emplace_back(image); | ||
| 3142 | } else { | ||
| 3143 | entries.images.emplace_back(image); | ||
| 3144 | } | ||
| 3145 | } | ||
| 3146 | for (const auto& attribute : ir.GetInputAttributes()) { | ||
| 3147 | if (IsGenericAttribute(attribute)) { | ||
| 3148 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||
| 3149 | } | ||
| 3150 | } | ||
| 3151 | for (const auto& buffer : entries.const_buffers) { | ||
| 3152 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 3153 | } | ||
| 3154 | entries.clip_distances = ir.GetClipDistances(); | ||
| 3155 | entries.shader_length = ir.GetLength(); | ||
| 3156 | entries.uses_warps = ir.UsesWarps(); | ||
| 3157 | return entries; | ||
| 3158 | } | ||
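
GenerateShaderEntries folds each constant-buffer index into the enabled_uniform_buffers bitmask with a single shift-and-or per entry. A tiny sketch of that accumulation (hypothetical helper, not part of the codebase):

    #include <cstdint>
    #include <vector>

    // Builds a bitmask with one bit set per used constant-buffer slot.
    std::uint32_t MakeEnabledUniformBufferMask(const std::vector<std::uint32_t>& cbuf_indices) {
        std::uint32_t mask = 0;
        for (const std::uint32_t index : cbuf_indices) {
            mask |= 1U << index; // slot `index` is referenced by the shader
        }
        return mask;
    }

    // Usage: MakeEnabledUniformBufferMask({0, 2}) == 0b101 (slots 0 and 2 enabled).
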
| 3159 | |||
| 3160 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 3161 | ShaderType stage, const VideoCommon::Shader::Registry& registry, | ||
| 3162 | const Specialization& specialization) { | ||
| 3163 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 3164 | } | ||
| 3165 | |||
| 3166 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h deleted file mode 100644 index 5d94132a5..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <set> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/engines/shader_type.h" | ||
| 14 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace Vulkan { | ||
| 18 | |||
| 19 | class Device; | ||
| 20 | |||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 22 | using UniformTexelEntry = VideoCommon::Shader::SamplerEntry; | ||
| 23 | using SamplerEntry = VideoCommon::Shader::SamplerEntry; | ||
| 24 | using StorageTexelEntry = VideoCommon::Shader::ImageEntry; | ||
| 25 | using ImageEntry = VideoCommon::Shader::ImageEntry; | ||
| 26 | |||
| 27 | constexpr u32 DESCRIPTOR_SET = 0; | ||
| 28 | |||
| 29 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | ||
| 30 | public: | ||
| 31 | explicit constexpr ConstBufferEntry(const ConstBuffer& entry_, u32 index_) | ||
| 32 | : ConstBuffer{entry_}, index{index_} {} | ||
| 33 | |||
| 34 | constexpr u32 GetIndex() const { | ||
| 35 | return index; | ||
| 36 | } | ||
| 37 | |||
| 38 | private: | ||
| 39 | u32 index{}; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct GlobalBufferEntry { | ||
| 43 | u32 cbuf_index{}; | ||
| 44 | u32 cbuf_offset{}; | ||
| 45 | bool is_written{}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | struct ShaderEntries { | ||
| 49 | u32 NumBindings() const { | ||
| 50 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + | ||
| 51 | uniform_texels.size() + samplers.size() + storage_texels.size() + | ||
| 52 | images.size()); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::vector<ConstBufferEntry> const_buffers; | ||
| 56 | std::vector<GlobalBufferEntry> global_buffers; | ||
| 57 | std::vector<UniformTexelEntry> uniform_texels; | ||
| 58 | std::vector<SamplerEntry> samplers; | ||
| 59 | std::vector<StorageTexelEntry> storage_texels; | ||
| 60 | std::vector<ImageEntry> images; | ||
| 61 | std::set<u32> attributes; | ||
| 62 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||
| 63 | std::size_t shader_length{}; | ||
| 64 | u32 enabled_uniform_buffers{}; | ||
| 65 | bool uses_warps{}; | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct Specialization final { | ||
| 69 | u32 base_binding{}; | ||
| 70 | |||
| 71 | // Compute specific | ||
| 72 | std::array<u32, 3> workgroup_size{}; | ||
| 73 | u32 shared_memory_size{}; | ||
| 74 | |||
| 75 | // Graphics specific | ||
| 76 | std::optional<float> point_size; | ||
| 77 | std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; | ||
| 78 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | ||
| 79 | bool ndc_minus_one_to_one{}; | ||
| 80 | bool early_fragment_tests{}; | ||
| 81 | float alpha_test_ref{}; | ||
| 82 | Maxwell::ComparisonOp alpha_test_func{}; | ||
| 83 | }; | ||
| 84 | // Old gcc versions don't consider this trivially copyable. | ||
| 85 | // static_assert(std::is_trivially_copyable_v<Specialization>); | ||
| 86 | |||
| 87 | struct SPIRVShader { | ||
| 88 | std::vector<u32> code; | ||
| 89 | ShaderEntries entries; | ||
| 90 | }; | ||
| 91 | |||
| 92 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | ||
| 93 | |||
| 94 | std::vector<u32> Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 95 | Tegra::Engines::ShaderType stage, | ||
| 96 | const VideoCommon::Shader::Registry& registry, | ||
| 97 | const Specialization& specialization); | ||
| 98 | |||
| 99 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 0412b5234..555b12ed7 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -91,7 +91,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem | |||
| 91 | .flags = 0, | 91 | .flags = 0, |
| 92 | .size = STREAM_BUFFER_SIZE, | 92 | .size = STREAM_BUFFER_SIZE, |
| 93 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 93 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 94 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | 94 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 95 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 95 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 96 | .queueFamilyIndexCount = 0, | 96 | .queueFamilyIndexCount = 0, |
| 97 | .pQueueFamilyIndices = nullptr, | 97 | .pQueueFamilyIndices = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 956f86845..e3b7dd61c 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -29,9 +29,10 @@ using Flags = Maxwell3D::DirtyState::Flags; | |||
| 29 | 29 | ||
| 30 | Flags MakeInvalidationFlags() { | 30 | Flags MakeInvalidationFlags() { |
| 31 | static constexpr int INVALIDATION_FLAGS[]{ | 31 | static constexpr int INVALIDATION_FLAGS[]{ |
| 32 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | 32 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, |
| 33 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | 33 | StencilProperties, LineWidth, CullMode, DepthBoundsEnable, DepthTestEnable, |
| 34 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, | 34 | DepthWriteEnable, DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, |
| 35 | VertexBuffers, VertexInput, | ||
| 35 | }; | 36 | }; |
| 36 | Flags flags{}; | 37 | Flags flags{}; |
| 37 | for (const int flag : INVALIDATION_FLAGS) { | 38 | for (const int flag : INVALIDATION_FLAGS) { |
| @@ -40,6 +41,12 @@ Flags MakeInvalidationFlags() { | |||
| 40 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | 41 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { |
| 41 | flags[index] = true; | 42 | flags[index] = true; |
| 42 | } | 43 | } |
| 44 | for (int index = VertexAttribute0; index <= VertexAttribute31; ++index) { | ||
| 45 | flags[index] = true; | ||
| 46 | } | ||
| 47 | for (int index = VertexBinding0; index <= VertexBinding31; ++index) { | ||
| 48 | flags[index] = true; | ||
| 49 | } | ||
| 43 | return flags; | 50 | return flags; |
| 44 | } | 51 | } |
| 45 | 52 | ||
| @@ -79,6 +86,11 @@ void SetupDirtyStencilProperties(Tables& tables) { | |||
| 79 | table[OFF(stencil_back_func_mask)] = StencilProperties; | 86 | table[OFF(stencil_back_func_mask)] = StencilProperties; |
| 80 | } | 87 | } |
| 81 | 88 | ||
| 89 | void SetupDirtyLineWidth(Tables& tables) { | ||
| 90 | tables[0][OFF(line_width_smooth)] = LineWidth; | ||
| 91 | tables[0][OFF(line_width_aliased)] = LineWidth; | ||
| 92 | } | ||
| 93 | |||
| 82 | void SetupDirtyCullMode(Tables& tables) { | 94 | void SetupDirtyCullMode(Tables& tables) { |
| 83 | auto& table = tables[0]; | 95 | auto& table = tables[0]; |
| 84 | table[OFF(cull_face)] = CullMode; | 96 | table[OFF(cull_face)] = CullMode; |
| @@ -134,31 +146,38 @@ void SetupDirtyBlending(Tables& tables) { | |||
| 134 | FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); | 146 | FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); |
| 135 | } | 147 | } |
| 136 | 148 | ||
| 137 | void SetupDirtyInstanceDivisors(Tables& tables) { | 149 | void SetupDirtyViewportSwizzles(Tables& tables) { |
| 138 | static constexpr size_t divisor_offset = 3; | 150 | static constexpr size_t swizzle_offset = 6; |
| 139 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { | 151 | for (size_t index = 0; index < Regs::NumViewports; ++index) { |
| 140 | tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; | 152 | tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = |
| 141 | tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = | 153 | ViewportSwizzles; |
| 142 | InstanceDivisors; | ||
| 143 | } | 154 | } |
| 144 | } | 155 | } |
| 145 | 156 | ||
| 146 | void SetupDirtyVertexAttributes(Tables& tables) { | 157 | void SetupDirtyVertexAttributes(Tables& tables) { |
| 147 | FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); | 158 | for (size_t i = 0; i < Regs::NumVertexAttributes; ++i) { |
| 159 | const size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); | ||
| 160 | FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexAttribute0 + i); | ||
| 161 | } | ||
| 162 | FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexInput); | ||
| 148 | } | 163 | } |
| 149 | 164 | ||
| 150 | void SetupDirtyViewportSwizzles(Tables& tables) { | 165 | void SetupDirtyVertexBindings(Tables& tables) { |
| 151 | static constexpr size_t swizzle_offset = 6; | 166 | // Do NOT include stride here, it's implicit in VertexBuffer |
| 152 | for (size_t index = 0; index < Regs::NumViewports; ++index) { | 167 | static constexpr size_t divisor_offset = 3; |
| 153 | tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = | 168 | for (size_t i = 0; i < Regs::NumVertexArrays; ++i) { |
| 154 | ViewportSwizzles; | 169 | const u8 flag = static_cast<u8>(VertexBinding0 + i); |
| 170 | tables[0][OFF(instanced_arrays) + i] = VertexInput; | ||
| 171 | tables[1][OFF(instanced_arrays) + i] = flag; | ||
| 172 | tables[0][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = VertexInput; | ||
| 173 | tables[1][OFF(vertex_array) + i * NUM(vertex_array[0]) + divisor_offset] = flag; | ||
| 155 | } | 174 | } |
| 156 | } | 175 | } |
| 157 | } // Anonymous namespace | 176 | } // Anonymous namespace |
| 158 | 177 | ||
| 159 | StateTracker::StateTracker(Tegra::GPU& gpu) | 178 | StateTracker::StateTracker(Tegra::GPU& gpu) |
| 160 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 179 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { |
| 161 | auto& tables = gpu.Maxwell3D().dirty.tables; | 180 | auto& tables{gpu.Maxwell3D().dirty.tables}; |
| 162 | SetupDirtyFlags(tables); | 181 | SetupDirtyFlags(tables); |
| 163 | SetupDirtyViewports(tables); | 182 | SetupDirtyViewports(tables); |
| 164 | SetupDirtyScissors(tables); | 183 | SetupDirtyScissors(tables); |
| @@ -166,6 +185,7 @@ StateTracker::StateTracker(Tegra::GPU& gpu) | |||
| 166 | SetupDirtyBlendConstants(tables); | 185 | SetupDirtyBlendConstants(tables); |
| 167 | SetupDirtyDepthBounds(tables); | 186 | SetupDirtyDepthBounds(tables); |
| 168 | SetupDirtyStencilProperties(tables); | 187 | SetupDirtyStencilProperties(tables); |
| 188 | SetupDirtyLineWidth(tables); | ||
| 169 | SetupDirtyCullMode(tables); | 189 | SetupDirtyCullMode(tables); |
| 170 | SetupDirtyDepthBoundsEnable(tables); | 190 | SetupDirtyDepthBoundsEnable(tables); |
| 171 | SetupDirtyDepthTestEnable(tables); | 191 | SetupDirtyDepthTestEnable(tables); |
| @@ -175,9 +195,9 @@ StateTracker::StateTracker(Tegra::GPU& gpu) | |||
| 175 | SetupDirtyStencilOp(tables); | 195 | SetupDirtyStencilOp(tables); |
| 176 | SetupDirtyStencilTestEnable(tables); | 196 | SetupDirtyStencilTestEnable(tables); |
| 177 | SetupDirtyBlending(tables); | 197 | SetupDirtyBlending(tables); |
| 178 | SetupDirtyInstanceDivisors(tables); | ||
| 179 | SetupDirtyVertexAttributes(tables); | ||
| 180 | SetupDirtyViewportSwizzles(tables); | 198 | SetupDirtyViewportSwizzles(tables); |
| 199 | SetupDirtyVertexAttributes(tables); | ||
| 200 | SetupDirtyVertexBindings(tables); | ||
| 181 | } | 201 | } |
| 182 | 202 | ||
| 183 | } // namespace Vulkan | 203 | } // namespace Vulkan |
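
The state-tracker changes keep the existing dirty-flag scheme: the Setup* functions fill tables that map Maxwell register offsets to flag indices, a register write sets the mapped flag, and the renderer consumes it with an exchange (the Touch* helpers). A compact stand-in model of that mechanism, with made-up offsets and only two flags:

    #include <array>
    #include <bitset>
    #include <cstddef>

    enum DirtyFlag : std::size_t { LineWidth, CullMode, NumFlags };

    struct DirtyState {
        // Register offset -> dirty flag; filled once at startup (like the Setup* functions).
        std::array<std::size_t, 256> table{};
        std::bitset<NumFlags> flags;

        void WriteRegister(std::size_t offset) { flags.set(table[offset]); }

        bool Exchange(std::size_t flag, bool new_value) {
            const bool was_set = flags[flag];
            flags[flag] = new_value;
            return was_set;
        }
        bool TouchLineWidth() { return Exchange(LineWidth, false); }
        bool TouchCullMode() { return Exchange(CullMode, false); }
    };

    // Usage (offsets invented for the example):
    //   DirtyState state;
    //   state.table[0x10] = LineWidth;        // SetupDirtyLineWidth equivalent
    //   state.WriteRegister(0x10);            // guest writes line_width_smooth
    //   bool dirty = state.TouchLineWidth();  // true, and the flag is now clear
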
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 84e918a71..5f78f6950 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h | |||
| @@ -19,12 +19,19 @@ namespace Dirty { | |||
| 19 | enum : u8 { | 19 | enum : u8 { |
| 20 | First = VideoCommon::Dirty::LastCommonEntry, | 20 | First = VideoCommon::Dirty::LastCommonEntry, |
| 21 | 21 | ||
| 22 | VertexInput, | ||
| 23 | VertexAttribute0, | ||
| 24 | VertexAttribute31 = VertexAttribute0 + 31, | ||
| 25 | VertexBinding0, | ||
| 26 | VertexBinding31 = VertexBinding0 + 31, | ||
| 27 | |||
| 22 | Viewports, | 28 | Viewports, |
| 23 | Scissors, | 29 | Scissors, |
| 24 | DepthBias, | 30 | DepthBias, |
| 25 | BlendConstants, | 31 | BlendConstants, |
| 26 | DepthBounds, | 32 | DepthBounds, |
| 27 | StencilProperties, | 33 | StencilProperties, |
| 34 | LineWidth, | ||
| 28 | 35 | ||
| 29 | CullMode, | 36 | CullMode, |
| 30 | DepthBoundsEnable, | 37 | DepthBoundsEnable, |
| @@ -36,11 +43,9 @@ enum : u8 { | |||
| 36 | StencilTestEnable, | 43 | StencilTestEnable, |
| 37 | 44 | ||
| 38 | Blending, | 45 | Blending, |
| 39 | InstanceDivisors, | ||
| 40 | VertexAttributes, | ||
| 41 | ViewportSwizzles, | 46 | ViewportSwizzles, |
| 42 | 47 | ||
| 43 | Last | 48 | Last, |
| 44 | }; | 49 | }; |
| 45 | static_assert(Last <= std::numeric_limits<u8>::max()); | 50 | static_assert(Last <= std::numeric_limits<u8>::max()); |
| 46 | 51 | ||
| @@ -89,6 +94,10 @@ public: | |||
| 89 | return Exchange(Dirty::StencilProperties, false); | 94 | return Exchange(Dirty::StencilProperties, false); |
| 90 | } | 95 | } |
| 91 | 96 | ||
| 97 | bool TouchLineWidth() const { | ||
| 98 | return Exchange(Dirty::LineWidth, false); | ||
| 99 | } | ||
| 100 | |||
| 92 | bool TouchCullMode() { | 101 | bool TouchCullMode() { |
| 93 | return Exchange(Dirty::CullMode, false); | 102 | return Exchange(Dirty::CullMode, false); |
| 94 | } | 103 | } |
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index dfd5c65ba..d990eefba 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -65,6 +65,9 @@ VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKSchedul | |||
| 65 | VKSwapchain::~VKSwapchain() = default; | 65 | VKSwapchain::~VKSwapchain() = default; |
| 66 | 66 | ||
| 67 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | 67 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { |
| 68 | is_outdated = false; | ||
| 69 | is_suboptimal = false; | ||
| 70 | |||
| 68 | const auto physical_device = device.GetPhysical(); | 71 | const auto physical_device = device.GetPhysical(); |
| 69 | const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; | 72 | const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; |
| 70 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { | 73 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { |
| @@ -82,21 +85,31 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | |||
| 82 | resource_ticks.resize(image_count); | 85 | resource_ticks.resize(image_count); |
| 83 | } | 86 | } |
| 84 | 87 | ||
| 85 | bool VKSwapchain::AcquireNextImage() { | 88 | void VKSwapchain::AcquireNextImage() { |
| 86 | const VkResult result = | 89 | const VkResult result = device.GetLogical().AcquireNextImageKHR( |
| 87 | device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), | 90 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], |
| 88 | *present_semaphores[frame_index], {}, &image_index); | 91 | VK_NULL_HANDLE, &image_index); |
| 89 | 92 | switch (result) { | |
| 93 | case VK_SUCCESS: | ||
| 94 | break; | ||
| 95 | case VK_SUBOPTIMAL_KHR: | ||
| 96 | is_suboptimal = true; | ||
| 97 | break; | ||
| 98 | case VK_ERROR_OUT_OF_DATE_KHR: | ||
| 99 | is_outdated = true; | ||
| 100 | break; | ||
| 101 | default: | ||
| 102 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); | ||
| 103 | break; | ||
| 104 | } | ||
| 90 | scheduler.Wait(resource_ticks[image_index]); | 105 | scheduler.Wait(resource_ticks[image_index]); |
| 91 | return result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR; | 106 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 92 | } | 107 | } |
| 93 | 108 | ||
| 94 | bool VKSwapchain::Present(VkSemaphore render_semaphore) { | 109 | void VKSwapchain::Present(VkSemaphore render_semaphore) { |
| 95 | const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; | 110 | const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; |
| 96 | const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; | 111 | const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; |
| 97 | const auto present_queue{device.GetPresentQueue()}; | 112 | const auto present_queue{device.GetPresentQueue()}; |
| 98 | bool recreated = false; | ||
| 99 | |||
| 100 | const VkPresentInfoKHR present_info{ | 113 | const VkPresentInfoKHR present_info{ |
| 101 | .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, | 114 | .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, |
| 102 | .pNext = nullptr, | 115 | .pNext = nullptr, |
| @@ -107,7 +120,6 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { | |||
| 107 | .pImageIndices = &image_index, | 120 | .pImageIndices = &image_index, |
| 108 | .pResults = nullptr, | 121 | .pResults = nullptr, |
| 109 | }; | 122 | }; |
| 110 | |||
| 111 | switch (const VkResult result = present_queue.Present(present_info)) { | 123 | switch (const VkResult result = present_queue.Present(present_info)) { |
| 112 | case VK_SUCCESS: | 124 | case VK_SUCCESS: |
| 113 | break; | 125 | break; |
| @@ -115,24 +127,16 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore) { | |||
| 115 | LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); | 127 | LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); |
| 116 | break; | 128 | break; |
| 117 | case VK_ERROR_OUT_OF_DATE_KHR: | 129 | case VK_ERROR_OUT_OF_DATE_KHR: |
| 118 | if (current_width > 0 && current_height > 0) { | 130 | is_outdated = true; |
| 119 | Create(current_width, current_height, current_srgb); | ||
| 120 | recreated = true; | ||
| 121 | } | ||
| 122 | break; | 131 | break; |
| 123 | default: | 132 | default: |
| 124 | LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); | 133 | LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); |
| 125 | break; | 134 | break; |
| 126 | } | 135 | } |
| 127 | 136 | ++frame_index; | |
| 128 | resource_ticks[image_index] = scheduler.CurrentTick(); | 137 | if (frame_index >= image_count) { |
| 129 | frame_index = (frame_index + 1) % static_cast<u32>(image_count); | 138 | frame_index = 0; |
| 130 | return recreated; | 139 | } |
| 131 | } | ||
| 132 | |||
| 133 | bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { | ||
| 134 | // TODO(Rodrigo): Handle framebuffer pixel format changes | ||
| 135 | return framebuffer.width != current_width || framebuffer.height != current_height; | ||
| 136 | } | 140 | } |
| 137 | 141 | ||
| 138 | void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, | 142 | void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, |
| @@ -148,7 +152,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 148 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { | 152 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { |
| 149 | requested_image_count = capabilities.maxImageCount; | 153 | requested_image_count = capabilities.maxImageCount; |
| 150 | } | 154 | } |
| 151 | |||
| 152 | VkSwapchainCreateInfoKHR swapchain_ci{ | 155 | VkSwapchainCreateInfoKHR swapchain_ci{ |
| 153 | .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, | 156 | .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, |
| 154 | .pNext = nullptr, | 157 | .pNext = nullptr, |
| @@ -169,7 +172,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 169 | .clipped = VK_FALSE, | 172 | .clipped = VK_FALSE, |
| 170 | .oldSwapchain = nullptr, | 173 | .oldSwapchain = nullptr, |
| 171 | }; | 174 | }; |
| 172 | |||
| 173 | const u32 graphics_family{device.GetGraphicsFamily()}; | 175 | const u32 graphics_family{device.GetGraphicsFamily()}; |
| 174 | const u32 present_family{device.GetPresentFamily()}; | 176 | const u32 present_family{device.GetPresentFamily()}; |
| 175 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; | 177 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; |
| @@ -178,7 +180,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 178 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); | 180 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); |
| 179 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); | 181 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); |
| 180 | } | 182 | } |
| 181 | |||
| 182 | // Request the size again to reduce the possibility of a TOCTOU race condition. | 183 | // Request the size again to reduce the possibility of a TOCTOU race condition. |
| 183 | const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); | 184 | const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); |
| 184 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); | 185 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); |
| @@ -186,8 +187,6 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 186 | swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); | 187 | swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); |
| 187 | 188 | ||
| 188 | extent = swapchain_ci.imageExtent; | 189 | extent = swapchain_ci.imageExtent; |
| 189 | current_width = extent.width; | ||
| 190 | current_height = extent.height; | ||
| 191 | current_srgb = srgb; | 190 | current_srgb = srgb; |
| 192 | 191 | ||
| 193 | images = swapchain.GetImages(); | 192 | images = swapchain.GetImages(); |
| @@ -197,8 +196,8 @@ void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, | |||
| 197 | 196 | ||
| 198 | void VKSwapchain::CreateSemaphores() { | 197 | void VKSwapchain::CreateSemaphores() { |
| 199 | present_semaphores.resize(image_count); | 198 | present_semaphores.resize(image_count); |
| 200 | std::generate(present_semaphores.begin(), present_semaphores.end(), | 199 | std::ranges::generate(present_semaphores, |
| 201 | [this] { return device.GetLogical().CreateSemaphore(); }); | 200 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 202 | } | 201 | } |
| 203 | 202 | ||
| 204 | void VKSwapchain::CreateImageViews() { | 203 | void VKSwapchain::CreateImageViews() { |
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index adc8d27cf..35c2cdc14 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -28,14 +28,25 @@ public: | |||
| 28 | void Create(u32 width, u32 height, bool srgb); | 28 | void Create(u32 width, u32 height, bool srgb); |
| 29 | 29 | ||
| 30 | /// Acquires the next image in the swapchain, waits as needed. | 30 | /// Acquires the next image in the swapchain, waits as needed. |
| 31 | bool AcquireNextImage(); | 31 | void AcquireNextImage(); |
| 32 | 32 | ||
| 33 | /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be | 33 | /// Presents the rendered image to the swapchain. |
| 34 | /// recreated. Takes responsibility for the ownership of fence. | 34 | void Present(VkSemaphore render_semaphore); |
| 35 | bool Present(VkSemaphore render_semaphore); | ||
| 36 | 35 | ||
| 37 | /// Returns true when the framebuffer layout has changed. | 36 | /// Returns true when the color space has changed. |
| 38 | bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; | 37 | bool HasColorSpaceChanged(bool is_srgb) const { |
| 38 | return current_srgb != is_srgb; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Returns true when the swapchain is outdated. | ||
| 42 | bool IsOutDated() const { | ||
| 43 | return is_outdated; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Returns true when the swapchain is suboptimal. | ||
| 47 | bool IsSubOptimal() const { | ||
| 48 | return is_suboptimal; | ||
| 49 | } | ||
| 39 | 50 | ||
| 40 | VkExtent2D GetSize() const { | 51 | VkExtent2D GetSize() const { |
| 41 | return extent; | 52 | return extent; |
| @@ -61,10 +72,6 @@ public: | |||
| 61 | return image_format; | 72 | return image_format; |
| 62 | } | 73 | } |
| 63 | 74 | ||
| 64 | bool GetSrgbState() const { | ||
| 65 | return current_srgb; | ||
| 66 | } | ||
| 67 | |||
| 68 | private: | 75 | private: |
| 69 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, | 76 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, |
| 70 | bool srgb); | 77 | bool srgb); |
| @@ -92,9 +99,9 @@ private: | |||
| 92 | VkFormat image_format{}; | 99 | VkFormat image_format{}; |
| 93 | VkExtent2D extent{}; | 100 | VkExtent2D extent{}; |
| 94 | 101 | ||
| 95 | u32 current_width{}; | ||
| 96 | u32 current_height{}; | ||
| 97 | bool current_srgb{}; | 102 | bool current_srgb{}; |
| 103 | bool is_outdated{}; | ||
| 104 | bool is_suboptimal{}; | ||
| 98 | }; | 105 | }; |
| 99 | 106 | ||
| 100 | } // namespace Vulkan | 107 | } // namespace Vulkan |
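
With the swapchain API reshaped this way, recreation is driven by the caller instead of happening inside Present: Acquire and Present only record the outdated/suboptimal state, and the renderer checks those sticky flags (plus the colour-space change) before each frame. A sketch of the expected call pattern, using a stand-in Swapchain struct rather than yuzu's VKSwapchain:

    // Stand-in type that mimics the flag behaviour of the new interface.
    struct Swapchain {
        bool outdated = false;
        bool suboptimal = false;
        bool srgb = false;

        void Create(unsigned /*width*/, unsigned /*height*/, bool want_srgb) {
            outdated = false;     // Create() clears the sticky flags first
            suboptimal = false;
            srgb = want_srgb;
        }
        void AcquireNextImage() { /* may set outdated or suboptimal */ }
        void Present() { /* may set outdated */ }

        bool IsOutDated() const { return outdated; }
        bool IsSubOptimal() const { return suboptimal; }
        bool HasColorSpaceChanged(bool want_srgb) const { return srgb != want_srgb; }
    };

    void DrawFrame(Swapchain& swapchain, unsigned width, unsigned height, bool want_srgb) {
        if (swapchain.IsOutDated() || swapchain.IsSubOptimal() ||
            swapchain.HasColorSpaceChanged(want_srgb)) {
            swapchain.Create(width, height, want_srgb);
        }
        swapchain.AcquireNextImage();
        // ... record and submit rendering work, then hand the frame over ...
        swapchain.Present();
    }
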
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 88ccf96f5..8e029bcb3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 15 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 17 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 17 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 18 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | ||
| 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 19 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 20 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 20 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| @@ -34,19 +35,6 @@ using VideoCommon::SubresourceRange; | |||
| 34 | using VideoCore::Surface::IsPixelFormatASTC; | 35 | using VideoCore::Surface::IsPixelFormatASTC; |
| 35 | 36 | ||
| 36 | namespace { | 37 | namespace { |
| 37 | |||
| 38 | constexpr std::array ATTACHMENT_REFERENCES{ | ||
| 39 | VkAttachmentReference{0, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 40 | VkAttachmentReference{1, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 41 | VkAttachmentReference{2, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 42 | VkAttachmentReference{3, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 43 | VkAttachmentReference{4, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 44 | VkAttachmentReference{5, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 45 | VkAttachmentReference{6, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 46 | VkAttachmentReference{7, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 47 | VkAttachmentReference{8, VK_IMAGE_LAYOUT_GENERAL}, | ||
| 48 | }; | ||
| 49 | |||
| 50 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | 38 | constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { |
| 51 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | 39 | if (color == std::array<float, 4>{0, 0, 0, 0}) { |
| 52 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; | 40 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; |
| @@ -174,25 +162,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 174 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); | 162 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); |
| 175 | } | 163 | } |
| 176 | 164 | ||
| 177 | [[nodiscard]] vk::Buffer MakeBuffer(const Device& device, const ImageInfo& info) { | ||
| 178 | if (info.type != ImageType::Buffer) { | ||
| 179 | return vk::Buffer{}; | ||
| 180 | } | ||
| 181 | const size_t bytes_per_block = VideoCore::Surface::BytesPerBlock(info.format); | ||
| 182 | return device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 183 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 184 | .pNext = nullptr, | ||
| 185 | .flags = 0, | ||
| 186 | .size = info.size.width * bytes_per_block, | ||
| 187 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 188 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 189 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, | ||
| 190 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 191 | .queueFamilyIndexCount = 0, | ||
| 192 | .pQueueFamilyIndices = nullptr, | ||
| 193 | }); | ||
| 194 | } | ||
| 195 | |||
| 196 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { | 165 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { |
| 197 | switch (VideoCore::Surface::GetFormatType(format)) { | 166 | switch (VideoCore::Surface::GetFormatType(format)) { |
| 198 | case VideoCore::Surface::SurfaceType::ColorTexture: | 167 | case VideoCore::Surface::SurfaceType::ColorTexture: |
| @@ -226,23 +195,6 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 226 | } | 195 | } |
| 227 | } | 196 | } |
| 228 | 197 | ||
| 229 | [[nodiscard]] VkAttachmentDescription AttachmentDescription(const Device& device, | ||
| 230 | const ImageView* image_view) { | ||
| 231 | using MaxwellToVK::SurfaceFormat; | ||
| 232 | const PixelFormat pixel_format = image_view->format; | ||
| 233 | return VkAttachmentDescription{ | ||
| 234 | .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, | ||
| 235 | .format = SurfaceFormat(device, FormatType::Optimal, true, pixel_format).format, | ||
| 236 | .samples = image_view->Samples(), | ||
| 237 | .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 238 | .storeOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 239 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, | ||
| 240 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, | ||
| 241 | .initialLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 242 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 243 | }; | ||
| 244 | } | ||
| 245 | |||
| 246 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { | 198 | [[nodiscard]] VkComponentSwizzle ComponentSwizzle(SwizzleSource swizzle) { |
| 247 | switch (swizzle) { | 199 | switch (swizzle) { |
| 248 | case SwizzleSource::Zero: | 200 | case SwizzleSource::Zero: |
| @@ -263,6 +215,30 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 263 | return VK_COMPONENT_SWIZZLE_ZERO; | 215 | return VK_COMPONENT_SWIZZLE_ZERO; |
| 264 | } | 216 | } |
| 265 | 217 | ||
| 218 | [[nodiscard]] VkImageViewType ImageViewType(Shader::TextureType type) { | ||
| 219 | switch (type) { | ||
| 220 | case Shader::TextureType::Color1D: | ||
| 221 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 222 | case Shader::TextureType::Color2D: | ||
| 223 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 224 | case Shader::TextureType::ColorCube: | ||
| 225 | return VK_IMAGE_VIEW_TYPE_CUBE; | ||
| 226 | case Shader::TextureType::Color3D: | ||
| 227 | return VK_IMAGE_VIEW_TYPE_3D; | ||
| 228 | case Shader::TextureType::ColorArray1D: | ||
| 229 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; | ||
| 230 | case Shader::TextureType::ColorArray2D: | ||
| 231 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; | ||
| 232 | case Shader::TextureType::ColorArrayCube: | ||
| 233 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | ||
| 234 | case Shader::TextureType::Buffer: | ||
| 235 | UNREACHABLE_MSG("Texture buffers can't be image views"); | ||
| 236 | return VK_IMAGE_VIEW_TYPE_1D; | ||
| 237 | } | ||
| 238 | UNREACHABLE_MSG("Invalid image view type={}", type); | ||
| 239 | return VK_IMAGE_VIEW_TYPE_2D; | ||
| 240 | } | ||
| 241 | |||
| 266 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { | 242 | [[nodiscard]] VkImageViewType ImageViewType(VideoCommon::ImageViewType type) { |
| 267 | switch (type) { | 243 | switch (type) { |
| 268 | case VideoCommon::ImageViewType::e1D: | 244 | case VideoCommon::ImageViewType::e1D: |
| @@ -280,7 +256,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 280 | case VideoCommon::ImageViewType::CubeArray: | 256 | case VideoCommon::ImageViewType::CubeArray: |
| 281 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; | 257 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; |
| 282 | case VideoCommon::ImageViewType::Rect: | 258 | case VideoCommon::ImageViewType::Rect: |
| 283 | LOG_WARNING(Render_Vulkan, "Unnormalized image view type not supported"); | 259 | UNIMPLEMENTED_MSG("Rect image view"); |
| 284 | return VK_IMAGE_VIEW_TYPE_2D; | 260 | return VK_IMAGE_VIEW_TYPE_2D; |
| 285 | case VideoCommon::ImageViewType::Buffer: | 261 | case VideoCommon::ImageViewType::Buffer: |
| 286 | UNREACHABLE_MSG("Texture buffers can't be image views"); | 262 | UNREACHABLE_MSG("Texture buffers can't be image views"); |
| @@ -327,7 +303,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 327 | }; | 303 | }; |
| 328 | } | 304 | } |
| 329 | 305 | ||
| 330 | [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | 306 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( |
| 331 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | 307 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 332 | std::vector<VkBufferCopy> result(copies.size()); | 308 | std::vector<VkBufferCopy> result(copies.size()); |
| 333 | std::ranges::transform( | 309 | std::ranges::transform( |
| @@ -587,6 +563,28 @@ struct RangedBarrierRange { | |||
| 587 | } | 563 | } |
| 588 | }; | 564 | }; |
| 589 | 565 | ||
| 566 | [[nodiscard]] VkFormat Format(Shader::ImageFormat format) { | ||
| 567 | switch (format) { | ||
| 568 | case Shader::ImageFormat::Typeless: | ||
| 569 | break; | ||
| 570 | case Shader::ImageFormat::R8_SINT: | ||
| 571 | return VK_FORMAT_R8_SINT; | ||
| 572 | case Shader::ImageFormat::R8_UINT: | ||
| 573 | return VK_FORMAT_R8_UINT; | ||
| 574 | case Shader::ImageFormat::R16_UINT: | ||
| 575 | return VK_FORMAT_R16_UINT; | ||
| 576 | case Shader::ImageFormat::R16_SINT: | ||
| 577 | return VK_FORMAT_R16_SINT; | ||
| 578 | case Shader::ImageFormat::R32_UINT: | ||
| 579 | return VK_FORMAT_R32_UINT; | ||
| 580 | case Shader::ImageFormat::R32G32_UINT: | ||
| 581 | return VK_FORMAT_R32G32_UINT; | ||
| 582 | case Shader::ImageFormat::R32G32B32A32_UINT: | ||
| 583 | return VK_FORMAT_R32G32B32A32_UINT; | ||
| 584 | } | ||
| 585 | UNREACHABLE_MSG("Invalid image format={}", format); | ||
| 586 | return VK_FORMAT_R32_UINT; | ||
| 587 | } | ||
| 590 | } // Anonymous namespace | 588 | } // Anonymous namespace |
| 591 | 589 | ||
| 592 | void TextureCacheRuntime::Finish() { | 590 | void TextureCacheRuntime::Finish() { |
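The new anonymous-namespace Format() helper maps the shader IR's storage image formats onto Vulkan formats; Typeless deliberately falls through to the UNREACHABLE path because typeless accesses reuse the view's own format (see StorageView() later in this file). A tiny usage sketch with the expected results as comments:

```cpp
// Usage sketch for the helper added above; values follow the switch one-to-one.
const VkFormat r32    = Format(Shader::ImageFormat::R32_UINT);          // VK_FORMAT_R32_UINT
const VkFormat rg32   = Format(Shader::ImageFormat::R32G32_UINT);       // VK_FORMAT_R32G32_UINT
const VkFormat rgba32 = Format(Shader::ImageFormat::R32G32B32A32_UINT); // VK_FORMAT_R32G32B32A32_UINT
```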
| @@ -625,7 +623,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst | |||
| 625 | return; | 623 | return; |
| 626 | } | 624 | } |
| 627 | } | 625 | } |
| 628 | ASSERT(src.ImageFormat() == dst.ImageFormat()); | 626 | ASSERT(src.format == dst.format); |
| 629 | ASSERT(!(is_dst_msaa && !is_src_msaa)); | 627 | ASSERT(!(is_dst_msaa && !is_src_msaa)); |
| 630 | ASSERT(operation == Fermi2D::Operation::SrcCopy); | 628 | ASSERT(operation == Fermi2D::Operation::SrcCopy); |
| 631 | 629 | ||
| @@ -842,13 +840,9 @@ u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | |||
| 842 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, | 840 | Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| 843 | VAddr cpu_addr_) | 841 | VAddr cpu_addr_) |
| 844 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, | 842 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, |
| 845 | image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), | 843 | image(MakeImage(runtime.device, info)), |
| 844 | commit(runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal)), | ||
| 846 | aspect_mask(ImageAspectMask(info.format)) { | 845 | aspect_mask(ImageAspectMask(info.format)) { |
| 847 | if (image) { | ||
| 848 | commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal); | ||
| 849 | } else { | ||
| 850 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 851 | } | ||
| 852 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 846 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 853 | if (Settings::values.accelerate_astc.GetValue()) { | 847 | if (Settings::values.accelerate_astc.GetValue()) { |
| 854 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; | 848 | flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; |
| @@ -857,11 +851,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 857 | } | 851 | } |
| 858 | } | 852 | } |
| 859 | if (runtime.device.HasDebuggingToolAttached()) { | 853 | if (runtime.device.HasDebuggingToolAttached()) { |
| 860 | if (image) { | 854 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 861 | image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 862 | } else { | ||
| 863 | buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | ||
| 864 | } | ||
| 865 | } | 855 | } |
| 866 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ | 856 | static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{ |
| 867 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, | 857 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, |
| @@ -913,19 +903,6 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag | |||
| 913 | }); | 903 | }); |
| 914 | } | 904 | } |
| 915 | 905 | ||
| 916 | void Image::UploadMemory(const StagingBufferRef& map, | ||
| 917 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 918 | // TODO: Move this to another API | ||
| 919 | scheduler->RequestOutsideRenderPassOperationContext(); | ||
| 920 | std::vector vk_copies = TransformBufferCopies(copies, map.offset); | ||
| 921 | const VkBuffer src_buffer = map.buffer; | ||
| 922 | const VkBuffer dst_buffer = *buffer; | ||
| 923 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | ||
| 924 | // TODO: Barriers | ||
| 925 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); | ||
| 926 | }); | ||
| 927 | } | ||
| 928 | |||
| 929 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 906 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 930 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); | 907 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 931 | scheduler->RequestOutsideRenderPassOperationContext(); | 908 | scheduler->RequestOutsideRenderPassOperationContext(); |
| @@ -984,8 +961,9 @@ void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferIm | |||
| 984 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, | 961 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, |
| 985 | ImageId image_id_, Image& image) | 962 | ImageId image_id_, Image& image) |
| 986 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, | 963 | : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device}, |
| 987 | image_handle{image.Handle()}, image_format{image.info.format}, samples{ConvertSampleCount( | 964 | image_handle{image.Handle()}, samples{ConvertSampleCount(image.info.num_samples)} { |
| 988 | image.info.num_samples)} { | 965 | using Shader::TextureType; |
| 966 | |||
| 989 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); | 967 | const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info); |
| 990 | std::array<SwizzleSource, 4> swizzle{ | 968 | std::array<SwizzleSource, 4> swizzle{ |
| 991 | SwizzleSource::R, | 969 | SwizzleSource::R, |
| @@ -1023,57 +1001,54 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 1023 | }, | 1001 | }, |
| 1024 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), | 1002 | .subresourceRange = MakeSubresourceRange(aspect_mask, info.range), |
| 1025 | }; | 1003 | }; |
| 1026 | const auto create = [&](VideoCommon::ImageViewType view_type, std::optional<u32> num_layers) { | 1004 | const auto create = [&](TextureType tex_type, std::optional<u32> num_layers) { |
| 1027 | VkImageViewCreateInfo ci{create_info}; | 1005 | VkImageViewCreateInfo ci{create_info}; |
| 1028 | ci.viewType = ImageViewType(view_type); | 1006 | ci.viewType = ImageViewType(tex_type); |
| 1029 | if (num_layers) { | 1007 | if (num_layers) { |
| 1030 | ci.subresourceRange.layerCount = *num_layers; | 1008 | ci.subresourceRange.layerCount = *num_layers; |
| 1031 | } | 1009 | } |
| 1032 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); | 1010 | vk::ImageView handle = device->GetLogical().CreateImageView(ci); |
| 1033 | if (device->HasDebuggingToolAttached()) { | 1011 | if (device->HasDebuggingToolAttached()) { |
| 1034 | handle.SetObjectNameEXT(VideoCommon::Name(*this, view_type).c_str()); | 1012 | handle.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 1035 | } | 1013 | } |
| 1036 | image_views[static_cast<size_t>(view_type)] = std::move(handle); | 1014 | image_views[static_cast<size_t>(tex_type)] = std::move(handle); |
| 1037 | }; | 1015 | }; |
| 1038 | switch (info.type) { | 1016 | switch (info.type) { |
| 1039 | case VideoCommon::ImageViewType::e1D: | 1017 | case VideoCommon::ImageViewType::e1D: |
| 1040 | case VideoCommon::ImageViewType::e1DArray: | 1018 | case VideoCommon::ImageViewType::e1DArray: |
| 1041 | create(VideoCommon::ImageViewType::e1D, 1); | 1019 | create(TextureType::Color1D, 1); |
| 1042 | create(VideoCommon::ImageViewType::e1DArray, std::nullopt); | 1020 | create(TextureType::ColorArray1D, std::nullopt); |
| 1043 | render_target = Handle(VideoCommon::ImageViewType::e1DArray); | 1021 | render_target = Handle(TextureType::ColorArray1D); |
| 1044 | break; | 1022 | break; |
| 1045 | case VideoCommon::ImageViewType::e2D: | 1023 | case VideoCommon::ImageViewType::e2D: |
| 1046 | case VideoCommon::ImageViewType::e2DArray: | 1024 | case VideoCommon::ImageViewType::e2DArray: |
| 1047 | create(VideoCommon::ImageViewType::e2D, 1); | 1025 | create(TextureType::Color2D, 1); |
| 1048 | create(VideoCommon::ImageViewType::e2DArray, std::nullopt); | 1026 | create(TextureType::ColorArray2D, std::nullopt); |
| 1049 | render_target = Handle(VideoCommon::ImageViewType::e2DArray); | 1027 | render_target = Handle(Shader::TextureType::ColorArray2D); |
| 1050 | break; | 1028 | break; |
| 1051 | case VideoCommon::ImageViewType::e3D: | 1029 | case VideoCommon::ImageViewType::e3D: |
| 1052 | create(VideoCommon::ImageViewType::e3D, std::nullopt); | 1030 | create(TextureType::Color3D, std::nullopt); |
| 1053 | render_target = Handle(VideoCommon::ImageViewType::e3D); | 1031 | render_target = Handle(Shader::TextureType::Color3D); |
| 1054 | break; | 1032 | break; |
| 1055 | case VideoCommon::ImageViewType::Cube: | 1033 | case VideoCommon::ImageViewType::Cube: |
| 1056 | case VideoCommon::ImageViewType::CubeArray: | 1034 | case VideoCommon::ImageViewType::CubeArray: |
| 1057 | create(VideoCommon::ImageViewType::Cube, 6); | 1035 | create(TextureType::ColorCube, 6); |
| 1058 | create(VideoCommon::ImageViewType::CubeArray, std::nullopt); | 1036 | create(TextureType::ColorArrayCube, std::nullopt); |
| 1059 | break; | 1037 | break; |
| 1060 | case VideoCommon::ImageViewType::Rect: | 1038 | case VideoCommon::ImageViewType::Rect: |
| 1061 | UNIMPLEMENTED(); | 1039 | UNIMPLEMENTED(); |
| 1062 | break; | 1040 | break; |
| 1063 | case VideoCommon::ImageViewType::Buffer: | 1041 | case VideoCommon::ImageViewType::Buffer: |
| 1064 | buffer_view = device->GetLogical().CreateBufferView(VkBufferViewCreateInfo{ | 1042 | UNREACHABLE(); |
| 1065 | .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, | ||
| 1066 | .pNext = nullptr, | ||
| 1067 | .flags = 0, | ||
| 1068 | .buffer = image.Buffer(), | ||
| 1069 | .format = format_info.format, | ||
| 1070 | .offset = 0, // TODO: Redesign buffer cache to support this | ||
| 1071 | .range = image.guest_size_bytes, | ||
| 1072 | }); | ||
| 1073 | break; | 1043 | break; |
| 1074 | } | 1044 | } |
| 1075 | } | 1045 | } |
| 1076 | 1046 | ||
| 1047 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 1048 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | ||
| 1049 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | ||
| 1050 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | ||
| 1051 | |||
| 1077 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) | 1052 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams& params) |
| 1078 | : VideoCommon::ImageViewBase{params} {} | 1053 | : VideoCommon::ImageViewBase{params} {} |
| 1079 | 1054 | ||
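The image view constructor now keys its cached handles by Shader::TextureType instead of VideoCommon::ImageViewType, so lookups driven by the new shader IR need no translation step. A minimal lookup sketch, where `view` stands for an ImageView built for a 2D array image by the constructor above:

```cpp
// Hypothetical lookup sketch; Handle() is declared in vk_texture_cache.h below.
const VkImageView layered = view.Handle(Shader::TextureType::ColorArray2D);
const VkImageView single  = view.Handle(Shader::TextureType::Color2D);
// Both views were created eagerly in the switch above, so these are plain array reads.
```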
| @@ -1081,7 +1056,8 @@ VkImageView ImageView::DepthView() { | |||
| 1081 | if (depth_view) { | 1056 | if (depth_view) { |
| 1082 | return *depth_view; | 1057 | return *depth_view; |
| 1083 | } | 1058 | } |
| 1084 | depth_view = MakeDepthStencilView(VK_IMAGE_ASPECT_DEPTH_BIT); | 1059 | const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |
| 1060 | depth_view = MakeView(info.format, VK_IMAGE_ASPECT_DEPTH_BIT); | ||
| 1085 | return *depth_view; | 1061 | return *depth_view; |
| 1086 | } | 1062 | } |
| 1087 | 1063 | ||
| @@ -1089,18 +1065,38 @@ VkImageView ImageView::StencilView() { | |||
| 1089 | if (stencil_view) { | 1065 | if (stencil_view) { |
| 1090 | return *stencil_view; | 1066 | return *stencil_view; |
| 1091 | } | 1067 | } |
| 1092 | stencil_view = MakeDepthStencilView(VK_IMAGE_ASPECT_STENCIL_BIT); | 1068 | const auto& info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); |
| 1069 | stencil_view = MakeView(info.format, VK_IMAGE_ASPECT_STENCIL_BIT); | ||
| 1093 | return *stencil_view; | 1070 | return *stencil_view; |
| 1094 | } | 1071 | } |
| 1095 | 1072 | ||
| 1096 | vk::ImageView ImageView::MakeDepthStencilView(VkImageAspectFlags aspect_mask) { | 1073 | VkImageView ImageView::StorageView(Shader::TextureType texture_type, |
| 1074 | Shader::ImageFormat image_format) { | ||
| 1075 | if (image_format == Shader::ImageFormat::Typeless) { | ||
| 1076 | return Handle(texture_type); | ||
| 1077 | } | ||
| 1078 | const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || | ||
| 1079 | image_format == Shader::ImageFormat::R16_SINT}; | ||
| 1080 | if (!storage_views) { | ||
| 1081 | storage_views = std::make_unique<StorageViews>(); | ||
| 1082 | } | ||
| 1083 | auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; | ||
| 1084 | auto& view{views[static_cast<size_t>(texture_type)]}; | ||
| 1085 | if (view) { | ||
| 1086 | return *view; | ||
| 1087 | } | ||
| 1088 | view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); | ||
| 1089 | return *view; | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { | ||
| 1097 | return device->GetLogical().CreateImageView({ | 1093 | return device->GetLogical().CreateImageView({ |
| 1098 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 1094 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 1099 | .pNext = nullptr, | 1095 | .pNext = nullptr, |
| 1100 | .flags = 0, | 1096 | .flags = 0, |
| 1101 | .image = image_handle, | 1097 | .image = image_handle, |
| 1102 | .viewType = ImageViewType(type), | 1098 | .viewType = ImageViewType(type), |
| 1103 | .format = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format).format, | 1099 | .format = vk_format, |
| 1104 | .components{ | 1100 | .components{ |
| 1105 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 1101 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 1106 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 1102 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, |
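StorageView() lazily allocates one signed and one unsigned array of reinterpreting views, so image views that never feed typed image loads or stores allocate nothing extra. A short usage sketch grounded in the function above (the surrounding rasterizer code is not shown in this section):

```cpp
// Sketch: reinterpret a color image as R32_UINT for a typed image load/store.
const VkImageView storage =
    image_view.StorageView(Shader::TextureType::Color2D, Shader::ImageFormat::R32_UINT);
// Typeless accesses bypass the cache entirely and reuse the regular handle:
const VkImageView plain =
    image_view.StorageView(Shader::TextureType::Color2D, Shader::ImageFormat::Typeless);
```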
| @@ -1164,7 +1160,6 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t | |||
| 1164 | 1160 | ||
| 1165 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, | 1161 | Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM_RT> color_buffers, |
| 1166 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { | 1162 | ImageView* depth_buffer, const VideoCommon::RenderTargets& key) { |
| 1167 | std::vector<VkAttachmentDescription> descriptions; | ||
| 1168 | std::vector<VkImageView> attachments; | 1163 | std::vector<VkImageView> attachments; |
| 1169 | RenderPassKey renderpass_key{}; | 1164 | RenderPassKey renderpass_key{}; |
| 1170 | s32 num_layers = 1; | 1165 | s32 num_layers = 1; |
| @@ -1175,7 +1170,6 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1175 | renderpass_key.color_formats[index] = PixelFormat::Invalid; | 1170 | renderpass_key.color_formats[index] = PixelFormat::Invalid; |
| 1176 | continue; | 1171 | continue; |
| 1177 | } | 1172 | } |
| 1178 | descriptions.push_back(AttachmentDescription(runtime.device, color_buffer)); | ||
| 1179 | attachments.push_back(color_buffer->RenderTarget()); | 1173 | attachments.push_back(color_buffer->RenderTarget()); |
| 1180 | renderpass_key.color_formats[index] = color_buffer->format; | 1174 | renderpass_key.color_formats[index] = color_buffer->format; |
| 1181 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); | 1175 | num_layers = std::max(num_layers, color_buffer->range.extent.layers); |
| @@ -1185,10 +1179,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1185 | ++num_images; | 1179 | ++num_images; |
| 1186 | } | 1180 | } |
| 1187 | const size_t num_colors = attachments.size(); | 1181 | const size_t num_colors = attachments.size(); |
| 1188 | const VkAttachmentReference* depth_attachment = | ||
| 1189 | depth_buffer ? &ATTACHMENT_REFERENCES[num_colors] : nullptr; | ||
| 1190 | if (depth_buffer) { | 1182 | if (depth_buffer) { |
| 1191 | descriptions.push_back(AttachmentDescription(runtime.device, depth_buffer)); | ||
| 1192 | attachments.push_back(depth_buffer->RenderTarget()); | 1183 | attachments.push_back(depth_buffer->RenderTarget()); |
| 1193 | renderpass_key.depth_format = depth_buffer->format; | 1184 | renderpass_key.depth_format = depth_buffer->format; |
| 1194 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); | 1185 | num_layers = std::max(num_layers, depth_buffer->range.extent.layers); |
| @@ -1201,40 +1192,14 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1201 | } | 1192 | } |
| 1202 | renderpass_key.samples = samples; | 1193 | renderpass_key.samples = samples; |
| 1203 | 1194 | ||
| 1204 | const auto& device = runtime.device.GetLogical(); | 1195 | renderpass = runtime.render_pass_cache.Get(renderpass_key); |
| 1205 | const auto [cache_pair, is_new] = runtime.renderpass_cache.try_emplace(renderpass_key); | 1196 | |
| 1206 | if (is_new) { | ||
| 1207 | const VkSubpassDescription subpass{ | ||
| 1208 | .flags = 0, | ||
| 1209 | .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||
| 1210 | .inputAttachmentCount = 0, | ||
| 1211 | .pInputAttachments = nullptr, | ||
| 1212 | .colorAttachmentCount = static_cast<u32>(num_colors), | ||
| 1213 | .pColorAttachments = num_colors != 0 ? ATTACHMENT_REFERENCES.data() : nullptr, | ||
| 1214 | .pResolveAttachments = nullptr, | ||
| 1215 | .pDepthStencilAttachment = depth_attachment, | ||
| 1216 | .preserveAttachmentCount = 0, | ||
| 1217 | .pPreserveAttachments = nullptr, | ||
| 1218 | }; | ||
| 1219 | cache_pair->second = device.CreateRenderPass(VkRenderPassCreateInfo{ | ||
| 1220 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, | ||
| 1221 | .pNext = nullptr, | ||
| 1222 | .flags = 0, | ||
| 1223 | .attachmentCount = static_cast<u32>(descriptions.size()), | ||
| 1224 | .pAttachments = descriptions.data(), | ||
| 1225 | .subpassCount = 1, | ||
| 1226 | .pSubpasses = &subpass, | ||
| 1227 | .dependencyCount = 0, | ||
| 1228 | .pDependencies = nullptr, | ||
| 1229 | }); | ||
| 1230 | } | ||
| 1231 | renderpass = *cache_pair->second; | ||
| 1232 | render_area = VkExtent2D{ | 1197 | render_area = VkExtent2D{ |
| 1233 | .width = key.size.width, | 1198 | .width = key.size.width, |
| 1234 | .height = key.size.height, | 1199 | .height = key.size.height, |
| 1235 | }; | 1200 | }; |
| 1236 | num_color_buffers = static_cast<u32>(num_colors); | 1201 | num_color_buffers = static_cast<u32>(num_colors); |
| 1237 | framebuffer = device.CreateFramebuffer(VkFramebufferCreateInfo{ | 1202 | framebuffer = runtime.device.GetLogical().CreateFramebuffer({ |
| 1238 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | 1203 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, |
| 1239 | .pNext = nullptr, | 1204 | .pNext = nullptr, |
| 1240 | .flags = 0, | 1205 | .flags = 0, |
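Framebuffer construction no longer builds VkRenderPass objects inline from a per-runtime unordered_map; it asks the shared RenderPassCache introduced by this PR. The cache's implementation file is not shown in this section, so the following is only a plausible shape inferred from the call site, with CreateRenderPass as a hypothetical helper that rebuilds attachment descriptions from the key:

```cpp
// Assumed sketch of RenderPassCache::Get; the real vk_render_pass_cache.cpp may differ.
VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
    std::scoped_lock lock{mutex};
    const auto [pair, is_new] = cache.try_emplace(key);
    if (is_new) {
        // Hypothetical helper: derive VkAttachmentDescriptions from the key's
        // color formats, depth format, and sample count, then create the pass.
        pair->second = CreateRenderPass(key);
    }
    return *pair->second;
}
```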
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 172bcdf98..0b73d55f8 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <compare> | 7 | #include <compare> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "shader_recompiler/shader_info.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 11 | #include "video_core/texture_cache/texture_cache.h" | 12 | #include "video_core/texture_cache/texture_cache.h" |
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| @@ -26,35 +27,10 @@ class Device; | |||
| 26 | class Image; | 27 | class Image; |
| 27 | class ImageView; | 28 | class ImageView; |
| 28 | class Framebuffer; | 29 | class Framebuffer; |
| 30 | class RenderPassCache; | ||
| 29 | class StagingBufferPool; | 31 | class StagingBufferPool; |
| 30 | class VKScheduler; | 32 | class VKScheduler; |
| 31 | 33 | ||
| 32 | struct RenderPassKey { | ||
| 33 | constexpr auto operator<=>(const RenderPassKey&) const noexcept = default; | ||
| 34 | |||
| 35 | std::array<PixelFormat, NUM_RT> color_formats; | ||
| 36 | PixelFormat depth_format; | ||
| 37 | VkSampleCountFlagBits samples; | ||
| 38 | }; | ||
| 39 | |||
| 40 | } // namespace Vulkan | ||
| 41 | |||
| 42 | namespace std { | ||
| 43 | template <> | ||
| 44 | struct hash<Vulkan::RenderPassKey> { | ||
| 45 | [[nodiscard]] constexpr size_t operator()(const Vulkan::RenderPassKey& key) const noexcept { | ||
| 46 | size_t value = static_cast<size_t>(key.depth_format) << 48; | ||
| 47 | value ^= static_cast<size_t>(key.samples) << 52; | ||
| 48 | for (size_t i = 0; i < key.color_formats.size(); ++i) { | ||
| 49 | value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6); | ||
| 50 | } | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | } // namespace std | ||
| 55 | |||
| 56 | namespace Vulkan { | ||
| 57 | |||
| 58 | struct TextureCacheRuntime { | 34 | struct TextureCacheRuntime { |
| 59 | const Device& device; | 35 | const Device& device; |
| 60 | VKScheduler& scheduler; | 36 | VKScheduler& scheduler; |
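RenderPassKey and its std::hash specialization leave this header; given the new RenderPassCache forward declaration and the render_pass_cache member below, they presumably move to vk_render_pass_cache.h (an inference, since that header is not shown here). For reference, the key removed above is:

```cpp
// Shape of the key as removed from this header (content unchanged).
struct RenderPassKey {
    constexpr auto operator<=>(const RenderPassKey&) const noexcept = default;

    std::array<PixelFormat, NUM_RT> color_formats;
    PixelFormat depth_format;
    VkSampleCountFlagBits samples;
};
```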
| @@ -62,13 +38,13 @@ struct TextureCacheRuntime { | |||
| 62 | StagingBufferPool& staging_buffer_pool; | 38 | StagingBufferPool& staging_buffer_pool; |
| 63 | BlitImageHelper& blit_image_helper; | 39 | BlitImageHelper& blit_image_helper; |
| 64 | ASTCDecoderPass& astc_decoder_pass; | 40 | ASTCDecoderPass& astc_decoder_pass; |
| 65 | std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{}; | 41 | RenderPassCache& render_pass_cache; |
| 66 | 42 | ||
| 67 | void Finish(); | 43 | void Finish(); |
| 68 | 44 | ||
| 69 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 45 | StagingBufferRef UploadStagingBuffer(size_t size); |
| 70 | 46 | ||
| 71 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 47 | StagingBufferRef DownloadStagingBuffer(size_t size); |
| 72 | 48 | ||
| 73 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 49 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 74 | const Region2D& dst_region, const Region2D& src_region, | 50 | const Region2D& dst_region, const Region2D& src_region, |
| @@ -79,7 +55,7 @@ struct TextureCacheRuntime { | |||
| 79 | 55 | ||
| 80 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); | 56 | void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view); |
| 81 | 57 | ||
| 82 | [[nodiscard]] bool CanAccelerateImageUpload(Image&) const noexcept { | 58 | bool CanAccelerateImageUpload(Image&) const noexcept { |
| 83 | return false; | 59 | return false; |
| 84 | } | 60 | } |
| 85 | 61 | ||
| @@ -117,8 +93,6 @@ public: | |||
| 117 | void UploadMemory(const StagingBufferRef& map, | 93 | void UploadMemory(const StagingBufferRef& map, |
| 118 | std::span<const VideoCommon::BufferImageCopy> copies); | 94 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 119 | 95 | ||
| 120 | void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); | ||
| 121 | |||
| 122 | void DownloadMemory(const StagingBufferRef& map, | 96 | void DownloadMemory(const StagingBufferRef& map, |
| 123 | std::span<const VideoCommon::BufferImageCopy> copies); | 97 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 124 | 98 | ||
| @@ -126,10 +100,6 @@ public: | |||
| 126 | return *image; | 100 | return *image; |
| 127 | } | 101 | } |
| 128 | 102 | ||
| 129 | [[nodiscard]] VkBuffer Buffer() const noexcept { | ||
| 130 | return *buffer; | ||
| 131 | } | ||
| 132 | |||
| 133 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { | 103 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| 134 | return aspect_mask; | 104 | return aspect_mask; |
| 135 | } | 105 | } |
| @@ -146,7 +116,6 @@ public: | |||
| 146 | private: | 116 | private: |
| 147 | VKScheduler* scheduler; | 117 | VKScheduler* scheduler; |
| 148 | vk::Image image; | 118 | vk::Image image; |
| 149 | vk::Buffer buffer; | ||
| 150 | MemoryCommit commit; | 119 | MemoryCommit commit; |
| 151 | vk::ImageView image_view; | 120 | vk::ImageView image_view; |
| 152 | std::vector<vk::ImageView> storage_image_views; | 121 | std::vector<vk::ImageView> storage_image_views; |
| @@ -157,18 +126,19 @@ private: | |||
| 157 | class ImageView : public VideoCommon::ImageViewBase { | 126 | class ImageView : public VideoCommon::ImageViewBase { |
| 158 | public: | 127 | public: |
| 159 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | 128 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 129 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | ||
| 130 | const VideoCommon::ImageViewInfo&, GPUVAddr); | ||
| 160 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 131 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 161 | 132 | ||
| 162 | [[nodiscard]] VkImageView DepthView(); | 133 | [[nodiscard]] VkImageView DepthView(); |
| 163 | 134 | ||
| 164 | [[nodiscard]] VkImageView StencilView(); | 135 | [[nodiscard]] VkImageView StencilView(); |
| 165 | 136 | ||
| 166 | [[nodiscard]] VkImageView Handle(VideoCommon::ImageViewType query_type) const noexcept { | 137 | [[nodiscard]] VkImageView StorageView(Shader::TextureType texture_type, |
| 167 | return *image_views[static_cast<size_t>(query_type)]; | 138 | Shader::ImageFormat image_format); |
| 168 | } | ||
| 169 | 139 | ||
| 170 | [[nodiscard]] VkBufferView BufferView() const noexcept { | 140 | [[nodiscard]] VkImageView Handle(Shader::TextureType texture_type) const noexcept { |
| 171 | return *buffer_view; | 141 | return *image_views[static_cast<size_t>(texture_type)]; |
| 172 | } | 142 | } |
| 173 | 143 | ||
| 174 | [[nodiscard]] VkImage ImageHandle() const noexcept { | 144 | [[nodiscard]] VkImage ImageHandle() const noexcept { |
| @@ -179,26 +149,36 @@ public: | |||
| 179 | return render_target; | 149 | return render_target; |
| 180 | } | 150 | } |
| 181 | 151 | ||
| 182 | [[nodiscard]] PixelFormat ImageFormat() const noexcept { | ||
| 183 | return image_format; | ||
| 184 | } | ||
| 185 | |||
| 186 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { | 152 | [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { |
| 187 | return samples; | 153 | return samples; |
| 188 | } | 154 | } |
| 189 | 155 | ||
| 156 | [[nodiscard]] GPUVAddr GpuAddr() const noexcept { | ||
| 157 | return gpu_addr; | ||
| 158 | } | ||
| 159 | |||
| 160 | [[nodiscard]] u32 BufferSize() const noexcept { | ||
| 161 | return buffer_size; | ||
| 162 | } | ||
| 163 | |||
| 190 | private: | 164 | private: |
| 191 | [[nodiscard]] vk::ImageView MakeDepthStencilView(VkImageAspectFlags aspect_mask); | 165 | struct StorageViews { |
| 166 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> signeds; | ||
| 167 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> unsigneds; | ||
| 168 | }; | ||
| 169 | |||
| 170 | [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask); | ||
| 192 | 171 | ||
| 193 | const Device* device = nullptr; | 172 | const Device* device = nullptr; |
| 194 | std::array<vk::ImageView, VideoCommon::NUM_IMAGE_VIEW_TYPES> image_views; | 173 | std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views; |
| 174 | std::unique_ptr<StorageViews> storage_views; | ||
| 195 | vk::ImageView depth_view; | 175 | vk::ImageView depth_view; |
| 196 | vk::ImageView stencil_view; | 176 | vk::ImageView stencil_view; |
| 197 | vk::BufferView buffer_view; | ||
| 198 | VkImage image_handle = VK_NULL_HANDLE; | 177 | VkImage image_handle = VK_NULL_HANDLE; |
| 199 | VkImageView render_target = VK_NULL_HANDLE; | 178 | VkImageView render_target = VK_NULL_HANDLE; |
| 200 | PixelFormat image_format = PixelFormat::Invalid; | ||
| 201 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; | 179 | VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; |
| 180 | GPUVAddr gpu_addr = 0; | ||
| 181 | u32 buffer_size = 0; | ||
| 202 | }; | 182 | }; |
| 203 | 183 | ||
| 204 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | 184 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
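With the VkBufferView and per-image vk::Buffer paths removed, a buffer-typed ImageView only records where the guest data lives through GpuAddr() and BufferSize(); creating and binding the texel buffer becomes the buffer cache's responsibility. A hypothetical wiring sketch (BindTextureBuffer and the buffer-cache type are assumptions, not part of this diff):

```cpp
// Hypothetical consumer; only GpuAddr() and BufferSize() come from the diff above.
template <typename BufferCacheT>
void BindTexelBuffer(BufferCacheT& buffer_cache, const Vulkan::ImageView& view) {
    buffer_cache.BindTextureBuffer(view.GpuAddr(), view.BufferSize());
}
```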
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index dc45fdcb1..0df3a7fe9 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -15,7 +15,9 @@ | |||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) | 17 | VKUpdateDescriptorQueue::VKUpdateDescriptorQueue(const Device& device_, VKScheduler& scheduler_) |
| 18 | : device{device_}, scheduler{scheduler_} {} | 18 | : device{device_}, scheduler{scheduler_} { |
| 19 | payload_cursor = payload.data(); | ||
| 20 | } | ||
| 19 | 21 | ||
| 20 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; | 22 | VKUpdateDescriptorQueue::~VKUpdateDescriptorQueue() = default; |
| 21 | 23 | ||
| @@ -36,13 +38,4 @@ void VKUpdateDescriptorQueue::Acquire() { | |||
| 36 | upload_start = payload_cursor; | 38 | upload_start = payload_cursor; |
| 37 | } | 39 | } |
| 38 | 40 | ||
| 39 | void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, | ||
| 40 | VkDescriptorSet set) { | ||
| 41 | const void* const data = upload_start; | ||
| 42 | const vk::Device* const logical = &device.GetLogical(); | ||
| 43 | scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { | ||
| 44 | logical->UpdateDescriptorSet(set, update_template, data); | ||
| 45 | }); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Vulkan | 41 | } // namespace Vulkan |
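Send() is gone from the update-descriptor queue; judging by the new UpdateData() accessor in the header below, pipelines now record the template update themselves. A sketch of the new call pattern that mirrors the body of the removed Send() (the surrounding pipeline code is an assumption):

```cpp
// Mirrors the removed Send(): callers capture the staged payload and record the update.
const void* const data = update_descriptor_queue.UpdateData();
const vk::Device* const logical = &device.GetLogical();
scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
    logical->UpdateDescriptorSet(set, update_template, data);
});
```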
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index d35e77c44..d7de4c490 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -39,7 +39,9 @@ public: | |||
| 39 | 39 | ||
| 40 | void Acquire(); | 40 | void Acquire(); |
| 41 | 41 | ||
| 42 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); | 42 | const DescriptorUpdateEntry* UpdateData() const noexcept { |
| 43 | return upload_start; | ||
| 44 | } | ||
| 43 | 45 | ||
| 44 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { | 46 | void AddSampledImage(VkImageView image_view, VkSampler sampler) { |
| 45 | *(payload_cursor++) = VkDescriptorImageInfo{ | 47 | *(payload_cursor++) = VkDescriptorImageInfo{ |