diff options
| author | 2021-02-13 16:08:50 -0500 | |
|---|---|---|
| committer | 2021-03-13 12:16:03 -0500 | |
| commit | 2f30c105849c214345e2201f4bd6f9b4b76ab4a1 (patch) | |
| tree | 5e5889a44af4194fcf22d1375bdf9f91b5302dc1 /src/video_core/renderer_vulkan | |
| parent | astc_decoder: Fix out of bounds memory access (diff) | |
| download | yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.gz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.tar.xz yuzu-2f30c105849c214345e2201f4bd6f9b4b76ab4a1.zip | |
astc_decoder: Reimplement Layers
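Reimplements how layers are decoded in the compute shader: instead of recording one dispatch per layer, each swizzle (mip level) is dispatched once with the image's layer count as the Z dimension, and a storage image view is created per mip level. Fixes multilayer ASTC decoding when using Vulkan.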
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 158 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 46 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 13 |
3 files changed, 122 insertions, 95 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7587ab1e0..a0050b68f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -31,6 +31,7 @@ namespace Vulkan { | |||
| 31 | 31 | ||
| 32 | using Tegra::Texture::SWIZZLE_TABLE; | 32 | using Tegra::Texture::SWIZZLE_TABLE; |
| 33 | using Tegra::Texture::ASTC::EncodingsValues; | 33 | using Tegra::Texture::ASTC::EncodingsValues; |
| 34 | using namespace Tegra::Texture::ASTC; | ||
| 34 | 35 | ||
| 35 | namespace { | 36 | namespace { |
| 36 | 37 | ||
| @@ -214,7 +215,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 214 | struct AstcPushConstants { | 215 | struct AstcPushConstants { |
| 215 | std::array<u32, 2> num_image_blocks; | 216 | std::array<u32, 2> num_image_blocks; |
| 216 | std::array<u32, 2> blocks_dims; | 217 | std::array<u32, 2> blocks_dims; |
| 217 | u32 layer; | ||
| 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; | 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; |
| 219 | }; | 219 | }; |
| 220 | 220 | ||
| @@ -226,6 +226,7 @@ struct AstcBufferData { | |||
| 226 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; | 226 | decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; |
| 227 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; | 227 | decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE; |
| 228 | } constexpr ASTC_BUFFER_DATA; | 228 | } constexpr ASTC_BUFFER_DATA; |
| 229 | |||
| 229 | } // Anonymous namespace | 230 | } // Anonymous namespace |
| 230 | 231 | ||
| 231 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, | 232 | VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, |
| @@ -403,7 +404,6 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | |||
| 403 | return {staging.buffer, staging.offset}; | 404 | return {staging.buffer, staging.offset}; |
| 404 | } | 405 | } |
| 405 | 406 | ||
| 406 | using namespace Tegra::Texture::ASTC; | ||
| 407 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | 407 | ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, |
| 408 | VKDescriptorPool& descriptor_pool_, | 408 | VKDescriptorPool& descriptor_pool_, |
| 409 | StagingBufferPool& staging_buffer_pool_, | 409 | StagingBufferPool& staging_buffer_pool_, |
| @@ -464,76 +464,94 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 464 | if (!data_buffer) { | 464 | if (!data_buffer) { |
| 465 | MakeDataBuffer(); | 465 | MakeDataBuffer(); |
| 466 | } | 466 | } |
| 467 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | ||
| 468 | const VkImage vk_image = image.Handle(); | ||
| 469 | const bool is_initialized = image.ExchangeInitialization(); | ||
| 470 | scheduler.Record([vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { | ||
| 471 | const VkImageMemoryBarrier image_barrier{ | ||
| 472 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 473 | .pNext = nullptr, | ||
| 474 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 475 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 476 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 477 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 478 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 479 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 480 | .image = vk_image, | ||
| 481 | .subresourceRange{ | ||
| 482 | .aspectMask = aspect_mask, | ||
| 483 | .baseMipLevel = 0, | ||
| 484 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 485 | .baseArrayLayer = 0, | ||
| 486 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 487 | }, | ||
| 488 | }; | ||
| 489 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||
| 490 | }); | ||
| 467 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; | 491 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; |
| 468 | for (s32 layer = 0; layer < image.info.resources.layers; layer++) { | 492 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { |
| 469 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | 493 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 470 | const size_t input_offset = swizzle.buffer_offset + map.offset; | 494 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); |
| 471 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | 495 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); |
| 472 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | 496 | const u32 num_dispatches_z = image.info.resources.layers; |
| 473 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; | 497 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; |
| 474 | const u32 layer_image_size = | 498 | const u32 layer_image_size = |
| 475 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); | 499 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); |
| 476 | 500 | ||
| 477 | update_descriptor_queue.Acquire(); | 501 | update_descriptor_queue.Acquire(); |
| 478 | update_descriptor_queue.AddBuffer(*data_buffer, | 502 | update_descriptor_queue.AddBuffer(*data_buffer, |
| 479 | offsetof(AstcBufferData, swizzle_table_buffer), | 503 | offsetof(AstcBufferData, swizzle_table_buffer), |
| 480 | sizeof(AstcBufferData::swizzle_table_buffer)); | 504 | sizeof(AstcBufferData::swizzle_table_buffer)); |
| 481 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes); | 505 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, layer_image_size); |
| 482 | update_descriptor_queue.AddBuffer(*data_buffer, | 506 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), |
| 483 | offsetof(AstcBufferData, encoding_values), | 507 | sizeof(AstcBufferData::encoding_values)); |
| 484 | sizeof(AstcBufferData::encoding_values)); | 508 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), |
| 485 | update_descriptor_queue.AddBuffer(*data_buffer, | 509 | sizeof(AstcBufferData::replicate_6_to_8)); |
| 486 | offsetof(AstcBufferData, replicate_6_to_8), | 510 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8), |
| 487 | sizeof(AstcBufferData::replicate_6_to_8)); | 511 | sizeof(AstcBufferData::replicate_7_to_8)); |
| 488 | update_descriptor_queue.AddBuffer(*data_buffer, | 512 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), |
| 489 | offsetof(AstcBufferData, replicate_7_to_8), | 513 | sizeof(AstcBufferData::replicate_8_to_8)); |
| 490 | sizeof(AstcBufferData::replicate_7_to_8)); | 514 | update_descriptor_queue.AddBuffer(*data_buffer, |
| 491 | update_descriptor_queue.AddBuffer(*data_buffer, | 515 | offsetof(AstcBufferData, replicate_byte_to_16), |
| 492 | offsetof(AstcBufferData, replicate_8_to_8), | 516 | sizeof(AstcBufferData::replicate_byte_to_16)); |
| 493 | sizeof(AstcBufferData::replicate_8_to_8)); | 517 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 494 | update_descriptor_queue.AddBuffer(*data_buffer, | 518 | |
| 495 | offsetof(AstcBufferData, replicate_byte_to_16), | 519 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 496 | sizeof(AstcBufferData::replicate_byte_to_16)); | 520 | const VkPipelineLayout vk_layout = *layout; |
| 497 | update_descriptor_queue.AddImage(image.StorageImageView()); | 521 | const VkPipeline vk_pipeline = *pipeline; |
| 498 | 522 | // To unswizzle the ASTC data | |
| 499 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 523 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 500 | // To unswizzle the ASTC data | 524 | scheduler.Record([vk_layout, vk_pipeline, buffer = map.buffer, num_dispatches_x, |
| 501 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 525 | num_dispatches_y, num_dispatches_z, num_image_blocks, block_dims, params, |
| 502 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = map.buffer, | 526 | set, input_offset](vk::CommandBuffer cmdbuf) { |
| 503 | num_dispatches_x, num_dispatches_y, layer_image_size, | 527 | const AstcPushConstants uniforms{num_image_blocks, block_dims, params}; |
| 504 | num_image_blocks, block_dims, layer, params, set, | 528 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); |
| 505 | image = image.Handle(), input_offset, | 529 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); |
| 506 | aspect_mask = image.AspectMask()](vk::CommandBuffer cmdbuf) { | 530 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); |
| 507 | const AstcPushConstants uniforms{num_image_blocks, block_dims, layer, params}; | 531 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| 508 | 532 | }); | |
| 509 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 510 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 511 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | ||
| 512 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, 1); | ||
| 513 | |||
| 514 | const VkImageMemoryBarrier image_barrier{ | ||
| 515 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 516 | .pNext = nullptr, | ||
| 517 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 518 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, | ||
| 519 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 520 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 521 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 522 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 523 | .image = image, | ||
| 524 | .subresourceRange{ | ||
| 525 | .aspectMask = aspect_mask, | ||
| 526 | .baseMipLevel = 0, | ||
| 527 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 528 | .baseArrayLayer = 0, | ||
| 529 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 530 | }, | ||
| 531 | }; | ||
| 532 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 533 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); | ||
| 534 | }); | ||
| 535 | } | ||
| 536 | } | 533 | } |
| 534 | scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) { | ||
| 535 | const VkImageMemoryBarrier image_barrier{ | ||
| 536 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 537 | .pNext = nullptr, | ||
| 538 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 539 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | ||
| 540 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 541 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 542 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 543 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 544 | .image = vk_image, | ||
| 545 | .subresourceRange{ | ||
| 546 | .aspectMask = aspect_mask, | ||
| 547 | .baseMipLevel = 0, | ||
| 548 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 549 | .baseArrayLayer = 0, | ||
| 550 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 551 | }, | ||
| 552 | }; | ||
| 553 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | ||
| 554 | }); | ||
| 537 | } | 555 | } |
| 538 | 556 | ||
| 539 | } // namespace Vulkan | 557 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f7f744587..18155e449 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -823,27 +823,31 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 823 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, | 823 | .usage = VK_IMAGE_USAGE_STORAGE_BIT, |
| 824 | }; | 824 | }; |
| 825 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { | 825 | if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { |
| 826 | storage_image_view = runtime.device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 826 | const auto& device = runtime.device.GetLogical(); |
| 827 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 827 | storage_image_views.reserve(info.resources.levels); |
| 828 | .pNext = &storage_image_view_usage_create_info, | 828 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 829 | .flags = 0, | 829 | storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{ |
| 830 | .image = *image, | 830 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 831 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | 831 | .pNext = &storage_image_view_usage_create_info, |
| 832 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, | 832 | .flags = 0, |
| 833 | .components{ | 833 | .image = *image, |
| 834 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | 834 | .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, |
| 835 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | 835 | .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| 836 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | 836 | .components{ |
| 837 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | 837 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 838 | }, | 838 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 839 | .subresourceRange{ | 839 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 840 | .aspectMask = aspect_mask, | 840 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, |
| 841 | .baseMipLevel = 0, | 841 | }, |
| 842 | .levelCount = VK_REMAINING_MIP_LEVELS, | 842 | .subresourceRange{ |
| 843 | .baseArrayLayer = 0, | 843 | .aspectMask = aspect_mask, |
| 844 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 844 | .baseMipLevel = static_cast<u32>(level), |
| 845 | }, | 845 | .levelCount = 1, |
| 846 | }); | 846 | .baseArrayLayer = 0, |
| 847 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 848 | }, | ||
| 849 | })); | ||
| 850 | } | ||
| 847 | } | 851 | } |
| 848 | } | 852 | } |
| 849 | 853 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 51705eccb..628785d5e 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -121,12 +121,17 @@ public: | |||
| 121 | return *buffer; | 121 | return *buffer; |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept { | 124 | [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept { |
| 125 | return aspect_mask; | 125 | return aspect_mask; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | [[nodiscard]] VkImageView StorageImageView() const noexcept { | 128 | [[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept { |
| 129 | return *storage_image_view; | 129 | return *storage_image_views[level]; |
| 130 | } | ||
| 131 | |||
| 132 | /// Returns true when the image is already initialized and marks it as initialized | ||
| 133 | [[nodiscard]] bool ExchangeInitialization() noexcept { | ||
| 134 | return std::exchange(initialized, true); | ||
| 130 | } | 135 | } |
| 131 | 136 | ||
| 132 | private: | 137 | private: |
| @@ -135,7 +140,7 @@ private: | |||
| 135 | vk::Buffer buffer; | 140 | vk::Buffer buffer; |
| 136 | MemoryCommit commit; | 141 | MemoryCommit commit; |
| 137 | vk::ImageView image_view; | 142 | vk::ImageView image_view; |
| 138 | vk::ImageView storage_image_view; | 143 | std::vector<vk::ImageView> storage_image_views; |
| 139 | VkImageAspectFlags aspect_mask = 0; | 144 | VkImageAspectFlags aspect_mask = 0; |
| 140 | bool initialized = false; | 145 | bool initialized = false; |
| 141 | }; | 146 | }; |