diff options
| author | 2021-03-25 16:53:51 -0400 | |
|---|---|---|
| committer | 2021-03-25 16:53:51 -0400 | |
| commit | 2f83d9a61bca42d9ef24074beb2b11b19bd4cecd (patch) | |
| tree | 514e40eb750280c2e3025f9301befb6f8c9b46e9 /src/video_core/renderer_vulkan | |
| parent | astc_decoder: Reimplement Layers (diff) | |
| download | yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.gz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.xz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.zip | |
astc_decoder: Refactor for style and more efficient memory use
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 175 |
1 file changed, 96 insertions, 79 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index a0050b68f..e11406e58 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -35,13 +35,13 @@ using namespace Tegra::Texture::ASTC; | |||
| 35 | 35 | ||
| 36 | namespace { | 36 | namespace { |
| 37 | 37 | ||
| 38 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 0; | 38 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; |
| 39 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 1; | 39 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; |
| 40 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 2; | 40 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; |
| 41 | constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 3; | 41 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; |
| 42 | constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 4; | 42 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; |
| 43 | constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 5; | 43 | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; |
| 44 | constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 6; | 44 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; |
| 45 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; | 45 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; |
| 46 | 46 | ||
| 47 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 47 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| @@ -74,56 +74,56 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding | |||
| 74 | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { | 74 | std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { |
| 75 | return {{ | 75 | return {{ |
| 76 | { | 76 | { |
| 77 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, // Swizzle buffer | 77 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| 78 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 78 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 79 | .descriptorCount = 1, | 79 | .descriptorCount = 1, |
| 80 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 80 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 81 | .pImmutableSamplers = nullptr, | 81 | .pImmutableSamplers = nullptr, |
| 82 | }, | 82 | }, |
| 83 | { | 83 | { |
| 84 | .binding = ASTC_BINDING_INPUT_BUFFER, // ASTC Img data buffer | 84 | .binding = ASTC_BINDING_ENC_BUFFER, |
| 85 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 85 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 86 | .descriptorCount = 1, | 86 | .descriptorCount = 1, |
| 87 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 87 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 88 | .pImmutableSamplers = nullptr, | 88 | .pImmutableSamplers = nullptr, |
| 89 | }, | 89 | }, |
| 90 | { | 90 | { |
| 91 | .binding = ASTC_BINDING_ENC_BUFFER, // Encodings buffer | 91 | .binding = ASTC_BINDING_6_TO_8_BUFFER, |
| 92 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 92 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 93 | .descriptorCount = 1, | 93 | .descriptorCount = 1, |
| 94 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 94 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 95 | .pImmutableSamplers = nullptr, | 95 | .pImmutableSamplers = nullptr, |
| 96 | }, | 96 | }, |
| 97 | { | 97 | { |
| 98 | .binding = ASTC_BINDING_6_TO_8_BUFFER, // BINDING_6_TO_8_BUFFER | 98 | .binding = ASTC_BINDING_7_TO_8_BUFFER, |
| 99 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 99 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 100 | .descriptorCount = 1, | 100 | .descriptorCount = 1, |
| 101 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 101 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 102 | .pImmutableSamplers = nullptr, | 102 | .pImmutableSamplers = nullptr, |
| 103 | }, | 103 | }, |
| 104 | { | 104 | { |
| 105 | .binding = ASTC_BINDING_7_TO_8_BUFFER, // BINDING_7_TO_8_BUFFER | 105 | .binding = ASTC_BINDING_8_TO_8_BUFFER, |
| 106 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 106 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 107 | .descriptorCount = 1, | 107 | .descriptorCount = 1, |
| 108 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 108 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 109 | .pImmutableSamplers = nullptr, | 109 | .pImmutableSamplers = nullptr, |
| 110 | }, | 110 | }, |
| 111 | { | 111 | { |
| 112 | .binding = ASTC_BINDING_8_TO_8_BUFFER, // BINDING_8_TO_8_BUFFER | 112 | .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, |
| 113 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 113 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 114 | .descriptorCount = 1, | 114 | .descriptorCount = 1, |
| 115 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 115 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 116 | .pImmutableSamplers = nullptr, | 116 | .pImmutableSamplers = nullptr, |
| 117 | }, | 117 | }, |
| 118 | { | 118 | { |
| 119 | .binding = ASTC_BINDING_BYTE_TO_16_BUFFER, // BINDING_BYTE_TO_16_BUFFER | 119 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, |
| 120 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 120 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 121 | .descriptorCount = 1, | 121 | .descriptorCount = 1, |
| 122 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 122 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| 123 | .pImmutableSamplers = nullptr, | 123 | .pImmutableSamplers = nullptr, |
| 124 | }, | 124 | }, |
| 125 | { | 125 | { |
| 126 | .binding = ASTC_BINDING_OUTPUT_IMAGE, // Output image | 126 | .binding = ASTC_BINDING_OUTPUT_IMAGE, |
| 127 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 127 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, |
| 128 | .descriptorCount = 1, | 128 | .descriptorCount = 1, |
| 129 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 129 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| @@ -146,19 +146,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | |||
| 146 | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { | 146 | std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { |
| 147 | return {{ | 147 | return {{ |
| 148 | { | 148 | { |
| 149 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 150 | .dstArrayElement = 0, | ||
| 151 | .descriptorCount = 1, | ||
| 152 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 153 | .offset = 0 * sizeof(DescriptorUpdateEntry), | ||
| 154 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 155 | }, | ||
| 156 | { | ||
| 157 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, | 149 | .dstBinding = ASTC_BINDING_INPUT_BUFFER, |
| 158 | .dstArrayElement = 0, | 150 | .dstArrayElement = 0, |
| 159 | .descriptorCount = 1, | 151 | .descriptorCount = 1, |
| 160 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 152 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 161 | .offset = 1 * sizeof(DescriptorUpdateEntry), | 153 | .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry), |
| 162 | .stride = sizeof(DescriptorUpdateEntry), | 154 | .stride = sizeof(DescriptorUpdateEntry), |
| 163 | }, | 155 | }, |
| 164 | { | 156 | { |
| @@ -166,7 +158,7 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 166 | .dstArrayElement = 0, | 158 | .dstArrayElement = 0, |
| 167 | .descriptorCount = 1, | 159 | .descriptorCount = 1, |
| 168 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 160 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 169 | .offset = 2 * sizeof(DescriptorUpdateEntry), | 161 | .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), |
| 170 | .stride = sizeof(DescriptorUpdateEntry), | 162 | .stride = sizeof(DescriptorUpdateEntry), |
| 171 | }, | 163 | }, |
| 172 | { | 164 | { |
| @@ -174,7 +166,7 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 174 | .dstArrayElement = 0, | 166 | .dstArrayElement = 0, |
| 175 | .descriptorCount = 1, | 167 | .descriptorCount = 1, |
| 176 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 168 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 177 | .offset = 3 * sizeof(DescriptorUpdateEntry), | 169 | .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), |
| 178 | .stride = sizeof(DescriptorUpdateEntry), | 170 | .stride = sizeof(DescriptorUpdateEntry), |
| 179 | }, | 171 | }, |
| 180 | { | 172 | { |
| @@ -182,7 +174,7 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 182 | .dstArrayElement = 0, | 174 | .dstArrayElement = 0, |
| 183 | .descriptorCount = 1, | 175 | .descriptorCount = 1, |
| 184 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 176 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 185 | .offset = 4 * sizeof(DescriptorUpdateEntry), | 177 | .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), |
| 186 | .stride = sizeof(DescriptorUpdateEntry), | 178 | .stride = sizeof(DescriptorUpdateEntry), |
| 187 | }, | 179 | }, |
| 188 | { | 180 | { |
| @@ -190,7 +182,7 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 190 | .dstArrayElement = 0, | 182 | .dstArrayElement = 0, |
| 191 | .descriptorCount = 1, | 183 | .descriptorCount = 1, |
| 192 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 184 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 193 | .offset = 5 * sizeof(DescriptorUpdateEntry), | 185 | .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry), |
| 194 | .stride = sizeof(DescriptorUpdateEntry), | 186 | .stride = sizeof(DescriptorUpdateEntry), |
| 195 | }, | 187 | }, |
| 196 | { | 188 | { |
| @@ -198,7 +190,15 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 198 | .dstArrayElement = 0, | 190 | .dstArrayElement = 0, |
| 199 | .descriptorCount = 1, | 191 | .descriptorCount = 1, |
| 200 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 192 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| 201 | .offset = 6 * sizeof(DescriptorUpdateEntry), | 193 | .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry), |
| 194 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 195 | }, | ||
| 196 | { | ||
| 197 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 198 | .dstArrayElement = 0, | ||
| 199 | .descriptorCount = 1, | ||
| 200 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 201 | .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 202 | .stride = sizeof(DescriptorUpdateEntry), | 202 | .stride = sizeof(DescriptorUpdateEntry), |
| 203 | }, | 203 | }, |
| 204 | { | 204 | { |
| @@ -206,16 +206,20 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT | |||
| 206 | .dstArrayElement = 0, | 206 | .dstArrayElement = 0, |
| 207 | .descriptorCount = 1, | 207 | .descriptorCount = 1, |
| 208 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 208 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, |
| 209 | .offset = 7 * sizeof(DescriptorUpdateEntry), | 209 | .offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry), |
| 210 | .stride = sizeof(DescriptorUpdateEntry), | 210 | .stride = sizeof(DescriptorUpdateEntry), |
| 211 | }, | 211 | }, |
| 212 | }}; | 212 | }}; |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | struct AstcPushConstants { | 215 | struct AstcPushConstants { |
| 216 | std::array<u32, 2> num_image_blocks; | ||
| 217 | std::array<u32, 2> blocks_dims; | 216 | std::array<u32, 2> blocks_dims; |
| 218 | VideoCommon::Accelerated::BlockLinearSwizzle2DParams params; | 217 | u32 bytes_per_block_log2; |
| 218 | u32 layer_stride; | ||
| 219 | u32 block_size; | ||
| 220 | u32 x_shift; | ||
| 221 | u32 block_height; | ||
| 222 | u32 block_height_mask; | ||
| 219 | }; | 223 | }; |
| 220 | 224 | ||
| 221 | struct AstcBufferData { | 225 | struct AstcBufferData { |
| @@ -419,11 +423,12 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | |||
| 419 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 423 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 420 | 424 | ||
| 421 | void ASTCDecoderPass::MakeDataBuffer() { | 425 | void ASTCDecoderPass::MakeDataBuffer() { |
| 426 | constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE); | ||
| 422 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 427 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ |
| 423 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 428 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 424 | .pNext = nullptr, | 429 | .pNext = nullptr, |
| 425 | .flags = 0, | 430 | .flags = 0, |
| 426 | .size = sizeof(ASTC_BUFFER_DATA), | 431 | .size = TOTAL_BUFFER_SIZE, |
| 427 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 432 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, |
| 428 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 433 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 429 | .queueFamilyIndexCount = 0, | 434 | .queueFamilyIndexCount = 0, |
| @@ -431,15 +436,19 @@ void ASTCDecoderPass::MakeDataBuffer() { | |||
| 431 | }); | 436 | }); |
| 432 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | 437 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); |
| 433 | 438 | ||
| 434 | const auto staging_ref = | 439 | const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); |
| 435 | staging_buffer_pool.Request(sizeof(ASTC_BUFFER_DATA), MemoryUsage::Upload); | ||
| 436 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); | 440 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA)); |
| 437 | scheduler.Record([src = staging_ref.buffer, dst = *data_buffer](vk::CommandBuffer cmdbuf) { | 441 | // Tack on the swizzle table at the end of the buffer |
| 442 | std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE, | ||
| 443 | sizeof(SWIZZLE_TABLE)); | ||
| 444 | |||
| 445 | scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, | ||
| 446 | TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { | ||
| 438 | cmdbuf.CopyBuffer(src, dst, | 447 | cmdbuf.CopyBuffer(src, dst, |
| 439 | VkBufferCopy{ | 448 | VkBufferCopy{ |
| 440 | .srcOffset = 0, | 449 | .srcOffset = offset, |
| 441 | .dstOffset = 0, | 450 | .dstOffset = 0, |
| 442 | .size = sizeof(ASTC_BUFFER_DATA), | 451 | .size = TOTAL_BUFFER_SIZE, |
| 443 | }); | 452 | }); |
| 444 | cmdbuf.PipelineBarrier( | 453 | cmdbuf.PipelineBarrier( |
| 445 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, | 454 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, |
| @@ -448,61 +457,58 @@ void ASTCDecoderPass::MakeDataBuffer() { | |||
| 448 | .pNext = nullptr, | 457 | .pNext = nullptr, |
| 449 | .srcAccessMask = 0, | 458 | .srcAccessMask = 0, |
| 450 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | 459 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 451 | }, | 460 | }); |
| 452 | {}, {}); | ||
| 453 | }); | 461 | }); |
| 454 | } | 462 | } |
| 455 | 463 | ||
| 456 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | 464 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, |
| 457 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 465 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 458 | using namespace VideoCommon::Accelerated; | 466 | using namespace VideoCommon::Accelerated; |
| 459 | const VideoCommon::Extent2D tile_size{ | 467 | const std::array<u32, 2> block_dims{ |
| 460 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | 468 | VideoCore::Surface::DefaultBlockWidth(image.info.format), |
| 461 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | 469 | VideoCore::Surface::DefaultBlockHeight(image.info.format), |
| 462 | }; | 470 | }; |
| 463 | scheduler.RequestOutsideRenderPassOperationContext(); | 471 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 464 | if (!data_buffer) { | 472 | if (!data_buffer) { |
| 465 | MakeDataBuffer(); | 473 | MakeDataBuffer(); |
| 466 | } | 474 | } |
| 475 | const VkPipeline vk_pipeline = *pipeline; | ||
| 467 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | 476 | const VkImageAspectFlags aspect_mask = image.AspectMask(); |
| 468 | const VkImage vk_image = image.Handle(); | 477 | const VkImage vk_image = image.Handle(); |
| 469 | const bool is_initialized = image.ExchangeInitialization(); | 478 | const bool is_initialized = image.ExchangeInitialization(); |
| 470 | scheduler.Record([vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { | 479 | scheduler.Record( |
| 471 | const VkImageMemoryBarrier image_barrier{ | 480 | [vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { |
| 472 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 481 | const VkImageMemoryBarrier image_barrier{ |
| 473 | .pNext = nullptr, | 482 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 474 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | 483 | .pNext = nullptr, |
| 475 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, | 484 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, |
| 476 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | 485 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, |
| 477 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 486 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, |
| 478 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 487 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 479 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 488 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 480 | .image = vk_image, | 489 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 481 | .subresourceRange{ | 490 | .image = vk_image, |
| 482 | .aspectMask = aspect_mask, | 491 | .subresourceRange{ |
| 483 | .baseMipLevel = 0, | 492 | .aspectMask = aspect_mask, |
| 484 | .levelCount = VK_REMAINING_MIP_LEVELS, | 493 | .baseMipLevel = 0, |
| 485 | .baseArrayLayer = 0, | 494 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 486 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 495 | .baseArrayLayer = 0, |
| 487 | }, | 496 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 488 | }; | 497 | }, |
| 489 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | 498 | }; |
| 490 | }); | 499 | cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : 0, |
| 491 | const std::array<u32, 2> block_dims{tile_size.width, tile_size.height}; | 500 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); |
| 501 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); | ||
| 502 | }); | ||
| 492 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | 503 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { |
| 493 | const size_t input_offset = swizzle.buffer_offset + map.offset; | 504 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 494 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | 505 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); |
| 495 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | 506 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); |
| 496 | const u32 num_dispatches_z = image.info.resources.layers; | 507 | const u32 num_dispatches_z = image.info.resources.layers; |
| 497 | const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height}; | ||
| 498 | const u32 layer_image_size = | ||
| 499 | image.guest_size_bytes - static_cast<u32>(swizzle.buffer_offset); | ||
| 500 | 508 | ||
| 501 | update_descriptor_queue.Acquire(); | 509 | update_descriptor_queue.Acquire(); |
| 502 | update_descriptor_queue.AddBuffer(*data_buffer, | 510 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, |
| 503 | offsetof(AstcBufferData, swizzle_table_buffer), | 511 | image.guest_size_bytes - swizzle.buffer_offset); |
| 504 | sizeof(AstcBufferData::swizzle_table_buffer)); | ||
| 505 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, layer_image_size); | ||
| 506 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), | 512 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values), |
| 507 | sizeof(AstcBufferData::encoding_values)); | 513 | sizeof(AstcBufferData::encoding_values)); |
| 508 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), | 514 | update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8), |
| @@ -514,18 +520,28 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 514 | update_descriptor_queue.AddBuffer(*data_buffer, | 520 | update_descriptor_queue.AddBuffer(*data_buffer, |
| 515 | offsetof(AstcBufferData, replicate_byte_to_16), | 521 | offsetof(AstcBufferData, replicate_byte_to_16), |
| 516 | sizeof(AstcBufferData::replicate_byte_to_16)); | 522 | sizeof(AstcBufferData::replicate_byte_to_16)); |
| 523 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), | ||
| 524 | sizeof(SWIZZLE_TABLE)); | ||
| 517 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 525 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 518 | 526 | ||
| 519 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 527 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 520 | const VkPipelineLayout vk_layout = *layout; | 528 | const VkPipelineLayout vk_layout = *layout; |
| 521 | const VkPipeline vk_pipeline = *pipeline; | 529 | |
| 522 | // To unswizzle the ASTC data | 530 | // To unswizzle the ASTC data |
| 523 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 531 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 524 | scheduler.Record([vk_layout, vk_pipeline, buffer = map.buffer, num_dispatches_x, | 532 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 525 | num_dispatches_y, num_dispatches_z, num_image_blocks, block_dims, params, | 533 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 526 | set, input_offset](vk::CommandBuffer cmdbuf) { | 534 | scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z, |
| 527 | const AstcPushConstants uniforms{num_image_blocks, block_dims, params}; | 535 | block_dims, params, set](vk::CommandBuffer cmdbuf) { |
| 528 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); | 536 | const AstcPushConstants uniforms{ |
| 537 | .blocks_dims = block_dims, | ||
| 538 | .bytes_per_block_log2 = params.bytes_per_block_log2, | ||
| 539 | .layer_stride = params.layer_stride, | ||
| 540 | .block_size = params.block_size, | ||
| 541 | .x_shift = params.x_shift, | ||
| 542 | .block_height = params.block_height, | ||
| 543 | .block_height_mask = params.block_height_mask, | ||
| 544 | }; | ||
| 529 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); | 545 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {}); |
| 530 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); | 546 | cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms); |
| 531 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); | 547 | cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| @@ -550,7 +566,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 550 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 566 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 551 | }, | 567 | }, |
| 552 | }; | 568 | }; |
| 553 | cmdbuf.PipelineBarrier(0, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); | 569 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 570 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); | ||
| 554 | }); | 571 | }); |
| 555 | } | 572 | } |
| 556 | 573 | ||