diff options
| author | 2021-08-06 21:45:24 -0700 | |
|---|---|---|
| committer | 2021-08-06 21:45:24 -0700 | |
| commit | 268b5764c70a8300d24c32985dee595046a1e2e1 (patch) | |
| tree | 178317fbc7f34549a93b8e28d9f0b6857aa104c8 /src/video_core/renderer_vulkan | |
| parent | Merge pull request #6799 from ameerj/vp9-fixes (diff) | |
| parent | astc_decoder: Reduce workgroup size (diff) | |
| download | yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.gz yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.xz yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.zip | |
Merge pull request #6791 from ameerj/astc-opt
astc_decoder: Various performance and memory optimizations
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 97 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 5 |
2 files changed, 8 insertions, 94 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 561cf5e11..3e96c0f60 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -30,16 +30,12 @@ | |||
| 30 | namespace Vulkan { | 30 | namespace Vulkan { |
| 31 | 31 | ||
| 32 | using Tegra::Texture::SWIZZLE_TABLE; | 32 | using Tegra::Texture::SWIZZLE_TABLE; |
| 33 | using Tegra::Texture::ASTC::ASTC_ENCODINGS_VALUES; | ||
| 34 | using namespace Tegra::Texture::ASTC; | ||
| 35 | 33 | ||
| 36 | namespace { | 34 | namespace { |
| 37 | 35 | ||
| 38 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; | 36 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; |
| 39 | constexpr u32 ASTC_BINDING_ENC_BUFFER = 1; | 37 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1; |
| 40 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 2; | 38 | constexpr size_t ASTC_NUM_BINDINGS = 2; |
| 41 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 3; | ||
| 42 | constexpr size_t ASTC_NUM_BINDINGS = 4; | ||
| 43 | 39 | ||
| 44 | template <size_t size> | 40 | template <size_t size> |
| 45 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ | 41 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ |
| @@ -75,7 +71,7 @@ constexpr DescriptorBankInfo INPUT_OUTPUT_BANK_INFO{ | |||
| 75 | .score = 2, | 71 | .score = 2, |
| 76 | }; | 72 | }; |
| 77 | 73 | ||
| 78 | constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDINGS{{ | 74 | constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCRIPTOR_SET_BINDINGS{{ |
| 79 | { | 75 | { |
| 80 | .binding = ASTC_BINDING_INPUT_BUFFER, | 76 | .binding = ASTC_BINDING_INPUT_BUFFER, |
| 81 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | 77 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, |
| @@ -84,20 +80,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN | |||
| 84 | .pImmutableSamplers = nullptr, | 80 | .pImmutableSamplers = nullptr, |
| 85 | }, | 81 | }, |
| 86 | { | 82 | { |
| 87 | .binding = ASTC_BINDING_ENC_BUFFER, | ||
| 88 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 89 | .descriptorCount = 1, | ||
| 90 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 91 | .pImmutableSamplers = nullptr, | ||
| 92 | }, | ||
| 93 | { | ||
| 94 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 95 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 96 | .descriptorCount = 1, | ||
| 97 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 98 | .pImmutableSamplers = nullptr, | ||
| 99 | }, | ||
| 100 | { | ||
| 101 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | 83 | .binding = ASTC_BINDING_OUTPUT_IMAGE, |
| 102 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 84 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, |
| 103 | .descriptorCount = 1, | 85 | .descriptorCount = 1, |
| @@ -108,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, 4> ASTC_DESCRIPTOR_SET_BINDIN | |||
| 108 | 90 | ||
| 109 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ | 91 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ |
| 110 | .uniform_buffers = 0, | 92 | .uniform_buffers = 0, |
| 111 | .storage_buffers = 3, | 93 | .storage_buffers = 1, |
| 112 | .texture_buffers = 0, | 94 | .texture_buffers = 0, |
| 113 | .image_buffers = 0, | 95 | .image_buffers = 0, |
| 114 | .textures = 0, | 96 | .textures = 0, |
| 115 | .images = 1, | 97 | .images = 1, |
| 116 | .score = 4, | 98 | .score = 2, |
| 117 | }; | 99 | }; |
| 118 | 100 | ||
| 119 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ | 101 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ |
| @@ -136,22 +118,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | |||
| 136 | .stride = sizeof(DescriptorUpdateEntry), | 118 | .stride = sizeof(DescriptorUpdateEntry), |
| 137 | }, | 119 | }, |
| 138 | { | 120 | { |
| 139 | .dstBinding = ASTC_BINDING_ENC_BUFFER, | ||
| 140 | .dstArrayElement = 0, | ||
| 141 | .descriptorCount = 1, | ||
| 142 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 143 | .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 144 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 145 | }, | ||
| 146 | { | ||
| 147 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 148 | .dstArrayElement = 0, | ||
| 149 | .descriptorCount = 1, | ||
| 150 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 151 | .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 152 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 153 | }, | ||
| 154 | { | ||
| 155 | .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, | 121 | .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, |
| 156 | .dstArrayElement = 0, | 122 | .dstArrayElement = 0, |
| 157 | .descriptorCount = 1, | 123 | .descriptorCount = 1, |
| @@ -163,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | |||
| 163 | 129 | ||
| 164 | struct AstcPushConstants { | 130 | struct AstcPushConstants { |
| 165 | std::array<u32, 2> blocks_dims; | 131 | std::array<u32, 2> blocks_dims; |
| 166 | u32 bytes_per_block_log2; | ||
| 167 | u32 layer_stride; | 132 | u32 layer_stride; |
| 168 | u32 block_size; | 133 | u32 block_size; |
| 169 | u32 x_shift; | 134 | u32 x_shift; |
| @@ -354,46 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | |||
| 354 | 319 | ||
| 355 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 320 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 356 | 321 | ||
| 357 | void ASTCDecoderPass::MakeDataBuffer() { | ||
| 358 | constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_ENCODINGS_VALUES) + sizeof(SWIZZLE_TABLE); | ||
| 359 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 360 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 361 | .pNext = nullptr, | ||
| 362 | .flags = 0, | ||
| 363 | .size = TOTAL_BUFFER_SIZE, | ||
| 364 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 365 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 366 | .queueFamilyIndexCount = 0, | ||
| 367 | .pQueueFamilyIndices = nullptr, | ||
| 368 | }); | ||
| 369 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | ||
| 370 | |||
| 371 | const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); | ||
| 372 | std::memcpy(staging_ref.mapped_span.data(), &ASTC_ENCODINGS_VALUES, | ||
| 373 | sizeof(ASTC_ENCODINGS_VALUES)); | ||
| 374 | // Tack on the swizzle table at the end of the buffer | ||
| 375 | std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_ENCODINGS_VALUES), &SWIZZLE_TABLE, | ||
| 376 | sizeof(SWIZZLE_TABLE)); | ||
| 377 | |||
| 378 | scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, | ||
| 379 | TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { | ||
| 380 | static constexpr VkMemoryBarrier write_barrier{ | ||
| 381 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 382 | .pNext = nullptr, | ||
| 383 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 384 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, | ||
| 385 | }; | ||
| 386 | const VkBufferCopy copy{ | ||
| 387 | .srcOffset = offset, | ||
| 388 | .dstOffset = 0, | ||
| 389 | .size = TOTAL_BUFFER_SIZE, | ||
| 390 | }; | ||
| 391 | cmdbuf.CopyBuffer(src, dst, copy); | ||
| 392 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 393 | 0, write_barrier); | ||
| 394 | }); | ||
| 395 | } | ||
| 396 | |||
| 397 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | 322 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, |
| 398 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 323 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 399 | using namespace VideoCommon::Accelerated; | 324 | using namespace VideoCommon::Accelerated; |
| @@ -402,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 402 | VideoCore::Surface::DefaultBlockHeight(image.info.format), | 327 | VideoCore::Surface::DefaultBlockHeight(image.info.format), |
| 403 | }; | 328 | }; |
| 404 | scheduler.RequestOutsideRenderPassOperationContext(); | 329 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 405 | if (!data_buffer) { | ||
| 406 | MakeDataBuffer(); | ||
| 407 | } | ||
| 408 | const VkPipeline vk_pipeline = *pipeline; | 330 | const VkPipeline vk_pipeline = *pipeline; |
| 409 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | 331 | const VkImageAspectFlags aspect_mask = image.AspectMask(); |
| 410 | const VkImage vk_image = image.Handle(); | 332 | const VkImage vk_image = image.Handle(); |
| @@ -436,16 +358,13 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 436 | }); | 358 | }); |
| 437 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { | 359 | for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { |
| 438 | const size_t input_offset = swizzle.buffer_offset + map.offset; | 360 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 439 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | 361 | const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 8U); |
| 440 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | 362 | const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U); |
| 441 | const u32 num_dispatches_z = image.info.resources.layers; | 363 | const u32 num_dispatches_z = image.info.resources.layers; |
| 442 | 364 | ||
| 443 | update_descriptor_queue.Acquire(); | 365 | update_descriptor_queue.Acquire(); |
| 444 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, | 366 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, |
| 445 | image.guest_size_bytes - swizzle.buffer_offset); | 367 | image.guest_size_bytes - swizzle.buffer_offset); |
| 446 | update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(ASTC_ENCODINGS_VALUES)); | ||
| 447 | update_descriptor_queue.AddBuffer(*data_buffer, sizeof(ASTC_ENCODINGS_VALUES), | ||
| 448 | sizeof(SWIZZLE_TABLE)); | ||
| 449 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 368 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 450 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | 369 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 451 | 370 | ||
| @@ -453,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 453 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 372 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 454 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 373 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 455 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 374 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 375 | ASSERT(params.bytes_per_block_log2 == 4); | ||
| 456 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, | 376 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, |
| 457 | params, descriptor_data](vk::CommandBuffer cmdbuf) { | 377 | params, descriptor_data](vk::CommandBuffer cmdbuf) { |
| 458 | const AstcPushConstants uniforms{ | 378 | const AstcPushConstants uniforms{ |
| 459 | .blocks_dims = block_dims, | 379 | .blocks_dims = block_dims, |
| 460 | .bytes_per_block_log2 = params.bytes_per_block_log2, | ||
| 461 | .layer_stride = params.layer_stride, | 380 | .layer_stride = params.layer_stride, |
| 462 | .block_size = params.block_size, | 381 | .block_size = params.block_size, |
| 463 | .x_shift = params.x_shift, | 382 | .x_shift = params.x_shift, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 114aef2bd..c7b92cce0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -96,15 +96,10 @@ public: | |||
| 96 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 96 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 97 | 97 | ||
| 98 | private: | 98 | private: |
| 99 | void MakeDataBuffer(); | ||
| 100 | |||
| 101 | VKScheduler& scheduler; | 99 | VKScheduler& scheduler; |
| 102 | StagingBufferPool& staging_buffer_pool; | 100 | StagingBufferPool& staging_buffer_pool; |
| 103 | VKUpdateDescriptorQueue& update_descriptor_queue; | 101 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 104 | MemoryAllocator& memory_allocator; | 102 | MemoryAllocator& memory_allocator; |
| 105 | |||
| 106 | vk::Buffer data_buffer; | ||
| 107 | MemoryCommit data_buffer_commit; | ||
| 108 | }; | 103 | }; |
| 109 | 104 | ||
| 110 | } // namespace Vulkan | 105 | } // namespace Vulkan |