diff options
| author | 2021-07-31 22:24:15 -0400 | |
|---|---|---|
| committer | 2021-08-01 01:22:26 -0400 | |
| commit | 5ab80535118e593ef3add3ce2b5935437e1dc1d3 (patch) | |
| tree | a051a40c1ce6bef0d60e1814cc180aa091ffd61f /src/video_core/renderer_vulkan | |
| parent | astc_decoder: Make use of uvec4 for payload data (diff) | |
| download | yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.gz yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.xz yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.zip | |
astc_decoder: Compute offset swizzles in-shader
Removes the dependency on the swizzle table buffer and on a uniform (`bytes_per_block_log2`) whose value is constant for all ASTC texture sizes.
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 67 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.h | 5 |
2 files changed, 5 insertions, 67 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 328813a57..d13d58e8c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -34,9 +34,8 @@ using Tegra::Texture::SWIZZLE_TABLE; | |||
| 34 | namespace { | 34 | namespace { |
| 35 | 35 | ||
| 36 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; | 36 | constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; |
| 37 | constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; | 37 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1; |
| 38 | constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; | 38 | constexpr size_t ASTC_NUM_BINDINGS = 2; |
| 39 | constexpr size_t ASTC_NUM_BINDINGS = 3; | ||
| 40 | 39 | ||
| 41 | template <size_t size> | 40 | template <size_t size> |
| 42 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ | 41 | inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ |
| @@ -81,13 +80,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR | |||
| 81 | .pImmutableSamplers = nullptr, | 80 | .pImmutableSamplers = nullptr, |
| 82 | }, | 81 | }, |
| 83 | { | 82 | { |
| 84 | .binding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 85 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 86 | .descriptorCount = 1, | ||
| 87 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 88 | .pImmutableSamplers = nullptr, | ||
| 89 | }, | ||
| 90 | { | ||
| 91 | .binding = ASTC_BINDING_OUTPUT_IMAGE, | 83 | .binding = ASTC_BINDING_OUTPUT_IMAGE, |
| 92 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, | 84 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, |
| 93 | .descriptorCount = 1, | 85 | .descriptorCount = 1, |
| @@ -98,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR | |||
| 98 | 90 | ||
| 99 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ | 91 | constexpr DescriptorBankInfo ASTC_BANK_INFO{ |
| 100 | .uniform_buffers = 0, | 92 | .uniform_buffers = 0, |
| 101 | .storage_buffers = 2, | 93 | .storage_buffers = 1, |
| 102 | .texture_buffers = 0, | 94 | .texture_buffers = 0, |
| 103 | .image_buffers = 0, | 95 | .image_buffers = 0, |
| 104 | .textures = 0, | 96 | .textures = 0, |
| 105 | .images = 1, | 97 | .images = 1, |
| 106 | .score = 3, | 98 | .score = 2, |
| 107 | }; | 99 | }; |
| 108 | 100 | ||
| 109 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ | 101 | constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ |
| @@ -126,14 +118,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | |||
| 126 | .stride = sizeof(DescriptorUpdateEntry), | 118 | .stride = sizeof(DescriptorUpdateEntry), |
| 127 | }, | 119 | }, |
| 128 | { | 120 | { |
| 129 | .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, | ||
| 130 | .dstArrayElement = 0, | ||
| 131 | .descriptorCount = 1, | ||
| 132 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 133 | .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry), | ||
| 134 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 135 | }, | ||
| 136 | { | ||
| 137 | .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, | 121 | .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, |
| 138 | .dstArrayElement = 0, | 122 | .dstArrayElement = 0, |
| 139 | .descriptorCount = 1, | 123 | .descriptorCount = 1, |
| @@ -145,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS> | |||
| 145 | 129 | ||
| 146 | struct AstcPushConstants { | 130 | struct AstcPushConstants { |
| 147 | std::array<u32, 2> blocks_dims; | 131 | std::array<u32, 2> blocks_dims; |
| 148 | u32 bytes_per_block_log2; | ||
| 149 | u32 layer_stride; | 132 | u32 layer_stride; |
| 150 | u32 block_size; | 133 | u32 block_size; |
| 151 | u32 x_shift; | 134 | u32 x_shift; |
| @@ -336,42 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_, | |||
| 336 | 319 | ||
| 337 | ASTCDecoderPass::~ASTCDecoderPass() = default; | 320 | ASTCDecoderPass::~ASTCDecoderPass() = default; |
| 338 | 321 | ||
| 339 | void ASTCDecoderPass::MakeDataBuffer() { | ||
| 340 | constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE); | ||
| 341 | data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 342 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 343 | .pNext = nullptr, | ||
| 344 | .flags = 0, | ||
| 345 | .size = TOTAL_BUFFER_SIZE, | ||
| 346 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 347 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 348 | .queueFamilyIndexCount = 0, | ||
| 349 | .pQueueFamilyIndices = nullptr, | ||
| 350 | }); | ||
| 351 | data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload); | ||
| 352 | |||
| 353 | const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload); | ||
| 354 | std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE)); | ||
| 355 | |||
| 356 | scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer, | ||
| 357 | TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) { | ||
| 358 | static constexpr VkMemoryBarrier write_barrier{ | ||
| 359 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 360 | .pNext = nullptr, | ||
| 361 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 362 | .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, | ||
| 363 | }; | ||
| 364 | const VkBufferCopy copy{ | ||
| 365 | .srcOffset = offset, | ||
| 366 | .dstOffset = 0, | ||
| 367 | .size = TOTAL_BUFFER_SIZE, | ||
| 368 | }; | ||
| 369 | cmdbuf.CopyBuffer(src, dst, copy); | ||
| 370 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 371 | 0, write_barrier); | ||
| 372 | }); | ||
| 373 | } | ||
| 374 | |||
| 375 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | 322 | void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, |
| 376 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | 323 | std::span<const VideoCommon::SwizzleParameters> swizzles) { |
| 377 | using namespace VideoCommon::Accelerated; | 324 | using namespace VideoCommon::Accelerated; |
| @@ -380,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 380 | VideoCore::Surface::DefaultBlockHeight(image.info.format), | 327 | VideoCore::Surface::DefaultBlockHeight(image.info.format), |
| 381 | }; | 328 | }; |
| 382 | scheduler.RequestOutsideRenderPassOperationContext(); | 329 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 383 | if (!data_buffer) { | ||
| 384 | MakeDataBuffer(); | ||
| 385 | } | ||
| 386 | const VkPipeline vk_pipeline = *pipeline; | 330 | const VkPipeline vk_pipeline = *pipeline; |
| 387 | const VkImageAspectFlags aspect_mask = image.AspectMask(); | 331 | const VkImageAspectFlags aspect_mask = image.AspectMask(); |
| 388 | const VkImage vk_image = image.Handle(); | 332 | const VkImage vk_image = image.Handle(); |
| @@ -421,7 +365,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 421 | update_descriptor_queue.Acquire(); | 365 | update_descriptor_queue.Acquire(); |
| 422 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, | 366 | update_descriptor_queue.AddBuffer(map.buffer, input_offset, |
| 423 | image.guest_size_bytes - swizzle.buffer_offset); | 367 | image.guest_size_bytes - swizzle.buffer_offset); |
| 424 | update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE)); | ||
| 425 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); | 368 | update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); |
| 426 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | 369 | const void* const descriptor_data{update_descriptor_queue.UpdateData()}; |
| 427 | 370 | ||
| @@ -429,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, | |||
| 429 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | 372 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); |
| 430 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); | 373 | ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); |
| 431 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); | 374 | ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); |
| 375 | ASSERT(params.bytes_per_block_log2 == 4); | ||
| 432 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, | 376 | scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, |
| 433 | params, descriptor_data](vk::CommandBuffer cmdbuf) { | 377 | params, descriptor_data](vk::CommandBuffer cmdbuf) { |
| 434 | const AstcPushConstants uniforms{ | 378 | const AstcPushConstants uniforms{ |
| 435 | .blocks_dims = block_dims, | 379 | .blocks_dims = block_dims, |
| 436 | .bytes_per_block_log2 = params.bytes_per_block_log2, | ||
| 437 | .layer_stride = params.layer_stride, | 380 | .layer_stride = params.layer_stride, |
| 438 | .block_size = params.block_size, | 381 | .block_size = params.block_size, |
| 439 | .x_shift = params.x_shift, | 382 | .x_shift = params.x_shift, |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 114aef2bd..c7b92cce0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -96,15 +96,10 @@ public: | |||
| 96 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 96 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 97 | 97 | ||
| 98 | private: | 98 | private: |
| 99 | void MakeDataBuffer(); | ||
| 100 | |||
| 101 | VKScheduler& scheduler; | 99 | VKScheduler& scheduler; |
| 102 | StagingBufferPool& staging_buffer_pool; | 100 | StagingBufferPool& staging_buffer_pool; |
| 103 | VKUpdateDescriptorQueue& update_descriptor_queue; | 101 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 104 | MemoryAllocator& memory_allocator; | 102 | MemoryAllocator& memory_allocator; |
| 105 | |||
| 106 | vk::Buffer data_buffer; | ||
| 107 | MemoryCommit data_buffer_commit; | ||
| 108 | }; | 103 | }; |
| 109 | 104 | ||
| 110 | } // namespace Vulkan | 105 | } // namespace Vulkan |