summary | refs | log | tree | commit | diff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
author: Gravatar ameerj 2021-07-31 22:24:15 -0400
committer: Gravatar ameerj 2021-08-01 01:22:26 -0400
commit: 5ab80535118e593ef3add3ce2b5935437e1dc1d3 (patch)
tree: a051a40c1ce6bef0d60e1814cc180aa091ffd61f /src/video_core/renderer_vulkan
parent: astc_decoder: Make use of uvec4 for payload data (diff)
download: yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.gz
yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.tar.xz
yuzu-5ab80535118e593ef3add3ce2b5935437e1dc1d3.zip
astc_decoder: Compute offset swizzles in-shader
Alleviates the dependency on the swizzle table and a uniform which is constant for all ASTC texture sizes.
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.cpp 67
-rw-r--r-- src/video_core/renderer_vulkan/vk_compute_pass.h 5
2 files changed, 5 insertions, 67 deletions
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 328813a57..d13d58e8c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -34,9 +34,8 @@ using Tegra::Texture::SWIZZLE_TABLE;
34namespace { 34namespace {
35 35
36constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; 36constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
37constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; 37constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
38constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; 38constexpr size_t ASTC_NUM_BINDINGS = 2;
39constexpr size_t ASTC_NUM_BINDINGS = 3;
40 39
41template <size_t size> 40template <size_t size>
42inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ 41inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
@@ -81,13 +80,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR
81 .pImmutableSamplers = nullptr, 80 .pImmutableSamplers = nullptr,
82 }, 81 },
83 { 82 {
84 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
85 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
86 .descriptorCount = 1,
87 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
88 .pImmutableSamplers = nullptr,
89 },
90 {
91 .binding = ASTC_BINDING_OUTPUT_IMAGE, 83 .binding = ASTC_BINDING_OUTPUT_IMAGE,
92 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 84 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
93 .descriptorCount = 1, 85 .descriptorCount = 1,
@@ -98,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR
98 90
99constexpr DescriptorBankInfo ASTC_BANK_INFO{ 91constexpr DescriptorBankInfo ASTC_BANK_INFO{
100 .uniform_buffers = 0, 92 .uniform_buffers = 0,
101 .storage_buffers = 2, 93 .storage_buffers = 1,
102 .texture_buffers = 0, 94 .texture_buffers = 0,
103 .image_buffers = 0, 95 .image_buffers = 0,
104 .textures = 0, 96 .textures = 0,
105 .images = 1, 97 .images = 1,
106 .score = 3, 98 .score = 2,
107}; 99};
108 100
109constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ 101constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
@@ -126,14 +118,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
126 .stride = sizeof(DescriptorUpdateEntry), 118 .stride = sizeof(DescriptorUpdateEntry),
127 }, 119 },
128 { 120 {
129 .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
130 .dstArrayElement = 0,
131 .descriptorCount = 1,
132 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
133 .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry),
134 .stride = sizeof(DescriptorUpdateEntry),
135 },
136 {
137 .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, 121 .dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
138 .dstArrayElement = 0, 122 .dstArrayElement = 0,
139 .descriptorCount = 1, 123 .descriptorCount = 1,
@@ -145,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
145 129
146struct AstcPushConstants { 130struct AstcPushConstants {
147 std::array<u32, 2> blocks_dims; 131 std::array<u32, 2> blocks_dims;
148 u32 bytes_per_block_log2;
149 u32 layer_stride; 132 u32 layer_stride;
150 u32 block_size; 133 u32 block_size;
151 u32 x_shift; 134 u32 x_shift;
@@ -336,42 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
336 319
337ASTCDecoderPass::~ASTCDecoderPass() = default; 320ASTCDecoderPass::~ASTCDecoderPass() = default;
338 321
339void ASTCDecoderPass::MakeDataBuffer() {
340 constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE);
341 data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
342 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
343 .pNext = nullptr,
344 .flags = 0,
345 .size = TOTAL_BUFFER_SIZE,
346 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
347 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
348 .queueFamilyIndexCount = 0,
349 .pQueueFamilyIndices = nullptr,
350 });
351 data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
352
353 const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
354 std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE));
355
356 scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
357 TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
358 static constexpr VkMemoryBarrier write_barrier{
359 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
360 .pNext = nullptr,
361 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
362 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
363 };
364 const VkBufferCopy copy{
365 .srcOffset = offset,
366 .dstOffset = 0,
367 .size = TOTAL_BUFFER_SIZE,
368 };
369 cmdbuf.CopyBuffer(src, dst, copy);
370 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
371 0, write_barrier);
372 });
373}
374
375void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, 322void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
376 std::span<const VideoCommon::SwizzleParameters> swizzles) { 323 std::span<const VideoCommon::SwizzleParameters> swizzles) {
377 using namespace VideoCommon::Accelerated; 324 using namespace VideoCommon::Accelerated;
@@ -380,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
380 VideoCore::Surface::DefaultBlockHeight(image.info.format), 327 VideoCore::Surface::DefaultBlockHeight(image.info.format),
381 }; 328 };
382 scheduler.RequestOutsideRenderPassOperationContext(); 329 scheduler.RequestOutsideRenderPassOperationContext();
383 if (!data_buffer) {
384 MakeDataBuffer();
385 }
386 const VkPipeline vk_pipeline = *pipeline; 330 const VkPipeline vk_pipeline = *pipeline;
387 const VkImageAspectFlags aspect_mask = image.AspectMask(); 331 const VkImageAspectFlags aspect_mask = image.AspectMask();
388 const VkImage vk_image = image.Handle(); 332 const VkImage vk_image = image.Handle();
@@ -421,7 +365,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
421 update_descriptor_queue.Acquire(); 365 update_descriptor_queue.Acquire();
422 update_descriptor_queue.AddBuffer(map.buffer, input_offset, 366 update_descriptor_queue.AddBuffer(map.buffer, input_offset,
423 image.guest_size_bytes - swizzle.buffer_offset); 367 image.guest_size_bytes - swizzle.buffer_offset);
424 update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE));
425 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 368 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
426 const void* const descriptor_data{update_descriptor_queue.UpdateData()}; 369 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
427 370
@@ -429,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
429 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 372 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
430 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 373 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
431 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 374 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
375 ASSERT(params.bytes_per_block_log2 == 4);
432 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, 376 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
433 params, descriptor_data](vk::CommandBuffer cmdbuf) { 377 params, descriptor_data](vk::CommandBuffer cmdbuf) {
434 const AstcPushConstants uniforms{ 378 const AstcPushConstants uniforms{
435 .blocks_dims = block_dims, 379 .blocks_dims = block_dims,
436 .bytes_per_block_log2 = params.bytes_per_block_log2,
437 .layer_stride = params.layer_stride, 380 .layer_stride = params.layer_stride,
438 .block_size = params.block_size, 381 .block_size = params.block_size,
439 .x_shift = params.x_shift, 382 .x_shift = params.x_shift,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 114aef2bd..c7b92cce0 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -96,15 +96,10 @@ public:
96 std::span<const VideoCommon::SwizzleParameters> swizzles); 96 std::span<const VideoCommon::SwizzleParameters> swizzles);
97 97
98private: 98private:
99 void MakeDataBuffer();
100
101 VKScheduler& scheduler; 99 VKScheduler& scheduler;
102 StagingBufferPool& staging_buffer_pool; 100 StagingBufferPool& staging_buffer_pool;
103 VKUpdateDescriptorQueue& update_descriptor_queue; 101 VKUpdateDescriptorQueue& update_descriptor_queue;
104 MemoryAllocator& memory_allocator; 102 MemoryAllocator& memory_allocator;
105
106 vk::Buffer data_buffer;
107 MemoryCommit data_buffer_commit;
108}; 103};
109 104
110} // namespace Vulkan 105} // namespace Vulkan