author     Rodrigo Locatti    2021-03-30 19:31:52 -0300
committer  GitHub             2021-03-30 19:31:52 -0300
commit     5ee669466fcebd2258229ed6bfe6b5e5529e0200 (patch)
tree       6dbf84fb5c2c9656f1d1ef6c46b2527ea1a205ff /src/video_core/renderer_vulkan
parent     Merge pull request #6116 from german77/userArgument (diff)
parent     astc_decoder: Refactor for style and more efficient memory use (diff)
Merge pull request #5927 from ameerj/astc-compute
video_core: Accelerate ASTC texture decoding using compute shaders
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp    |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp  | 333
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h    |  32
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp    |   5
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h      |   1
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp |  49
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h   |  19
7 files changed, 431 insertions, 10 deletions
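
For orientation before the diffs: ASTCDecoderPass::Assemble (added in vk_compute_pass.cpp below) dispatches one compute workgroup per 32x32 group of ASTC blocks and uses the Z dimension for array layers, via Common::DivCeil. A standalone sketch of that dispatch arithmetic, with hypothetical helper names that are not part of this patch:

#include <cstdint>

// Hypothetical mirror of the dispatch math in ASTCDecoderPass::Assemble:
// one workgroup covers a 32x32 group of ASTC blocks, Z covers array layers.
constexpr std::uint32_t DivCeil(std::uint32_t n, std::uint32_t d) {
    return (n + d - 1) / d;
}

struct DispatchSize {
    std::uint32_t x, y, z;
};

constexpr DispatchSize AstcDispatchSize(std::uint32_t blocks_x, std::uint32_t blocks_y,
                                        std::uint32_t layers) {
    return {DivCeil(blocks_x, 32u), DivCeil(blocks_y, 32u), layers};
}

// Example: a 1024x1024 texture with 8x8 ASTC blocks has 128x128 blocks,
// so each level of that size is decoded with a 4x4x1 dispatch.
static_assert(AstcDispatchSize(128, 128, 1).x == 4 && AstcDispatchSize(128, 128, 1).y == 4);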
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 19aaf034f..f088447e9 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -166,7 +166,7 @@ struct FormatTuple {
     {VK_FORMAT_R16G16_SINT, Attachable | Storage},  // R16G16_SINT
     {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
     {VK_FORMAT_UNDEFINED},                          // R32G32B32_FLOAT
-    {VK_FORMAT_R8G8B8A8_SRGB, Attachable},          // A8B8G8R8_SRGB
+    {VK_FORMAT_A8B8G8R8_SRGB_PACK32, Attachable},   // A8B8G8R8_SRGB
     {VK_FORMAT_R8G8_UNORM, Attachable | Storage},   // R8G8_UNORM
     {VK_FORMAT_R8G8_SNORM, Attachable | Storage},   // R8G8_SNORM
     {VK_FORMAT_R8G8_SINT, Attachable | Storage},    // R8G8_SINT
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 2f9a7b028..e11406e58 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -11,18 +11,39 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/div_ceil.h"
+#include "video_core/host_shaders/astc_decoder_comp_spv.h"
 #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
 #include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/texture_cache/accelerated_swizzle.h"
+#include "video_core/texture_cache/types.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/decoders.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
 namespace Vulkan {
+
+using Tegra::Texture::SWIZZLE_TABLE;
+using Tegra::Texture::ASTC::EncodingsValues;
+using namespace Tegra::Texture::ASTC;
+
 namespace {
+
+constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
+constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
+constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
+constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
+constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
+constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5;
+constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6;
+constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7;
+
 VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
     return {
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
@@ -50,6 +71,67 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
     }};
 }
 
+std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
+    return {{
+        {
+            .binding = ASTC_BINDING_INPUT_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_ENC_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_6_TO_8_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_7_TO_8_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_8_TO_8_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_SWIZZLE_BUFFER,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+        {
+            .binding = ASTC_BINDING_OUTPUT_IMAGE,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+            .pImmutableSamplers = nullptr,
+        },
+    }};
+}
+
 VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
     return {
         .dstBinding = 0,
@@ -61,6 +143,94 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
     };
 }
 
+std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() {
+    return {{
+        {
+            .dstBinding = ASTC_BINDING_INPUT_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_INPUT_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_ENC_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_ENC_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_6_TO_8_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_6_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_7_TO_8_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_7_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_8_TO_8_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_8_TO_8_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+        {
+            .dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .offset = ASTC_BINDING_OUTPUT_IMAGE * sizeof(DescriptorUpdateEntry),
+            .stride = sizeof(DescriptorUpdateEntry),
+        },
+    }};
+}
+
+struct AstcPushConstants {
+    std::array<u32, 2> blocks_dims;
+    u32 bytes_per_block_log2;
+    u32 layer_stride;
+    u32 block_size;
+    u32 x_shift;
+    u32 block_height;
+    u32 block_height_mask;
+};
+
+struct AstcBufferData {
+    decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
+    decltype(EncodingsValues) encoding_values = EncodingsValues;
+    decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
+    decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
+    decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
+    decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
+} constexpr ASTC_BUFFER_DATA;
+
 } // Anonymous namespace
 
 VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
@@ -238,4 +408,167 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
     return {staging.buffer, staging.offset};
 }
 
+ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
+                                 VKDescriptorPool& descriptor_pool_,
+                                 StagingBufferPool& staging_buffer_pool_,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue_,
+                                 MemoryAllocator& memory_allocator_)
+    : VKComputePass(device_, descriptor_pool_, BuildASTCDescriptorSetBindings(),
+                    BuildASTCPassDescriptorUpdateTemplateEntry(),
+                    BuildComputePushConstantRange(sizeof(AstcPushConstants)),
+                    ASTC_DECODER_COMP_SPV),
+      device{device_}, scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
+      update_descriptor_queue{update_descriptor_queue_}, memory_allocator{memory_allocator_} {}
+
+ASTCDecoderPass::~ASTCDecoderPass() = default;
+
+void ASTCDecoderPass::MakeDataBuffer() {
+    constexpr size_t TOTAL_BUFFER_SIZE = sizeof(ASTC_BUFFER_DATA) + sizeof(SWIZZLE_TABLE);
+    data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = TOTAL_BUFFER_SIZE,
+        .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    });
+    data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
+
+    const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
+    std::memcpy(staging_ref.mapped_span.data(), &ASTC_BUFFER_DATA, sizeof(ASTC_BUFFER_DATA));
+    // Tack on the swizzle table at the end of the buffer
+    std::memcpy(staging_ref.mapped_span.data() + sizeof(ASTC_BUFFER_DATA), &SWIZZLE_TABLE,
+                sizeof(SWIZZLE_TABLE));
+
+    scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
+                      TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyBuffer(src, dst,
+                          VkBufferCopy{
+                              .srcOffset = offset,
+                              .dstOffset = 0,
+                              .size = TOTAL_BUFFER_SIZE,
+                          });
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
+            VkMemoryBarrier{
+                .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
+            });
+    });
+}
+
+void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
+                               std::span<const VideoCommon::SwizzleParameters> swizzles) {
+    using namespace VideoCommon::Accelerated;
+    const std::array<u32, 2> block_dims{
+        VideoCore::Surface::DefaultBlockWidth(image.info.format),
+        VideoCore::Surface::DefaultBlockHeight(image.info.format),
+    };
+    scheduler.RequestOutsideRenderPassOperationContext();
+    if (!data_buffer) {
+        MakeDataBuffer();
+    }
+    const VkPipeline vk_pipeline = *pipeline;
+    const VkImageAspectFlags aspect_mask = image.AspectMask();
+    const VkImage vk_image = image.Handle();
+    const bool is_initialized = image.ExchangeInitialization();
+    scheduler.Record(
+        [vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) {
+            const VkImageMemoryBarrier image_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = vk_image,
+                .subresourceRange{
+                    .aspectMask = aspect_mask,
+                    .baseMipLevel = 0,
+                    .levelCount = VK_REMAINING_MIP_LEVELS,
+                    .baseArrayLayer = 0,
+                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
+                },
+            };
+            cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : 0,
+                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
+        });
+    for (const VideoCommon::SwizzleParameters& swizzle : swizzles) {
+        const size_t input_offset = swizzle.buffer_offset + map.offset;
+        const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
+        const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
+        const u32 num_dispatches_z = image.info.resources.layers;
+
+        update_descriptor_queue.Acquire();
+        update_descriptor_queue.AddBuffer(map.buffer, input_offset,
+                                          image.guest_size_bytes - swizzle.buffer_offset);
+        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
+                                          sizeof(AstcBufferData::encoding_values));
+        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_6_to_8),
+                                          sizeof(AstcBufferData::replicate_6_to_8));
+        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_7_to_8),
+                                          sizeof(AstcBufferData::replicate_7_to_8));
+        update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
+                                          sizeof(AstcBufferData::replicate_8_to_8));
+        update_descriptor_queue.AddBuffer(*data_buffer,
+                                          offsetof(AstcBufferData, replicate_byte_to_16),
+                                          sizeof(AstcBufferData::replicate_byte_to_16));
+        update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
+                                          sizeof(SWIZZLE_TABLE));
+        update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
+
+        const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
+        const VkPipelineLayout vk_layout = *layout;
+
+        // To unswizzle the ASTC data
+        const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
+        ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
+        ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
+        scheduler.Record([vk_layout, num_dispatches_x, num_dispatches_y, num_dispatches_z,
+                          block_dims, params, set](vk::CommandBuffer cmdbuf) {
+            const AstcPushConstants uniforms{
+                .blocks_dims = block_dims,
+                .bytes_per_block_log2 = params.bytes_per_block_log2,
+                .layer_stride = params.layer_stride,
+                .block_size = params.block_size,
+                .x_shift = params.x_shift,
+                .block_height = params.block_height,
+                .block_height_mask = params.block_height_mask,
+            };
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, vk_layout, 0, set, {});
+            cmdbuf.PushConstants(vk_layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
+            cmdbuf.Dispatch(num_dispatches_x, num_dispatches_y, num_dispatches_z);
+        });
+    }
+    scheduler.Record([vk_image, aspect_mask](vk::CommandBuffer cmdbuf) {
+        const VkImageMemoryBarrier image_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = vk_image,
+            .subresourceRange{
+                .aspectMask = aspect_mask,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
+    });
+}
+
 } // namespace Vulkan
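
A note on the push constant range built with BuildComputePushConstantRange(sizeof(AstcPushConstants)) above: the block is eight tightly packed u32 values, 32 bytes in total, well under the 128-byte minimum maxPushConstantsSize guaranteed by the Vulkan specification, so no device-dependent fallback is needed. A standalone size check, for illustration only and not part of the patch:

#include <array>
#include <cstdint>

using u32 = std::uint32_t;

// Mirror of the AstcPushConstants block added in vk_compute_pass.cpp,
// reproduced here only to illustrate its size and packing.
struct AstcPushConstants {
    std::array<u32, 2> blocks_dims;
    u32 bytes_per_block_log2;
    u32 layer_stride;
    u32 block_size;
    u32 x_shift;
    u32 block_height;
    u32 block_height_mask;
};

// 8 x 4 bytes = 32 bytes, comfortably below the 128-byte minimum
// push constant budget required by the Vulkan specification.
static_assert(sizeof(AstcPushConstants) == 32);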
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 17d781d99..5ea187c30 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -11,14 +11,21 @@
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
+#include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+namespace VideoCommon {
+struct SwizzleParameters;
+}
+
 namespace Vulkan {
 
 class Device;
 class StagingBufferPool;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
+class Image;
+struct StagingBufferRef;
 
 class VKComputePass {
 public:
@@ -77,4 +84,29 @@ private:
     VKUpdateDescriptorQueue& update_descriptor_queue;
 };
 
+class ASTCDecoderPass final : public VKComputePass {
+public:
+    explicit ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
+                             VKDescriptorPool& descriptor_pool_,
+                             StagingBufferPool& staging_buffer_pool_,
+                             VKUpdateDescriptorQueue& update_descriptor_queue_,
+                             MemoryAllocator& memory_allocator_);
+    ~ASTCDecoderPass();
+
+    void Assemble(Image& image, const StagingBufferRef& map,
+                  std::span<const VideoCommon::SwizzleParameters> swizzles);
+
+private:
+    void MakeDataBuffer();
+
+    const Device& device;
+    VKScheduler& scheduler;
+    StagingBufferPool& staging_buffer_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+    MemoryAllocator& memory_allocator;
+
+    vk::Buffer data_buffer;
+    MemoryCommit data_buffer_commit;
+};
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index dfd38f575..df5b7b172 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -241,7 +241,10 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
       update_descriptor_queue(device, scheduler),
       blit_image(device, scheduler, state_tracker, descriptor_pool),
-      texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image},
+      astc_decoder_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue,
+                        memory_allocator),
+      texture_cache_runtime{device, scheduler, memory_allocator,
+                            staging_pool, blit_image, astc_decoder_pass},
       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
       buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                            update_descriptor_queue, descriptor_pool),
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index acea1ba2d..235afc6f3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -173,6 +173,7 @@ private:
     VKDescriptorPool descriptor_pool;
     VKUpdateDescriptorQueue update_descriptor_queue;
     BlitImageHelper blit_image;
+    ASTCDecoderPass astc_decoder_pass;
 
     GraphicsPipelineCacheKey graphics_key;
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 22a1014a9..18155e449 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -10,6 +10,7 @@
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/renderer_vulkan/blit_image.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_compute_pass.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -807,7 +808,7 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
         commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
     }
     if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
-        flags |= VideoCommon::ImageFlagBits::Converted;
+        flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
     }
     if (runtime.device.HasDebuggingToolAttached()) {
         if (image) {
@@ -816,6 +817,38 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
             buffer.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
         }
     }
+    static constexpr VkImageViewUsageCreateInfo storage_image_view_usage_create_info{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+        .pNext = nullptr,
+        .usage = VK_IMAGE_USAGE_STORAGE_BIT,
+    };
+    if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
+        const auto& device = runtime.device.GetLogical();
+        storage_image_views.reserve(info.resources.levels);
+        for (s32 level = 0; level < info.resources.levels; ++level) {
+            storage_image_views.push_back(device.CreateImageView(VkImageViewCreateInfo{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+                .pNext = &storage_image_view_usage_create_info,
+                .flags = 0,
+                .image = *image,
+                .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
+                .format = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+                .components{
+                    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+                    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+                },
+                .subresourceRange{
+                    .aspectMask = aspect_mask,
+                    .baseMipLevel = static_cast<u32>(level),
+                    .levelCount = 1,
+                    .baseArrayLayer = 0,
+                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
+                },
+            }));
+        }
+    }
 }
 
 void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
@@ -918,7 +951,6 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
         }
     }
     const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
-    const VkFormat vk_format = format_info.format;
     const VkImageViewUsageCreateInfo image_view_usage{
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
         .pNext = nullptr,
@@ -930,7 +962,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
         .flags = 0,
         .image = image.Handle(),
         .viewType = VkImageViewType{},
-        .format = vk_format,
+        .format = format_info.format,
         .components{
             .r = ComponentSwizzle(swizzle[0]),
             .g = ComponentSwizzle(swizzle[1]),
@@ -982,7 +1014,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
         .pNext = nullptr,
         .flags = 0,
         .buffer = image.Buffer(),
-        .format = vk_format,
+        .format = format_info.format,
         .offset = 0, // TODO: Redesign buffer cache to support this
         .range = image.guest_size_bytes,
     });
@@ -1167,4 +1199,13 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
     }
 }
 
+void TextureCacheRuntime::AccelerateImageUpload(
+    Image& image, const StagingBufferRef& map,
+    std::span<const VideoCommon::SwizzleParameters> swizzles) {
+    if (IsPixelFormatASTC(image.info.format)) {
+        return astc_decoder_pass.Assemble(image, map, swizzles);
+    }
+    UNREACHABLE();
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 3aee27ce0..628785d5e 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -20,6 +20,7 @@ using VideoCommon::Offset2D;
 using VideoCommon::RenderTargets;
 using VideoCore::Surface::PixelFormat;
 
+class ASTCDecoderPass;
 class BlitImageHelper;
 class Device;
 class Image;
@@ -60,6 +61,7 @@ struct TextureCacheRuntime {
     MemoryAllocator& memory_allocator;
     StagingBufferPool& staging_buffer_pool;
     BlitImageHelper& blit_image_helper;
+    ASTCDecoderPass& astc_decoder_pass;
     std::unordered_map<RenderPassKey, vk::RenderPass> renderpass_cache{};
 
     void Finish();
@@ -83,9 +85,7 @@ struct TextureCacheRuntime {
     }
 
     void AccelerateImageUpload(Image&, const StagingBufferRef&,
-                               std::span<const VideoCommon::SwizzleParameters>) {
-        UNREACHABLE();
-    }
+                               std::span<const VideoCommon::SwizzleParameters>);
 
     void InsertUploadMemoryBarrier() {}
 
@@ -121,15 +121,26 @@ public:
         return *buffer;
     }
 
-    [[nodiscard]] VkImageCreateFlags AspectMask() const noexcept {
+    [[nodiscard]] VkImageAspectFlags AspectMask() const noexcept {
         return aspect_mask;
     }
 
+    [[nodiscard]] VkImageView StorageImageView(s32 level) const noexcept {
+        return *storage_image_views[level];
+    }
+
+    /// Returns true when the image is already initialized and mark it as initialized
+    [[nodiscard]] bool ExchangeInitialization() noexcept {
+        return std::exchange(initialized, true);
+    }
+
 private:
     VKScheduler* scheduler;
     vk::Image image;
     vk::Buffer buffer;
     MemoryCommit commit;
+    vk::ImageView image_view;
+    std::vector<vk::ImageView> storage_image_views;
     VkImageAspectFlags aspect_mask = 0;
     bool initialized = false;
 };
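
The Image additions above are small but worth spelling out: ExchangeInitialization() relies on std::exchange to return the previous initialized flag while setting it to true, which is what lets ASTCDecoderPass pick VK_IMAGE_LAYOUT_UNDEFINED as the old layout only on the first upload. A minimal standalone illustration of that idiom, not code from this patch:

#include <cassert>
#include <utility>

// Minimal stand-in for the initialized flag on Vulkan::Image.
struct InitFlag {
    bool initialized = false;

    // Returns the previous value and marks the object as initialized,
    // mirroring Image::ExchangeInitialization() above.
    bool ExchangeInitialization() noexcept {
        return std::exchange(initialized, true);
    }
};

int main() {
    InitFlag image;
    assert(!image.ExchangeInitialization()); // first call: was not yet initialized
    assert(image.ExchangeInitialization());  // later calls: already initialized
    return 0;
}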