summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp46
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp67
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h5
4 files changed, 25 insertions, 109 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 392f09c68..74ce058a9 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -10,8 +10,7 @@
10#define END_PUSH_CONSTANTS }; 10#define END_PUSH_CONSTANTS };
11#define UNIFORM(n) 11#define UNIFORM(n)
12#define BINDING_INPUT_BUFFER 0 12#define BINDING_INPUT_BUFFER 0
13#define BINDING_SWIZZLE_BUFFER 1 13#define BINDING_OUTPUT_IMAGE 1
14#define BINDING_OUTPUT_IMAGE 2
15 14
16#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv 15#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
17 16
@@ -19,7 +18,6 @@
19#define END_PUSH_CONSTANTS 18#define END_PUSH_CONSTANTS
20#define UNIFORM(n) layout(location = n) uniform 19#define UNIFORM(n) layout(location = n) uniform
21#define BINDING_INPUT_BUFFER 0 20#define BINDING_INPUT_BUFFER 0
22#define BINDING_SWIZZLE_BUFFER 1
23#define BINDING_OUTPUT_IMAGE 0 21#define BINDING_OUTPUT_IMAGE 0
24 22
25#endif 23#endif
@@ -28,13 +26,11 @@ layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
28 26
29BEGIN_PUSH_CONSTANTS 27BEGIN_PUSH_CONSTANTS
30UNIFORM(1) uvec2 block_dims; 28UNIFORM(1) uvec2 block_dims;
31 29UNIFORM(2) uint layer_stride;
32UNIFORM(2) uint bytes_per_block_log2; 30UNIFORM(3) uint block_size;
33UNIFORM(3) uint layer_stride; 31UNIFORM(4) uint x_shift;
34UNIFORM(4) uint block_size; 32UNIFORM(5) uint block_height;
35UNIFORM(5) uint x_shift; 33UNIFORM(6) uint block_height_mask;
36UNIFORM(6) uint block_height;
37UNIFORM(7) uint block_height_mask;
38END_PUSH_CONSTANTS 34END_PUSH_CONSTANTS
39 35
40struct EncodingData { 36struct EncodingData {
@@ -53,35 +49,17 @@ struct TexelWeightParams {
53 bool void_extent_hdr; 49 bool void_extent_hdr;
54}; 50};
55 51
56// Swizzle data
57layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
58 uint swizzle_table[];
59};
60
61layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { 52layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
62 uvec4 astc_data[]; 53 uvec4 astc_data[];
63}; 54};
64 55
65layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; 56layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
66 57
67const uint GOB_SIZE_X = 64;
68const uint GOB_SIZE_Y = 8;
69const uint GOB_SIZE_Z = 1;
70const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
71
72const uint GOB_SIZE_X_SHIFT = 6; 58const uint GOB_SIZE_X_SHIFT = 6;
73const uint GOB_SIZE_Y_SHIFT = 3; 59const uint GOB_SIZE_Y_SHIFT = 3;
74const uint GOB_SIZE_Z_SHIFT = 0; 60const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT;
75const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
76
77const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
78
79const int BLOCK_SIZE_IN_BYTES = 16;
80 61
81const int BLOCK_INFO_ERROR = 0; 62const uint BYTES_PER_BLOCK_LOG2 = 4;
82const int BLOCK_INFO_VOID_EXTENT_HDR = 1;
83const int BLOCK_INFO_VOID_EXTENT_LDR = 2;
84const int BLOCK_INFO_NORMAL = 3;
85 63
86const int JUST_BITS = 0; 64const int JUST_BITS = 0;
87const int QUINT = 1; 65const int QUINT = 1;
@@ -168,8 +146,10 @@ int texel_vector_index = 0;
168uint unquantized_texel_weights[2][144]; 146uint unquantized_texel_weights[2][144];
169 147
170uint SwizzleOffset(uvec2 pos) { 148uint SwizzleOffset(uvec2 pos) {
171 pos = pos & SWIZZLE_MASK; 149 uint x = pos.x;
172 return swizzle_table[pos.y * 64 + pos.x]; 150 uint y = pos.y;
151 return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
152 (y % 2) * 16 + (x % 16);
173} 153}
174 154
175// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 155// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
@@ -1253,7 +1233,7 @@ void DecompressBlock(ivec3 coord) {
1253 1233
1254void main() { 1234void main() {
1255 uvec3 pos = gl_GlobalInvocationID; 1235 uvec3 pos = gl_GlobalInvocationID;
1256 pos.x <<= bytes_per_block_log2; 1236 pos.x <<= BYTES_PER_BLOCK_LOG2;
1257 1237
1258 // Read as soon as possible due to its latency 1238 // Read as soon as possible due to its latency
1259 const uint swizzle = SwizzleOffset(pos.xy); 1239 const uint swizzle = SwizzleOffset(pos.xy);
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index a2b264700..4e6f7cb00 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -68,7 +68,6 @@ UtilShaders::~UtilShaders() = default;
68void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, 68void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
69 std::span<const VideoCommon::SwizzleParameters> swizzles) { 69 std::span<const VideoCommon::SwizzleParameters> swizzles) {
70 static constexpr GLuint BINDING_INPUT_BUFFER = 0; 70 static constexpr GLuint BINDING_INPUT_BUFFER = 0;
71 static constexpr GLuint BINDING_SWIZZLE_BUFFER = 1;
72 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 71 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
73 72
74 const Extent2D tile_size{ 73 const Extent2D tile_size{
@@ -76,10 +75,9 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
76 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), 75 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
77 }; 76 };
78 program_manager.BindComputeProgram(astc_decoder_program.handle); 77 program_manager.BindComputeProgram(astc_decoder_program.handle);
79 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
80
81 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 78 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
82 glUniform2ui(1, tile_size.width, tile_size.height); 79 glUniform2ui(1, tile_size.width, tile_size.height);
80
83 // Ensure buffer data is valid before dispatching 81 // Ensure buffer data is valid before dispatching
84 glFlush(); 82 glFlush();
85 for (const SwizzleParameters& swizzle : swizzles) { 83 for (const SwizzleParameters& swizzle : swizzles) {
@@ -90,13 +88,13 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
90 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 88 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
91 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 89 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
92 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 90 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
91 ASSERT(params.bytes_per_block_log2 == 4);
93 92
94 glUniform1ui(2, params.bytes_per_block_log2); 93 glUniform1ui(2, params.layer_stride);
95 glUniform1ui(3, params.layer_stride); 94 glUniform1ui(3, params.block_size);
96 glUniform1ui(4, params.block_size); 95 glUniform1ui(4, params.x_shift);
97 glUniform1ui(5, params.x_shift); 96 glUniform1ui(5, params.block_height);
98 glUniform1ui(6, params.block_height); 97 glUniform1ui(6, params.block_height_mask);
99 glUniform1ui(7, params.block_height_mask);
100 98
101 // ASTC texture data 99 // ASTC texture data
102 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, 100 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 328813a57..d13d58e8c 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -34,9 +34,8 @@ using Tegra::Texture::SWIZZLE_TABLE;
34namespace { 34namespace {
35 35
36constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0; 36constexpr u32 ASTC_BINDING_INPUT_BUFFER = 0;
37constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 1; 37constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 1;
38constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 2; 38constexpr size_t ASTC_NUM_BINDINGS = 2;
39constexpr size_t ASTC_NUM_BINDINGS = 3;
40 39
41template <size_t size> 40template <size_t size>
42inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ 41inline constexpr VkPushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
@@ -81,13 +80,6 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR
81 .pImmutableSamplers = nullptr, 80 .pImmutableSamplers = nullptr,
82 }, 81 },
83 { 82 {
84 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
85 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
86 .descriptorCount = 1,
87 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
88 .pImmutableSamplers = nullptr,
89 },
90 {
91 .binding = ASTC_BINDING_OUTPUT_IMAGE, 83 .binding = ASTC_BINDING_OUTPUT_IMAGE,
92 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 84 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
93 .descriptorCount = 1, 85 .descriptorCount = 1,
@@ -98,12 +90,12 @@ constexpr std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> ASTC_DESCR
98 90
99constexpr DescriptorBankInfo ASTC_BANK_INFO{ 91constexpr DescriptorBankInfo ASTC_BANK_INFO{
100 .uniform_buffers = 0, 92 .uniform_buffers = 0,
101 .storage_buffers = 2, 93 .storage_buffers = 1,
102 .texture_buffers = 0, 94 .texture_buffers = 0,
103 .image_buffers = 0, 95 .image_buffers = 0,
104 .textures = 0, 96 .textures = 0,
105 .images = 1, 97 .images = 1,
106 .score = 3, 98 .score = 2,
107}; 99};
108 100
109constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{ 101constexpr VkDescriptorUpdateTemplateEntryKHR INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE{
@@ -126,14 +118,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
126 .stride = sizeof(DescriptorUpdateEntry), 118 .stride = sizeof(DescriptorUpdateEntry),
127 }, 119 },
128 { 120 {
129 .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
130 .dstArrayElement = 0,
131 .descriptorCount = 1,
132 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
133 .offset = ASTC_BINDING_SWIZZLE_BUFFER * sizeof(DescriptorUpdateEntry),
134 .stride = sizeof(DescriptorUpdateEntry),
135 },
136 {
137 .dstBinding = ASTC_BINDING_OUTPUT_IMAGE, 121 .dstBinding = ASTC_BINDING_OUTPUT_IMAGE,
138 .dstArrayElement = 0, 122 .dstArrayElement = 0,
139 .descriptorCount = 1, 123 .descriptorCount = 1,
@@ -145,7 +129,6 @@ constexpr std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
145 129
146struct AstcPushConstants { 130struct AstcPushConstants {
147 std::array<u32, 2> blocks_dims; 131 std::array<u32, 2> blocks_dims;
148 u32 bytes_per_block_log2;
149 u32 layer_stride; 132 u32 layer_stride;
150 u32 block_size; 133 u32 block_size;
151 u32 x_shift; 134 u32 x_shift;
@@ -336,42 +319,6 @@ ASTCDecoderPass::ASTCDecoderPass(const Device& device_, VKScheduler& scheduler_,
336 319
337ASTCDecoderPass::~ASTCDecoderPass() = default; 320ASTCDecoderPass::~ASTCDecoderPass() = default;
338 321
339void ASTCDecoderPass::MakeDataBuffer() {
340 constexpr size_t TOTAL_BUFFER_SIZE = sizeof(SWIZZLE_TABLE);
341 data_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
342 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
343 .pNext = nullptr,
344 .flags = 0,
345 .size = TOTAL_BUFFER_SIZE,
346 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
347 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
348 .queueFamilyIndexCount = 0,
349 .pQueueFamilyIndices = nullptr,
350 });
351 data_buffer_commit = memory_allocator.Commit(data_buffer, MemoryUsage::Upload);
352
353 const auto staging_ref = staging_buffer_pool.Request(TOTAL_BUFFER_SIZE, MemoryUsage::Upload);
354 std::memcpy(staging_ref.mapped_span.data(), &SWIZZLE_TABLE, sizeof(SWIZZLE_TABLE));
355
356 scheduler.Record([src = staging_ref.buffer, offset = staging_ref.offset, dst = *data_buffer,
357 TOTAL_BUFFER_SIZE](vk::CommandBuffer cmdbuf) {
358 static constexpr VkMemoryBarrier write_barrier{
359 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
360 .pNext = nullptr,
361 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
362 .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
363 };
364 const VkBufferCopy copy{
365 .srcOffset = offset,
366 .dstOffset = 0,
367 .size = TOTAL_BUFFER_SIZE,
368 };
369 cmdbuf.CopyBuffer(src, dst, copy);
370 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
371 0, write_barrier);
372 });
373}
374
375void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, 322void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
376 std::span<const VideoCommon::SwizzleParameters> swizzles) { 323 std::span<const VideoCommon::SwizzleParameters> swizzles) {
377 using namespace VideoCommon::Accelerated; 324 using namespace VideoCommon::Accelerated;
@@ -380,9 +327,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
380 VideoCore::Surface::DefaultBlockHeight(image.info.format), 327 VideoCore::Surface::DefaultBlockHeight(image.info.format),
381 }; 328 };
382 scheduler.RequestOutsideRenderPassOperationContext(); 329 scheduler.RequestOutsideRenderPassOperationContext();
383 if (!data_buffer) {
384 MakeDataBuffer();
385 }
386 const VkPipeline vk_pipeline = *pipeline; 330 const VkPipeline vk_pipeline = *pipeline;
387 const VkImageAspectFlags aspect_mask = image.AspectMask(); 331 const VkImageAspectFlags aspect_mask = image.AspectMask();
388 const VkImage vk_image = image.Handle(); 332 const VkImage vk_image = image.Handle();
@@ -421,7 +365,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
421 update_descriptor_queue.Acquire(); 365 update_descriptor_queue.Acquire();
422 update_descriptor_queue.AddBuffer(map.buffer, input_offset, 366 update_descriptor_queue.AddBuffer(map.buffer, input_offset,
423 image.guest_size_bytes - swizzle.buffer_offset); 367 image.guest_size_bytes - swizzle.buffer_offset);
424 update_descriptor_queue.AddBuffer(*data_buffer, 0, sizeof(SWIZZLE_TABLE));
425 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 368 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
426 const void* const descriptor_data{update_descriptor_queue.UpdateData()}; 369 const void* const descriptor_data{update_descriptor_queue.UpdateData()};
427 370
@@ -429,11 +372,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
429 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); 372 const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
430 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0})); 373 ASSERT(params.origin == (std::array<u32, 3>{0, 0, 0}));
431 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0})); 374 ASSERT(params.destination == (std::array<s32, 3>{0, 0, 0}));
375 ASSERT(params.bytes_per_block_log2 == 4);
432 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, 376 scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims,
433 params, descriptor_data](vk::CommandBuffer cmdbuf) { 377 params, descriptor_data](vk::CommandBuffer cmdbuf) {
434 const AstcPushConstants uniforms{ 378 const AstcPushConstants uniforms{
435 .blocks_dims = block_dims, 379 .blocks_dims = block_dims,
436 .bytes_per_block_log2 = params.bytes_per_block_log2,
437 .layer_stride = params.layer_stride, 380 .layer_stride = params.layer_stride,
438 .block_size = params.block_size, 381 .block_size = params.block_size,
439 .x_shift = params.x_shift, 382 .x_shift = params.x_shift,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 114aef2bd..c7b92cce0 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -96,15 +96,10 @@ public:
96 std::span<const VideoCommon::SwizzleParameters> swizzles); 96 std::span<const VideoCommon::SwizzleParameters> swizzles);
97 97
98private: 98private:
99 void MakeDataBuffer();
100
101 VKScheduler& scheduler; 99 VKScheduler& scheduler;
102 StagingBufferPool& staging_buffer_pool; 100 StagingBufferPool& staging_buffer_pool;
103 VKUpdateDescriptorQueue& update_descriptor_queue; 101 VKUpdateDescriptorQueue& update_descriptor_queue;
104 MemoryAllocator& memory_allocator; 102 MemoryAllocator& memory_allocator;
105
106 vk::Buffer data_buffer;
107 MemoryCommit data_buffer_commit;
108}; 103};
109 104
110} // namespace Vulkan 105} // namespace Vulkan