summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp15
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp39
-rw-r--r--src/video_core/textures/astc.cpp2
-rw-r--r--src/video_core/textures/astc.h2
5 files changed, 16 insertions, 47 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index eaba1b103..71327e233 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -14,9 +14,8 @@
14#define BINDING_6_TO_8_BUFFER 2 14#define BINDING_6_TO_8_BUFFER 2
15#define BINDING_7_TO_8_BUFFER 3 15#define BINDING_7_TO_8_BUFFER 3
16#define BINDING_8_TO_8_BUFFER 4 16#define BINDING_8_TO_8_BUFFER 4
17#define BINDING_BYTE_TO_16_BUFFER 5 17#define BINDING_SWIZZLE_BUFFER 5
18#define BINDING_SWIZZLE_BUFFER 6 18#define BINDING_OUTPUT_IMAGE 6
19#define BINDING_OUTPUT_IMAGE 7
20 19
21#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv 20#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
22 21
@@ -29,7 +28,6 @@
29#define BINDING_6_TO_8_BUFFER 3 28#define BINDING_6_TO_8_BUFFER 3
30#define BINDING_7_TO_8_BUFFER 4 29#define BINDING_7_TO_8_BUFFER 4
31#define BINDING_8_TO_8_BUFFER 5 30#define BINDING_8_TO_8_BUFFER 5
32#define BINDING_BYTE_TO_16_BUFFER 6
33#define BINDING_OUTPUT_IMAGE 0 31#define BINDING_OUTPUT_IMAGE 0
34 32
35#endif 33#endif
@@ -86,9 +84,6 @@ layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_
86layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { 84layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 {
87 uint REPLICATE_8_BIT_TO_8_TABLE[]; 85 uint REPLICATE_8_BIT_TO_8_TABLE[];
88}; 86};
89layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 {
90 uint REPLICATE_BYTE_TO_16_TABLE[];
91};
92 87
93layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; 88layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
94 89
@@ -207,8 +202,7 @@ uint Replicate(uint val, uint num_bits, uint to_bit) {
207} 202}
208 203
209uvec4 ReplicateByteTo16(uvec4 value) { 204uvec4 ReplicateByteTo16(uvec4 value) {
210 return uvec4(REPLICATE_BYTE_TO_16_TABLE[value.x], REPLICATE_BYTE_TO_16_TABLE[value.y], 205 return value * 0x101;
211 REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]);
212} 206}
213 207
214uint ReplicateBitTo7(uint value) { 208uint ReplicateBitTo7(uint value) {
@@ -1327,6 +1321,9 @@ void main() {
1327 offset += swizzle; 1321 offset += swizzle;
1328 1322
1329 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); 1323 const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1));
1324 if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
1325 return;
1326 }
1330 uint block_index = 1327 uint block_index =
1331 pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; 1328 pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
1332 1329
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 47fddcb6e..d57998cdc 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -83,7 +83,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
83 static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; 83 static constexpr GLuint BINDING_6_TO_8_BUFFER = 3;
84 static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; 84 static constexpr GLuint BINDING_7_TO_8_BUFFER = 4;
85 static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; 85 static constexpr GLuint BINDING_8_TO_8_BUFFER = 5;
86 static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6;
87 86
88 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 87 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
89 88
@@ -105,9 +104,6 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
105 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle, 104 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, astc_buffer.handle,
106 offsetof(AstcBufferData, replicate_8_to_8), 105 offsetof(AstcBufferData, replicate_8_to_8),
107 sizeof(AstcBufferData::replicate_8_to_8)); 106 sizeof(AstcBufferData::replicate_8_to_8));
108 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, astc_buffer.handle,
109 offsetof(AstcBufferData, replicate_byte_to_16),
110 sizeof(AstcBufferData::replicate_byte_to_16));
111 107
112 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 108 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
113 glUniform2ui(1, tile_size.width, tile_size.height); 109 glUniform2ui(1, tile_size.width, tile_size.height);
@@ -137,6 +133,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
137 133
138 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); 134 glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
139 } 135 }
136 glMemoryBarrier(GL_ALL_BARRIER_BITS);
140 program_manager.RestoreGuestCompute(); 137 program_manager.RestoreGuestCompute();
141} 138}
142 139
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index e11406e58..123bed794 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -40,9 +40,9 @@ constexpr u32 ASTC_BINDING_ENC_BUFFER = 1;
40constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2; 40constexpr u32 ASTC_BINDING_6_TO_8_BUFFER = 2;
41constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3; 41constexpr u32 ASTC_BINDING_7_TO_8_BUFFER = 3;
42constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4; 42constexpr u32 ASTC_BINDING_8_TO_8_BUFFER = 4;
43constexpr u32 ASTC_BINDING_BYTE_TO_16_BUFFER = 5; 43constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 5;
44constexpr u32 ASTC_BINDING_SWIZZLE_BUFFER = 6; 44constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 6;
45constexpr u32 ASTC_BINDING_OUTPUT_IMAGE = 7; 45constexpr size_t ASTC_NUM_BINDINGS = 7;
46 46
47VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { 47VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
48 return { 48 return {
@@ -71,7 +71,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBinding
71 }}; 71 }};
72} 72}
73 73
74std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() { 74std::array<VkDescriptorSetLayoutBinding, ASTC_NUM_BINDINGS> BuildASTCDescriptorSetBindings() {
75 return {{ 75 return {{
76 { 76 {
77 .binding = ASTC_BINDING_INPUT_BUFFER, 77 .binding = ASTC_BINDING_INPUT_BUFFER,
@@ -109,13 +109,6 @@ std::array<VkDescriptorSetLayoutBinding, 8> BuildASTCDescriptorSetBindings() {
109 .pImmutableSamplers = nullptr, 109 .pImmutableSamplers = nullptr,
110 }, 110 },
111 { 111 {
112 .binding = ASTC_BINDING_BYTE_TO_16_BUFFER,
113 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
114 .descriptorCount = 1,
115 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
116 .pImmutableSamplers = nullptr,
117 },
118 {
119 .binding = ASTC_BINDING_SWIZZLE_BUFFER, 112 .binding = ASTC_BINDING_SWIZZLE_BUFFER,
120 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 113 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
121 .descriptorCount = 1, 114 .descriptorCount = 1,
@@ -143,7 +136,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
143 }; 136 };
144} 137}
145 138
146std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateTemplateEntry() { 139std::array<VkDescriptorUpdateTemplateEntryKHR, ASTC_NUM_BINDINGS>
140BuildASTCPassDescriptorUpdateTemplateEntry() {
147 return {{ 141 return {{
148 { 142 {
149 .dstBinding = ASTC_BINDING_INPUT_BUFFER, 143 .dstBinding = ASTC_BINDING_INPUT_BUFFER,
@@ -186,14 +180,6 @@ std::array<VkDescriptorUpdateTemplateEntryKHR, 8> BuildASTCPassDescriptorUpdateT
186 .stride = sizeof(DescriptorUpdateEntry), 180 .stride = sizeof(DescriptorUpdateEntry),
187 }, 181 },
188 { 182 {
189 .dstBinding = ASTC_BINDING_BYTE_TO_16_BUFFER,
190 .dstArrayElement = 0,
191 .descriptorCount = 1,
192 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
193 .offset = ASTC_BINDING_BYTE_TO_16_BUFFER * sizeof(DescriptorUpdateEntry),
194 .stride = sizeof(DescriptorUpdateEntry),
195 },
196 {
197 .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER, 183 .dstBinding = ASTC_BINDING_SWIZZLE_BUFFER,
198 .dstArrayElement = 0, 184 .dstArrayElement = 0,
199 .descriptorCount = 1, 185 .descriptorCount = 1,
@@ -222,15 +208,6 @@ struct AstcPushConstants {
222 u32 block_height_mask; 208 u32 block_height_mask;
223}; 209};
224 210
225struct AstcBufferData {
226 decltype(SWIZZLE_TABLE) swizzle_table_buffer = SWIZZLE_TABLE;
227 decltype(EncodingsValues) encoding_values = EncodingsValues;
228 decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
229 decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
230 decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
231 decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
232} constexpr ASTC_BUFFER_DATA;
233
234} // Anonymous namespace 211} // Anonymous namespace
235 212
236VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool, 213VKComputePass::VKComputePass(const Device& device, VKDescriptorPool& descriptor_pool,
@@ -517,9 +494,6 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
517 sizeof(AstcBufferData::replicate_7_to_8)); 494 sizeof(AstcBufferData::replicate_7_to_8));
518 update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8), 495 update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, replicate_8_to_8),
519 sizeof(AstcBufferData::replicate_8_to_8)); 496 sizeof(AstcBufferData::replicate_8_to_8));
520 update_descriptor_queue.AddBuffer(*data_buffer,
521 offsetof(AstcBufferData, replicate_byte_to_16),
522 sizeof(AstcBufferData::replicate_byte_to_16));
523 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData), 497 update_descriptor_queue.AddBuffer(*data_buffer, sizeof(AstcBufferData),
524 sizeof(SWIZZLE_TABLE)); 498 sizeof(SWIZZLE_TABLE));
525 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); 499 update_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@@ -569,6 +543,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
569 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 543 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
570 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); 544 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier);
571 }); 545 });
546 scheduler.Finish();
572} 547}
573 548
574} // namespace Vulkan 549} // namespace Vulkan
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 9b2177ebd..b6e2022f2 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -551,6 +551,8 @@ static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
551 } 551 }
552 } 552 }
553} 553}
554
555static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
554static constexpr u32 ReplicateByteTo16(std::size_t value) { 556static constexpr u32 ReplicateByteTo16(std::size_t value) {
555 return REPLICATE_BYTE_TO_16_TABLE[value]; 557 return REPLICATE_BYTE_TO_16_TABLE[value];
556} 558}
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index c1c37dfe7..441e8eb04 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -116,7 +116,6 @@ constexpr auto MakeReplicateTable() {
116 return table; 116 return table;
117} 117}
118 118
119constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
120constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); 119constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
121constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); 120constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
122constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); 121constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
@@ -126,7 +125,6 @@ struct AstcBufferData {
126 decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE; 125 decltype(REPLICATE_6_BIT_TO_8_TABLE) replicate_6_to_8 = REPLICATE_6_BIT_TO_8_TABLE;
127 decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE; 126 decltype(REPLICATE_7_BIT_TO_8_TABLE) replicate_7_to_8 = REPLICATE_7_BIT_TO_8_TABLE;
128 decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE; 127 decltype(REPLICATE_8_BIT_TO_8_TABLE) replicate_8_to_8 = REPLICATE_8_BIT_TO_8_TABLE;
129 decltype(REPLICATE_BYTE_TO_16_TABLE) replicate_byte_to_16 = REPLICATE_BYTE_TO_16_TABLE;
130} constexpr ASTC_BUFFER_DATA; 128} constexpr ASTC_BUFFER_DATA;
131 129
132void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 130void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,