summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp122
1 files changed, 43 insertions, 79 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 8d8b64fbd..392f09c68 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -59,7 +59,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
59}; 59};
60 60
61layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { 61layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 {
62 uint astc_data[]; 62 uvec4 astc_data[];
63}; 63};
64 64
65layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; 65layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image;
@@ -141,32 +141,28 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] =
141// Input ASTC texture globals 141// Input ASTC texture globals
142uint current_index = 0; 142uint current_index = 0;
143int bitsread = 0; 143int bitsread = 0;
144uint total_bitsread = 0; 144int total_bitsread = 0;
145uint local_buff[16]; 145uvec4 local_buff;
146 146
147// Color data globals 147// Color data globals
148uint color_endpoint_data[16]; 148uvec4 color_endpoint_data;
149int color_bitsread = 0; 149int color_bitsread = 0;
150uint total_color_bitsread = 0;
151int color_index = 0;
152 150
153// Four values, two endpoints, four maximum paritions 151// Four values, two endpoints, four maximum paritions
154uint color_values[32]; 152uint color_values[32];
155int colvals_index = 0; 153int colvals_index = 0;
156 154
157// Weight data globals 155// Weight data globals
158uint texel_weight_data[16]; 156uvec4 texel_weight_data;
159int texel_bitsread = 0; 157int texel_bitsread = 0;
160uint total_texel_bitsread = 0;
161int texel_index = 0;
162 158
163bool texel_flag = false; 159bool texel_flag = false;
164 160
165// Global "vectors" to be pushed into when decoding 161// Global "vectors" to be pushed into when decoding
166EncodingData result_vector[100]; 162EncodingData result_vector[144];
167int result_index = 0; 163int result_index = 0;
168 164
169EncodingData texel_vector[100]; 165EncodingData texel_vector[144];
170int texel_vector_index = 0; 166int texel_vector_index = 0;
171 167
172uint unquantized_texel_weights[2][144]; 168uint unquantized_texel_weights[2][144];
@@ -176,11 +172,6 @@ uint SwizzleOffset(uvec2 pos) {
176 return swizzle_table[pos.y * 64 + pos.x]; 172 return swizzle_table[pos.y * 64 + pos.x];
177} 173}
178 174
179uint ReadTexel(uint offset) {
180 // extract the 8-bit value from the 32-bit packed data.
181 return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8);
182}
183
184// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 175// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
185// is the same as [(num_bits - 1):0] and repeats all the way down. 176// is the same as [(num_bits - 1):0] and repeats all the way down.
186uint Replicate(uint val, uint num_bits, uint to_bit) { 177uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -356,54 +347,37 @@ uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool sma
356 } 347 }
357} 348}
358 349
359uint ReadBit() { 350uint ExtractBits(uvec4 payload, int offset, int bits) {
360 if (current_index >= local_buff.length()) { 351 if (bits <= 0) {
361 return 0; 352 return 0;
362 } 353 }
363 uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1); 354 int last_offset = offset + bits - 1;
364 ++bitsread; 355 int shifted_offset = offset >> 5;
365 ++total_bitsread; 356 if ((last_offset >> 5) == shifted_offset) {
366 if (bitsread == 8) { 357 return bitfieldExtract(payload[shifted_offset], offset & 31, bits);
367 ++current_index;
368 bitsread = 0;
369 } 358 }
370 return bit; 359 int first_bits = 32 - (offset & 31);
360 int result_first = int(bitfieldExtract(payload[shifted_offset], offset & 31, first_bits));
361 int result_second = int(bitfieldExtract(payload[shifted_offset + 1], 0, bits - first_bits));
362 return result_first | (result_second << first_bits);
371} 363}
372 364
373uint StreamBits(uint num_bits) { 365uint StreamBits(uint num_bits) {
374 uint ret = 0; 366 int int_bits = int(num_bits);
375 for (uint i = 0; i < num_bits; i++) { 367 uint ret = ExtractBits(local_buff, total_bitsread, int_bits);
376 ret |= ((ReadBit() & 1) << i); 368 total_bitsread += int_bits;
377 }
378 return ret; 369 return ret;
379} 370}
380 371
381uint ReadColorBit() {
382 uint bit = 0;
383 if (texel_flag) {
384 bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1);
385 ++texel_bitsread;
386 ++total_texel_bitsread;
387 if (texel_bitsread == 8) {
388 ++texel_index;
389 texel_bitsread = 0;
390 }
391 } else {
392 bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1);
393 ++color_bitsread;
394 ++total_color_bitsread;
395 if (color_bitsread == 8) {
396 ++color_index;
397 color_bitsread = 0;
398 }
399 }
400 return bit;
401}
402
403uint StreamColorBits(uint num_bits) { 372uint StreamColorBits(uint num_bits) {
404 uint ret = 0; 373 uint ret = 0;
405 for (uint i = 0; i < num_bits; i++) { 374 int int_bits = int(num_bits);
406 ret |= ((ReadColorBit() & 1) << i); 375 if (texel_flag) {
376 ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
377 texel_bitsread += int_bits;
378 } else {
379 ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
380 color_bitsread += int_bits;
407 } 381 }
408 return ret; 382 return ret;
409} 383}
@@ -1006,7 +980,7 @@ int FindLayout(uint mode) {
1006 return 5; 980 return 5;
1007} 981}
1008 982
1009TexelWeightParams DecodeBlockInfo(uint block_index) { 983TexelWeightParams DecodeBlockInfo() {
1010 TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false); 984 TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false);
1011 uint mode = StreamBits(11); 985 uint mode = StreamBits(11);
1012 if ((mode & 0x1ff) == 0x1fc) { 986 if ((mode & 0x1ff) == 0x1fc) {
@@ -1122,8 +1096,8 @@ void FillVoidExtentLDR(ivec3 coord) {
1122 } 1096 }
1123} 1097}
1124 1098
1125void DecompressBlock(ivec3 coord, uint block_index) { 1099void DecompressBlock(ivec3 coord) {
1126 TexelWeightParams params = DecodeBlockInfo(block_index); 1100 TexelWeightParams params = DecodeBlockInfo();
1127 if (params.error_state) { 1101 if (params.error_state) {
1128 FillError(coord); 1102 FillError(coord);
1129 return; 1103 return;
@@ -1190,7 +1164,7 @@ void DecompressBlock(ivec3 coord, uint block_index) {
1190 // Read color data... 1164 // Read color data...
1191 uint color_data_bits = remaining_bits; 1165 uint color_data_bits = remaining_bits;
1192 while (remaining_bits > 0) { 1166 while (remaining_bits > 0) {
1193 int nb = int(min(remaining_bits, 8U)); 1167 int nb = int(min(remaining_bits, 32U));
1194 uint b = StreamBits(nb); 1168 uint b = StreamBits(nb);
1195 color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); 1169 color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb));
1196 ++ced_pointer; 1170 ++ced_pointer;
@@ -1232,25 +1206,20 @@ void DecompressBlock(ivec3 coord, uint block_index) {
1232 ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); 1206 ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
1233 } 1207 }
1234 1208
1235 for (uint i = 0; i < 16; i++) { 1209 texel_weight_data = local_buff;
1236 texel_weight_data[i] = local_buff[i]; 1210 texel_weight_data = bitfieldReverse(texel_weight_data).wzyx;
1237 }
1238 for (uint i = 0; i < 8; i++) {
1239#define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16
1240 uint a = REVERSE_BYTE(texel_weight_data[i]);
1241 uint b = REVERSE_BYTE(texel_weight_data[15 - i]);
1242#undef REVERSE_BYTE
1243 texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8));
1244 texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8));
1245 }
1246 uint clear_byte_start = 1211 uint clear_byte_start =
1247 (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; 1212 (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
1248 texel_weight_data[clear_byte_start - 1] = 1213
1249 texel_weight_data[clear_byte_start - 1] & 1214 uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) &
1250 uint( 1215 uint(
1251 ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); 1216 ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
1252 for (uint i = 0; i < 16 - clear_byte_start; i++) { 1217 uint vec_index = (clear_byte_start - 1) >> 2;
1253 texel_weight_data[clear_byte_start + i] = 0U; 1218 texel_weight_data[vec_index] =
1219 bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
1220 for (uint i = clear_byte_start; i < 16; ++i) {
1221 uint idx = i >> 2;
1222 texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8);
1254 } 1223 }
1255 texel_flag = true; // use texel "vector" and bit stream in integer decoding 1224 texel_flag = true; // use texel "vector" and bit stream in integer decoding
1256 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); 1225 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
@@ -1302,13 +1271,8 @@ void main() {
1302 if (any(greaterThanEqual(coord, imageSize(dest_image)))) { 1271 if (any(greaterThanEqual(coord, imageSize(dest_image)))) {
1303 return; 1272 return;
1304 } 1273 }
1305 uint block_index =
1306 pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x;
1307
1308 current_index = 0; 1274 current_index = 0;
1309 bitsread = 0; 1275 bitsread = 0;
1310 for (int i = 0; i < 16; i++) { 1276 local_buff = astc_data[offset / 16];
1311 local_buff[i] = ReadTexel(offset + i); 1277 DecompressBlock(coord);
1312 }
1313 DecompressBlock(coord, block_index);
1314} 1278}