diff options
| author | 2021-07-08 00:31:35 -0400 | |
|---|---|---|
| committer | 2021-07-31 22:28:04 -0400 | |
| commit | b2862e4772489f0ade41f79765d33ec4fc33712c (patch) | |
| tree | b3b360f64c03659ff97594ee4c64d0b48b758e76 | |
| parent | astc_decoder: Simplify Select2DPartition (diff) | |
| download | yuzu-b2862e4772489f0ade41f79765d33ec4fc33712c.tar.gz yuzu-b2862e4772489f0ade41f79765d33ec4fc33712c.tar.xz yuzu-b2862e4772489f0ade41f79765d33ec4fc33712c.zip | |
astc_decoder: Make use of uvec4 for payload data
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 122 |
1 file changed, 43 insertions, 79 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 8d8b64fbd..392f09c68 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -59,7 +59,7 @@ layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | |||
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { | 61 | layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { |
| 62 | uint astc_data[]; | 62 | uvec4 astc_data[]; |
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| 65 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; | 65 | layout(binding = BINDING_OUTPUT_IMAGE, rgba8) uniform writeonly image2DArray dest_image; |
| @@ -141,32 +141,28 @@ const uint REPLICATE_7_BIT_TO_8_TABLE[128] = | |||
| 141 | // Input ASTC texture globals | 141 | // Input ASTC texture globals |
| 142 | uint current_index = 0; | 142 | uint current_index = 0; |
| 143 | int bitsread = 0; | 143 | int bitsread = 0; |
| 144 | uint total_bitsread = 0; | 144 | int total_bitsread = 0; |
| 145 | uint local_buff[16]; | 145 | uvec4 local_buff; |
| 146 | 146 | ||
| 147 | // Color data globals | 147 | // Color data globals |
| 148 | uint color_endpoint_data[16]; | 148 | uvec4 color_endpoint_data; |
| 149 | int color_bitsread = 0; | 149 | int color_bitsread = 0; |
| 150 | uint total_color_bitsread = 0; | ||
| 151 | int color_index = 0; | ||
| 152 | 150 | ||
| 153 | // Four values, two endpoints, four maximum partitions | 151 | // Four values, two endpoints, four maximum partitions |
| 154 | uint color_values[32]; | 152 | uint color_values[32]; |
| 155 | int colvals_index = 0; | 153 | int colvals_index = 0; |
| 156 | 154 | ||
| 157 | // Weight data globals | 155 | // Weight data globals |
| 158 | uint texel_weight_data[16]; | 156 | uvec4 texel_weight_data; |
| 159 | int texel_bitsread = 0; | 157 | int texel_bitsread = 0; |
| 160 | uint total_texel_bitsread = 0; | ||
| 161 | int texel_index = 0; | ||
| 162 | 158 | ||
| 163 | bool texel_flag = false; | 159 | bool texel_flag = false; |
| 164 | 160 | ||
| 165 | // Global "vectors" to be pushed into when decoding | 161 | // Global "vectors" to be pushed into when decoding |
| 166 | EncodingData result_vector[100]; | 162 | EncodingData result_vector[144]; |
| 167 | int result_index = 0; | 163 | int result_index = 0; |
| 168 | 164 | ||
| 169 | EncodingData texel_vector[100]; | 165 | EncodingData texel_vector[144]; |
| 170 | int texel_vector_index = 0; | 166 | int texel_vector_index = 0; |
| 171 | 167 | ||
| 172 | uint unquantized_texel_weights[2][144]; | 168 | uint unquantized_texel_weights[2][144]; |
| @@ -176,11 +172,6 @@ uint SwizzleOffset(uvec2 pos) { | |||
| 176 | return swizzle_table[pos.y * 64 + pos.x]; | 172 | return swizzle_table[pos.y * 64 + pos.x]; |
| 177 | } | 173 | } |
| 178 | 174 | ||
| 179 | uint ReadTexel(uint offset) { | ||
| 180 | // extract the 8-bit value from the 32-bit packed data. | ||
| 181 | return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); | ||
| 182 | } | ||
| 183 | |||
| 184 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | 175 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] |
| 185 | // is the same as [(num_bits - 1):0] and repeats all the way down. | 176 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| 186 | uint Replicate(uint val, uint num_bits, uint to_bit) { | 177 | uint Replicate(uint val, uint num_bits, uint to_bit) { |
| @@ -356,54 +347,37 @@ uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool sma | |||
| 356 | } | 347 | } |
| 357 | } | 348 | } |
| 358 | 349 | ||
| 359 | uint ReadBit() { | 350 | uint ExtractBits(uvec4 payload, int offset, int bits) { |
| 360 | if (current_index >= local_buff.length()) { | 351 | if (bits <= 0) { |
| 361 | return 0; | 352 | return 0; |
| 362 | } | 353 | } |
| 363 | uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1); | 354 | int last_offset = offset + bits - 1; |
| 364 | ++bitsread; | 355 | int shifted_offset = offset >> 5; |
| 365 | ++total_bitsread; | 356 | if ((last_offset >> 5) == shifted_offset) { |
| 366 | if (bitsread == 8) { | 357 | return bitfieldExtract(payload[shifted_offset], offset & 31, bits); |
| 367 | ++current_index; | ||
| 368 | bitsread = 0; | ||
| 369 | } | 358 | } |
| 370 | return bit; | 359 | int first_bits = 32 - (offset & 31); |
| 360 | int result_first = int(bitfieldExtract(payload[shifted_offset], offset & 31, first_bits)); | ||
| 361 | int result_second = int(bitfieldExtract(payload[shifted_offset + 1], 0, bits - first_bits)); | ||
| 362 | return result_first | (result_second << first_bits); | ||
| 371 | } | 363 | } |
| 372 | 364 | ||
| 373 | uint StreamBits(uint num_bits) { | 365 | uint StreamBits(uint num_bits) { |
| 374 | uint ret = 0; | 366 | int int_bits = int(num_bits); |
| 375 | for (uint i = 0; i < num_bits; i++) { | 367 | uint ret = ExtractBits(local_buff, total_bitsread, int_bits); |
| 376 | ret |= ((ReadBit() & 1) << i); | 368 | total_bitsread += int_bits; |
| 377 | } | ||
| 378 | return ret; | 369 | return ret; |
| 379 | } | 370 | } |
| 380 | 371 | ||
| 381 | uint ReadColorBit() { | ||
| 382 | uint bit = 0; | ||
| 383 | if (texel_flag) { | ||
| 384 | bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1); | ||
| 385 | ++texel_bitsread; | ||
| 386 | ++total_texel_bitsread; | ||
| 387 | if (texel_bitsread == 8) { | ||
| 388 | ++texel_index; | ||
| 389 | texel_bitsread = 0; | ||
| 390 | } | ||
| 391 | } else { | ||
| 392 | bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1); | ||
| 393 | ++color_bitsread; | ||
| 394 | ++total_color_bitsread; | ||
| 395 | if (color_bitsread == 8) { | ||
| 396 | ++color_index; | ||
| 397 | color_bitsread = 0; | ||
| 398 | } | ||
| 399 | } | ||
| 400 | return bit; | ||
| 401 | } | ||
| 402 | |||
| 403 | uint StreamColorBits(uint num_bits) { | 372 | uint StreamColorBits(uint num_bits) { |
| 404 | uint ret = 0; | 373 | uint ret = 0; |
| 405 | for (uint i = 0; i < num_bits; i++) { | 374 | int int_bits = int(num_bits); |
| 406 | ret |= ((ReadColorBit() & 1) << i); | 375 | if (texel_flag) { |
| 376 | ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits); | ||
| 377 | texel_bitsread += int_bits; | ||
| 378 | } else { | ||
| 379 | ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits); | ||
| 380 | color_bitsread += int_bits; | ||
| 407 | } | 381 | } |
| 408 | return ret; | 382 | return ret; |
| 409 | } | 383 | } |
| @@ -1006,7 +980,7 @@ int FindLayout(uint mode) { | |||
| 1006 | return 5; | 980 | return 5; |
| 1007 | } | 981 | } |
| 1008 | 982 | ||
| 1009 | TexelWeightParams DecodeBlockInfo(uint block_index) { | 983 | TexelWeightParams DecodeBlockInfo() { |
| 1010 | TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false); | 984 | TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false); |
| 1011 | uint mode = StreamBits(11); | 985 | uint mode = StreamBits(11); |
| 1012 | if ((mode & 0x1ff) == 0x1fc) { | 986 | if ((mode & 0x1ff) == 0x1fc) { |
| @@ -1122,8 +1096,8 @@ void FillVoidExtentLDR(ivec3 coord) { | |||
| 1122 | } | 1096 | } |
| 1123 | } | 1097 | } |
| 1124 | 1098 | ||
| 1125 | void DecompressBlock(ivec3 coord, uint block_index) { | 1099 | void DecompressBlock(ivec3 coord) { |
| 1126 | TexelWeightParams params = DecodeBlockInfo(block_index); | 1100 | TexelWeightParams params = DecodeBlockInfo(); |
| 1127 | if (params.error_state) { | 1101 | if (params.error_state) { |
| 1128 | FillError(coord); | 1102 | FillError(coord); |
| 1129 | return; | 1103 | return; |
| @@ -1190,7 +1164,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1190 | // Read color data... | 1164 | // Read color data... |
| 1191 | uint color_data_bits = remaining_bits; | 1165 | uint color_data_bits = remaining_bits; |
| 1192 | while (remaining_bits > 0) { | 1166 | while (remaining_bits > 0) { |
| 1193 | int nb = int(min(remaining_bits, 8U)); | 1167 | int nb = int(min(remaining_bits, 32U)); |
| 1194 | uint b = StreamBits(nb); | 1168 | uint b = StreamBits(nb); |
| 1195 | color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); | 1169 | color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); |
| 1196 | ++ced_pointer; | 1170 | ++ced_pointer; |
| @@ -1232,25 +1206,20 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1232 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); | 1206 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); |
| 1233 | } | 1207 | } |
| 1234 | 1208 | ||
| 1235 | for (uint i = 0; i < 16; i++) { | 1209 | texel_weight_data = local_buff; |
| 1236 | texel_weight_data[i] = local_buff[i]; | 1210 | texel_weight_data = bitfieldReverse(texel_weight_data).wzyx; |
| 1237 | } | ||
| 1238 | for (uint i = 0; i < 8; i++) { | ||
| 1239 | #define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16 | ||
| 1240 | uint a = REVERSE_BYTE(texel_weight_data[i]); | ||
| 1241 | uint b = REVERSE_BYTE(texel_weight_data[15 - i]); | ||
| 1242 | #undef REVERSE_BYTE | ||
| 1243 | texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8)); | ||
| 1244 | texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8)); | ||
| 1245 | } | ||
| 1246 | uint clear_byte_start = | 1211 | uint clear_byte_start = |
| 1247 | (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; | 1212 | (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; |
| 1248 | texel_weight_data[clear_byte_start - 1] = | 1213 | |
| 1249 | texel_weight_data[clear_byte_start - 1] & | 1214 | uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) & |
| 1250 | uint( | 1215 | uint( |
| 1251 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); | 1216 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); |
| 1252 | for (uint i = 0; i < 16 - clear_byte_start; i++) { | 1217 | uint vec_index = (clear_byte_start - 1) >> 2; |
| 1253 | texel_weight_data[clear_byte_start + i] = 0U; | 1218 | texel_weight_data[vec_index] = |
| 1219 | bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); | ||
| 1220 | for (uint i = clear_byte_start; i < 16; ++i) { | ||
| 1221 | uint idx = i >> 2; | ||
| 1222 | texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8); | ||
| 1254 | } | 1223 | } |
| 1255 | texel_flag = true; // use texel "vector" and bit stream in integer decoding | 1224 | texel_flag = true; // use texel "vector" and bit stream in integer decoding |
| 1256 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); | 1225 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); |
| @@ -1302,13 +1271,8 @@ void main() { | |||
| 1302 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { | 1271 | if (any(greaterThanEqual(coord, imageSize(dest_image)))) { |
| 1303 | return; | 1272 | return; |
| 1304 | } | 1273 | } |
| 1305 | uint block_index = | ||
| 1306 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; | ||
| 1307 | |||
| 1308 | current_index = 0; | 1274 | current_index = 0; |
| 1309 | bitsread = 0; | 1275 | bitsread = 0; |
| 1310 | for (int i = 0; i < 16; i++) { | 1276 | local_buff = astc_data[offset / 16]; |
| 1311 | local_buff[i] = ReadTexel(offset + i); | 1277 | DecompressBlock(coord); |
| 1312 | } | ||
| 1313 | DecompressBlock(coord, block_index); | ||
| 1314 | } | 1278 | } |