diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 1288 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 11 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 190 |
6 files changed, 1598 insertions, 2 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp new file mode 100644 index 000000000..070190a5c --- /dev/null +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -0,0 +1,1288 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 450 | ||
| 6 | |||
// Per-API glue. Under VULKAN the uniforms declared between BEGIN_PUSH_CONSTANTS
// and END_PUSH_CONSTANTS are wrapped in a push-constant block and UNIFORM(n)
// expands to nothing; otherwise each uniform gets an explicit location.
// The BINDING_* macros give the binding index of every buffer/image below.
| 7 | #ifdef VULKAN | ||
| 8 | |||
| 9 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | ||
| 10 | #define END_PUSH_CONSTANTS }; | ||
| 11 | #define UNIFORM(n) | ||
| 12 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 13 | #define BINDING_INPUT_BUFFER 1 | ||
| 14 | #define BINDING_ENC_BUFFER 2 | ||
| 15 | #define BINDING_6_TO_8_BUFFER 3 | ||
| 16 | #define BINDING_7_TO_8_BUFFER 4 | ||
| 17 | #define BINDING_8_TO_8_BUFFER 5 | ||
| 18 | #define BINDING_BYTE_TO_16_BUFFER 6 | ||
// NOTE(review): index 3 is also used by BINDING_6_TO_8_BUFFER above. If the
// image and the buffers live in the same descriptor set this collides --
// confirm against the Vulkan host code before relying on this path.
| 19 | #define BINDING_OUTPUT_IMAGE 3 | ||
| 20 | |||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | ||
| 22 | |||
| 23 | #define BEGIN_PUSH_CONSTANTS | ||
| 24 | #define END_PUSH_CONSTANTS | ||
| 25 | #define UNIFORM(n) layout(location = n) uniform | ||
| 26 | #define BINDING_SWIZZLE_BUFFER 0 | ||
| 27 | #define BINDING_INPUT_BUFFER 1 | ||
| 28 | #define BINDING_ENC_BUFFER 2 | ||
| 29 | #define BINDING_6_TO_8_BUFFER 3 | ||
| 30 | #define BINDING_7_TO_8_BUFFER 4 | ||
| 31 | #define BINDING_8_TO_8_BUFFER 5 | ||
| 32 | #define BINDING_BYTE_TO_16_BUFFER 6 | ||
| 33 | #define BINDING_OUTPUT_IMAGE 0 | ||
| 34 | |||
| 35 | #endif | ||
| 36 | |||
// Each workgroup runs 32 x 32 x 1 invocations.
| 37 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | ||
| 38 | |||
// Host-supplied parameters (a push-constant block on Vulkan, located uniforms
// on OpenGL -- see the UNIFORM macro).
// NOTE(review): none of these are read in the part of the shader shown here;
// their meaning should be confirmed against the entry point and host code.
| 39 | BEGIN_PUSH_CONSTANTS | ||
| 40 | UNIFORM(0) uvec2 num_image_blocks; | ||
| 41 | UNIFORM(1) uvec2 block_dims; | ||
| 42 | UNIFORM(2) uint layer; | ||
| 43 | |||
| 44 | UNIFORM(3) uvec3 origin; | ||
| 45 | UNIFORM(4) ivec3 destination; | ||
| 46 | UNIFORM(5) uint bytes_per_block_log2; | ||
| 47 | UNIFORM(6) uint layer_stride; | ||
| 48 | UNIFORM(7) uint block_size; | ||
| 49 | UNIFORM(8) uint x_shift; | ||
| 50 | UNIFORM(9) uint block_height; | ||
| 51 | UNIFORM(10) uint block_height_mask; | ||
| 52 | |||
| 53 | END_PUSH_CONSTANTS | ||
| 54 | |||
// Cursor state for ReadBit(): local_buff is consumed 8 bits per element,
// current_index selects the element, bitsread is the bit position inside it
// and total_bitsread counts every bit handed out so far.
| 55 | uint current_index = 0; | ||
| 56 | int bitsread = 0; | ||
| 57 | uint total_bitsread = 0; | ||
| 58 | uint local_buff[16]; | ||
| 59 | |||
// Tags stored in EncodingData.encoding.
| 60 | const int JustBits = 0; | ||
| 61 | const int Quint = 1; | ||
| 62 | const int Trit = 2; | ||
| 63 | |||
// One integer-sequence-encoded value, or the description of an encoding.
//   encoding         - JustBits, Quint or Trit
//   num_bits         - number of plain bits stored per value
//   bit_value        - the plain bits of a decoded value
//   quint_trit_value - the quint/trit digit of a decoded value (unused by the
//                      JustBits decode path)
| 64 | struct EncodingData { | ||
| 65 | uint encoding; | ||
| 66 | uint num_bits; | ||
| 67 | uint bit_value; | ||
| 68 | uint quint_trit_value; | ||
| 69 | }; | ||
| 70 | |||
// Parameters describing a block's texel-weight grid.
// NOTE(review): no code in the visible part of the file fills or reads this
// struct; field meanings are presumed from their names -- confirm at the
// decoding site (size = weight grid dimensions, max_weight = index into
// encoding_values, Error / VoidExtent* = block classification flags).
| 71 | struct TexelWeightParams { | ||
| 72 | uvec2 size; | ||
| 73 | bool dual_plane; | ||
| 74 | uint max_weight; | ||
| 75 | bool Error; | ||
| 76 | bool VoidExtentLDR; | ||
| 77 | bool VoidExtentHDR; | ||
| 78 | }; | ||
| 79 | |||
| 80 | // Swizzle data | ||
// Read-only lookup table indexed by SwizzleOffset().
| 81 | layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable { | ||
| 82 | uint swizzle_table[]; | ||
| 83 | }; | ||
| 84 | |||
// Input data as 32-bit words; ReadTexel() extracts individual bytes from it.
| 85 | layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { | ||
| 86 | uint astc_data[]; | ||
| 87 | }; | ||
// Write-only destination image.
| 88 | layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly image2D dest_image; | ||
| 89 | |||
// GOB extent: 64 x 8 x 1, i.e. 512 units in total.
| 90 | const uint GOB_SIZE_X = 64; | ||
| 91 | const uint GOB_SIZE_Y = 8; | ||
| 92 | const uint GOB_SIZE_Z = 1; | ||
| 93 | const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | ||
| 94 | |||
// log2 of the extents above (64 = 1 << 6, 8 = 1 << 3, 1 = 1 << 0).
| 95 | const uint GOB_SIZE_X_SHIFT = 6; | ||
| 96 | const uint GOB_SIZE_Y_SHIFT = 3; | ||
| 97 | const uint GOB_SIZE_Z_SHIFT = 0; | ||
| 98 | const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | ||
| 99 | |||
// Mask that wraps an (x, y) position into a single GOB: (63, 7).
| 100 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | ||
| 101 | |||
// Returns the swizzle-table entry for a position. Only the low bits of pos
// matter: x is wrapped to [0, 63] and y to [0, 7] by SWIZZLE_MASK, then the
// table is indexed row-major with 64 entries per row.
uint SwizzleOffset(uvec2 pos) {
    const uvec2 wrapped = pos & SWIZZLE_MASK;
    const uint table_index = wrapped.y * 64 + wrapped.x;
    return swizzle_table[table_index];
}
| 106 | |||
// Reads the byte at byte offset `offset` from astc_data, which stores four
// bytes per 32-bit word with byte 0 in the least significant bits.
uint ReadTexel(uint offset) {
    const uint word = astc_data[offset >> 2];
    // (offset & 3) selects the byte inside the word; << 3 turns it into a bit offset.
    const int bit_offset = int((offset & 3) << 3);
    return bitfieldExtract(word, bit_offset, 8);
}
| 111 | |||
| 112 | // ASTC Encodings data | ||
// Indexed by a range value (0..255); consumed by GetBitLength(),
// DecodeIntegerSequence() and DecodeColorValues().
| 113 | layout(binding = BINDING_ENC_BUFFER, std430) readonly buffer EncodingsValues { | ||
| 114 | EncodingData encoding_values[256]; | ||
| 115 | }; | ||
| 116 | // ASTC Precompiled tables | ||
// Host-filled lookup tables. The 6/7/8-bit tables back FastReplicateTo8();
// the byte-to-16 table backs ReplicateByteTo16().
| 117 | layout(binding = BINDING_6_TO_8_BUFFER, std430) readonly buffer REPLICATE_6_BIT_TO_8 { | ||
| 118 | uint REPLICATE_6_BIT_TO_8_TABLE[]; | ||
| 119 | }; | ||
| 120 | layout(binding = BINDING_7_TO_8_BUFFER, std430) readonly buffer REPLICATE_7_BIT_TO_8 { | ||
| 121 | uint REPLICATE_7_BIT_TO_8_TABLE[]; | ||
| 122 | }; | ||
| 123 | layout(binding = BINDING_8_TO_8_BUFFER, std430) readonly buffer REPLICATE_8_BIT_TO_8 { | ||
| 124 | uint REPLICATE_8_BIT_TO_8_TABLE[]; | ||
| 125 | }; | ||
| 126 | layout(binding = BINDING_BYTE_TO_16_BUFFER, std430) readonly buffer REPLICATE_BYTE_TO_16 { | ||
| 127 | uint REPLICATE_BYTE_TO_16_TABLE[]; | ||
| 128 | }; | ||
| 129 | |||
// Size of one compressed block in bytes.
| 130 | const int BLOCK_SIZE_IN_BYTES = 16; | ||
| 131 | |||
// Block classification codes.
// NOTE(review): not referenced in the visible part of the file; presumably
// returned by the block-mode decoder -- confirm at the use site.
| 132 | const int BLOCK_INFO_ERROR = 0; | ||
| 133 | const int BLOCK_INFO_VOID_EXTENT_HDR = 1; | ||
| 134 | const int BLOCK_INFO_VOID_EXTENT_LDR = 2; | ||
| 135 | const int BLOCK_INFO_NORMAL = 3; | ||
| 136 | |||
// Expands the low num_bits bits of val to a to_bit-wide value by repeating the
// bit pattern from the most significant end downwards; the final repetition is
// truncated to its top bits when to_bit is not a multiple of num_bits.
// Returns 0 when num_bits or to_bit is 0. If to_bit <= num_bits the masked
// input is returned unchanged.
uint Replicate(uint val, uint num_bits, uint to_bit) {
    if (num_bits == 0 || to_bit == 0) {
        return 0;
    }
    const uint pattern = val & uint((1 << num_bits) - 1);
    uint result = pattern;
    // `filled` is the number of valid bits accumulated in `result` so far.
    for (uint filled = num_bits; filled < to_bit; filled += num_bits) {
        const uint remaining = to_bit - filled;
        uint dropped = 0;
        if (num_bits > remaining) {
            // Last, partial copy: keep only the top `remaining` bits of the pattern.
            dropped = num_bits - remaining;
            num_bits = remaining;
        }
        result = (result << num_bits) | (pattern >> dropped);
    }
    return result;
}
| 162 | |||
// Maps each component of value through REPLICATE_BYTE_TO_16_TABLE.
uvec4 ReplicateByteTo16(uvec4 value) {
    uvec4 widened;
    for (int channel = 0; channel < 4; channel++) {
        widened[channel] = REPLICATE_BYTE_TO_16_TABLE[value[channel]];
    }
    return widened;
}
| 167 | |||
// A single bit replicated across 7 bits: 0 -> 0, 1 -> 127.
const uint REPLICATE_BIT_TO_7_TABLE[2] = uint[](0, 127);

// Returns value (expected to be 0 or 1) replicated to 7 bits.
uint ReplicateBitTo7(uint value) {
    return REPLICATE_BIT_TO_7_TABLE[value];
}
| 173 | |||
// A single bit replicated across 9 bits: 0 -> 0, 1 -> 511.
const uint REPLICATE_1_BIT_TO_9_TABLE[2] = uint[](0, 511);

// Returns value (expected to be 0 or 1) replicated to 9 bits.
uint ReplicateBitTo9(uint value) {
    const uint replicated = REPLICATE_1_BIT_TO_9_TABLE[value];
    return replicated;
}
| 178 | |||
// Precomputed n-bit -> 8-bit replication tables for n = 1..5. The tables for
// n = 6..8 are supplied by the host in storage buffers.
const uint REPLICATE_1_BIT_TO_8_TABLE[2] = uint[](0, 255);
const uint REPLICATE_2_BIT_TO_8_TABLE[4] = uint[](0, 85, 170, 255);
const uint REPLICATE_3_BIT_TO_8_TABLE[8] = uint[](0, 36, 73, 109, 146, 182, 219, 255);
const uint REPLICATE_4_BIT_TO_8_TABLE[16] =
    uint[](0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255);
const uint REPLICATE_5_BIT_TO_8_TABLE[32] =
    uint[](0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165,
           173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255);

// Expands a num_bits-wide value to 8 bits. Widths 1..8 use a table lookup;
// any other width falls back to the generic Replicate().
uint FastReplicateTo8(uint value, uint num_bits) {
    uint expanded;
    switch (num_bits) {
    case 1:
        expanded = REPLICATE_1_BIT_TO_8_TABLE[value];
        break;
    case 2:
        expanded = REPLICATE_2_BIT_TO_8_TABLE[value];
        break;
    case 3:
        expanded = REPLICATE_3_BIT_TO_8_TABLE[value];
        break;
    case 4:
        expanded = REPLICATE_4_BIT_TO_8_TABLE[value];
        break;
    case 5:
        expanded = REPLICATE_5_BIT_TO_8_TABLE[value];
        break;
    case 6:
        expanded = REPLICATE_6_BIT_TO_8_TABLE[value];
        break;
    case 7:
        expanded = REPLICATE_7_BIT_TO_8_TABLE[value];
        break;
    case 8:
        expanded = REPLICATE_8_BIT_TO_8_TABLE[value];
        break;
    default:
        expanded = Replicate(value, num_bits, 8);
        break;
    }
    return expanded;
}
| 209 | |||
// Precomputed n-bit -> 6-bit replication tables for n = 1..5.
const uint REPLICATE_1_BIT_TO_6_TABLE[2] = uint[](0, 63);
const uint REPLICATE_2_BIT_TO_6_TABLE[4] = uint[](0, 21, 42, 63);
const uint REPLICATE_3_BIT_TO_6_TABLE[8] = uint[](0, 9, 18, 27, 36, 45, 54, 63);
const uint REPLICATE_4_BIT_TO_6_TABLE[16] =
    uint[](0, 4, 8, 12, 17, 21, 25, 29, 34, 38, 42, 46, 51, 55, 59, 63);
const uint REPLICATE_5_BIT_TO_6_TABLE[32] =
    uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45,
           47, 49, 51, 53, 55, 57, 59, 61, 63);

// Expands a num_bits-wide value to 6 bits. Widths 1..5 use a table lookup;
// any other width falls back to the generic Replicate().
uint FastReplicateTo6(uint value, uint num_bits) {
    uint expanded;
    switch (num_bits) {
    case 1:
        expanded = REPLICATE_1_BIT_TO_6_TABLE[value];
        break;
    case 2:
        expanded = REPLICATE_2_BIT_TO_6_TABLE[value];
        break;
    case 3:
        expanded = REPLICATE_3_BIT_TO_6_TABLE[value];
        break;
    case 4:
        expanded = REPLICATE_4_BIT_TO_6_TABLE[value];
        break;
    case 5:
        expanded = REPLICATE_5_BIT_TO_6_TABLE[value];
        break;
    default:
        expanded = Replicate(value, num_bits, 6);
        break;
    }
    return expanded;
}
| 234 | |||
// 32-bit integer mixing function used by SelectPartition() to derive its
// pseudo-random seeds. All arithmetic wraps modulo 2^32; the order of the
// steps is significant.
uint hash52(uint p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
| 248 | |||
// Chooses which partition (0..3) the texel at (x, y, z) belongs to.
//   seed            - partition seed; the partition count is folded into it
//   x, y, z         - texel coordinates inside the block
//   partition_count - number of partitions; 1 always yields partition 0
//   small_block     - when true the coordinates are doubled before use
// Returns the index of the largest of four pseudo-random linear functions of
// the coordinates; ties go to the lowest index.
| 249 | uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) { | ||
| 250 | if (1 == partition_count) | ||
| 251 | return 0; | ||
| 252 | |||
| 253 | if (small_block) { | ||
| 254 | x <<= 1; | ||
| 255 | y <<= 1; | ||
| 256 | z <<= 1; | ||
| 257 | } | ||
| 258 | |||
// Make the hash input depend on the partition count as well as the seed.
| 259 | seed += (partition_count - 1) * 1024; | ||
| 260 | |||
// Slice the hash into twelve 4-bit fields (seed9..seed12 overlap the others).
| 261 | uint rnum = hash52(uint(seed)); | ||
| 262 | uint seed1 = uint(rnum & 0xF); | ||
| 263 | uint seed2 = uint((rnum >> 4) & 0xF); | ||
| 264 | uint seed3 = uint((rnum >> 8) & 0xF); | ||
| 265 | uint seed4 = uint((rnum >> 12) & 0xF); | ||
| 266 | uint seed5 = uint((rnum >> 16) & 0xF); | ||
| 267 | uint seed6 = uint((rnum >> 20) & 0xF); | ||
| 268 | uint seed7 = uint((rnum >> 24) & 0xF); | ||
| 269 | uint seed8 = uint((rnum >> 28) & 0xF); | ||
| 270 | uint seed9 = uint((rnum >> 18) & 0xF); | ||
| 271 | uint seed10 = uint((rnum >> 22) & 0xF); | ||
| 272 | uint seed11 = uint((rnum >> 26) & 0xF); | ||
| 273 | uint seed12 = uint(((rnum >> 30) | (rnum << 2)) & 0xF); | ||
| 274 | |||
// Square every field (values become 0..225).
| 275 | seed1 = (seed1 * seed1); | ||
| 276 | seed2 = (seed2 * seed2); | ||
| 277 | seed3 = (seed3 * seed3); | ||
| 278 | seed4 = (seed4 * seed4); | ||
| 279 | seed5 = (seed5 * seed5); | ||
| 280 | seed6 = (seed6 * seed6); | ||
| 281 | seed7 = (seed7 * seed7); | ||
| 282 | seed8 = (seed8 * seed8); | ||
| 283 | seed9 = (seed9 * seed9); | ||
| 284 | seed10 = (seed10 * seed10); | ||
| 285 | seed11 = (seed11 * seed11); | ||
| 286 | seed12 = (seed12 * seed12); | ||
| 287 | |||
// Pick the right-shift amounts from the low bits of the (already adjusted)
// seed and from whether there are exactly three partitions.
| 288 | int sh1, sh2, sh3; | ||
| 289 | if ((seed & 1) > 0) { | ||
| 290 | sh1 = (seed & 2) > 0 ? 4 : 5; | ||
| 291 | sh2 = (partition_count == 3) ? 6 : 5; | ||
| 292 | } else { | ||
| 293 | sh1 = (partition_count == 3) ? 6 : 5; | ||
| 294 | sh2 = (seed & 2) > 0 ? 4 : 5; | ||
| 295 | } | ||
| 296 | sh3 = (seed & 0x10) > 0 ? sh1 : sh2; | ||
| 297 | |||
// Odd-numbered seeds (x coefficients) use sh1, even-numbered (y) use sh2,
// seed9..seed12 (z coefficients) use sh3.
| 298 | seed1 = (seed1 >> sh1); | ||
| 299 | seed2 = (seed2 >> sh2); | ||
| 300 | seed3 = (seed3 >> sh1); | ||
| 301 | seed4 = (seed4 >> sh2); | ||
| 302 | seed5 = (seed5 >> sh1); | ||
| 303 | seed6 = (seed6 >> sh2); | ||
| 304 | seed7 = (seed7 >> sh1); | ||
| 305 | seed8 = (seed8 >> sh2); | ||
| 306 | seed9 = (seed9 >> sh3); | ||
| 307 | seed10 = (seed10 >> sh3); | ||
| 308 | seed11 = (seed11 >> sh3); | ||
| 309 | seed12 = (seed12 >> sh3); | ||
| 310 | |||
// One linear function of the coordinates per candidate partition.
| 311 | uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | ||
| 312 | uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | ||
| 313 | uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | ||
| 314 | uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | ||
| 315 | |||
// Only the low 6 bits take part in the comparison.
| 316 | a &= 0x3F; | ||
| 317 | b &= 0x3F; | ||
| 318 | c &= 0x3F; | ||
| 319 | d &= 0x3F; | ||
| 320 | |||
// Zero the candidates for partitions that do not exist; they can then only be
// selected on a tie, and ties are resolved in favour of the lower index.
| 321 | if (partition_count < 4) | ||
| 322 | d = 0; | ||
| 323 | if (partition_count < 3) | ||
| 324 | c = 0; | ||
| 325 | |||
| 326 | if (a >= b && a >= c && a >= d) | ||
| 327 | return 0; | ||
| 328 | else if (b >= c && b >= d) | ||
| 329 | return 1; | ||
| 330 | else if (c >= d) | ||
| 331 | return 2; | ||
| 332 | return 3; | ||
| 333 | } | ||
| 334 | |||
// 2D convenience wrapper around SelectPartition(): the z coordinate is fixed at 0.
uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) {
    const uint z = 0;
    return SelectPartition(seed, x, y, z, partition_count, small_block);
}
| 338 | |||
// Returns the next bit of local_buff (least significant bit first within each
// element) and advances the cursor. Only the low 8 bits of every element are
// consumed before moving on to the next one.
uint ReadBit() {
    const uint value = bitfieldExtract(local_buff[current_index], bitsread, 1);
    total_bitsread += 1;
    bitsread += 1;
    if (bitsread == 8) {
        bitsread = 0;
        current_index += 1;
    }
    return value;
}
| 349 | |||
// Reads num_bits bits with ReadBit() and packs them into one value; the first
// bit read becomes bit 0 of the result.
uint StreamBits(uint num_bits) {
    uint value = 0;
    uint position = 0;
    while (position < num_bits) {
        value |= (ReadBit() & 1) << position;
        position++;
    }
    return value;
}
| 357 | |||
| 358 | // Color endpoint bit stream and its read cursor (8 bits are consumed per element). | ||
| 359 | uint color_endpoint_data[16]; | ||
| 360 | int color_bitsread = 0; | ||
| 361 | uint total_color_bitsread = 0; | ||
| 362 | int color_index = 0; | ||
| 363 | |||
| 364 | // Texel weight bit stream and its read cursor (8 bits are consumed per element). | ||
| 365 | uint texel_weight_data[16]; | ||
| 366 | int texel_bitsread = 0; | ||
| 367 | uint total_texel_bitsread = 0; | ||
| 368 | int texel_index = 0; | ||
| 369 | |||
// Stream selector: when true, ReadColorBit() reads the texel weight stream and
// ResultEmplaceBack() appends to texel_vector; when false, the color endpoint
// stream and result_vector are used.
| 370 | bool texel_flag = false; | ||
| 371 | |||
// Returns the next bit of the stream selected by texel_flag (texel weight data
// when true, color endpoint data when false) and advances that stream's
// cursor. Bits are taken least significant first, 8 per array element.
uint ReadColorBit() {
    if (!texel_flag) {
        const uint color_bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1);
        total_color_bitsread += 1;
        color_bitsread += 1;
        if (color_bitsread == 8) {
            color_bitsread = 0;
            color_index += 1;
        }
        return color_bit;
    }

    const uint texel_bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1);
    total_texel_bitsread += 1;
    texel_bitsread += 1;
    if (texel_bitsread == 8) {
        texel_bitsread = 0;
        texel_index += 1;
    }
    return texel_bit;
}
| 393 | |||
// Reads num_bits bits with ReadColorBit() and packs them into one value; the
// first bit read becomes bit 0 of the result.
uint StreamColorBits(uint num_bits) {
    uint value = 0;
    uint position = 0;
    while (position < num_bits) {
        value |= (ReadColorBit() & 1) << position;
        position++;
    }
    return value;
}
| 401 | |||
// Decoded values of the color endpoint stream and the next free slot.
EncodingData result_vector[100];
int result_index = 0;

// Decoded values of the texel weight stream and the next free slot.
EncodingData texel_vector[100];
int texel_vector_index = 0;

// Appends val to texel_vector when texel_flag is set, otherwise to
// result_vector. No bounds check is performed on the 100-entry arrays.
void ResultEmplaceBack(EncodingData val) {
    if (!texel_flag) {
        result_vector[result_index++] = val;
        return;
    }
    texel_vector[texel_vector_index++] = val;
}
| 417 | |||
// Returns the number of bits needed to store n_vals values with the encoding
// found at encoding_values[encoding_index]: num_bits plain bits per value plus
// the packed trit bits (8 per 5 values, rounded up) or quint bits (7 per 3
// values, rounded up).
uint GetBitLength(uint n_vals, uint encoding_index) {
    const EncodingData enc = encoding_values[encoding_index];
    uint packed_bits = 0;
    if (enc.encoding == Trit) {
        packed_bits = (n_vals * 8 + 4) / 5;
    } else if (enc.encoding == Quint) {
        packed_bits = (n_vals * 7 + 2) / 3;
    }
    return enc.num_bits * n_vals + packed_bits;
}
| 428 | |||
// Number of weights in a size.x by size.y grid; doubled when dual_plane is set.
uint GetNumWeightValues(uvec2 size, bool dual_plane) {
    const uint per_plane = size.x * size.y;
    return dual_plane ? per_plane * 2 : per_plane;
}
| 436 | |||
// Number of bits occupied by the weight grid when its values are stored with
// the encoding at index max_weight of encoding_values.
uint GetPackedBitSize(uvec2 size, bool dual_plane, uint max_weight) {
    return GetBitLength(GetNumWeightValues(size, dual_plane), max_weight);
}
| 441 | |||
// Returns bit number pos of bits (0 or 1).
uint BitsBracket(uint bits, uint pos) {
    const uint shifted = bits >> pos;
    return shifted & 1;
}
| 445 | |||
// Returns the inclusive bit range [start, end] of bits, shifted down so the
// lowest selected bit lands at bit 0. The two bounds may be given in either
// order; equal bounds select a single bit.
uint BitsOp(uint bits, uint start, uint end) {
    const uint low = (start > end) ? end : start;
    const uint high = (start > end) ? start : end;
    const uint width = high - low + 1;
    const uint mask = (1 << width) - 1;
    return (bits >> low) & mask;
}
| 458 | |||
// Decodes one quint block (three values) from the stream selected by
// texel_flag and appends the three results with ResultEmplaceBack().
//   num_bits - number of plain bits stored with each value
// Stream layout: m0, Q[0..2], m1, Q[3..4], m2, Q[5..6], where m* are the plain
// bit fields and Q is a 7-bit field packing the three quint digits.
| 459 | void DecodeQuintBlock(uint num_bits) { // Value number of bits | ||
| 460 | uint m[3]; | ||
| 461 | uint q[3]; | ||
| 462 | uint Q; | ||
| 463 | m[0] = StreamColorBits(num_bits); | ||
| 464 | Q = StreamColorBits(3); | ||
| 465 | m[1] = StreamColorBits(num_bits); | ||
| 466 | Q |= StreamColorBits(2) << 3; | ||
| 467 | m[2] = StreamColorBits(num_bits); | ||
| 468 | Q |= StreamColorBits(2) << 5; | ||
// Unpack Q into the digits q[0..2]; every digit ends up in the range 0..4.
| 469 | if (BitsOp(Q, 1, 2) == 3 && BitsOp(Q, 5, 6) == 0) { | ||
| 470 | q[0] = 4; | ||
| 471 | q[1] = 4; | ||
| 472 | q[2] = (BitsBracket(Q, 0) << 2) | ((BitsBracket(Q, 4) & ~BitsBracket(Q, 0)) << 1) | | ||
| 473 | (BitsBracket(Q, 3) & ~BitsBracket(Q, 0)); | ||
| 474 | } else { | ||
| 475 | uint C = 0; | ||
| 476 | if (BitsOp(Q, 1, 2) == 3) { | ||
| 477 | q[2] = 4; | ||
| 478 | C = (BitsOp(Q, 3, 4) << 3) | ((~BitsOp(Q, 5, 6) & 3) << 1) | BitsBracket(Q, 0); | ||
| 479 | } else { | ||
| 480 | q[2] = BitsOp(Q, 5, 6); | ||
| 481 | C = BitsOp(Q, 0, 4); | ||
| 482 | } | ||
| 483 | |||
| 484 | if (BitsOp(C, 0, 2) == 5) { | ||
| 485 | q[1] = 4; | ||
| 486 | q[0] = BitsOp(C, 3, 4); | ||
| 487 | } else { | ||
| 488 | q[1] = BitsOp(C, 3, 4); | ||
| 489 | q[0] = BitsOp(C, 0, 2); | ||
| 490 | } | ||
| 491 | } | ||
| 492 | |||
// Emit the three values in stream order.
| 493 | for (uint i = 0; i < 3; i++) { | ||
| 494 | EncodingData val; | ||
| 495 | val.encoding = Quint; | ||
| 496 | val.num_bits = num_bits; | ||
| 497 | val.bit_value = m[i]; | ||
| 498 | val.quint_trit_value = q[i]; | ||
| 499 | ResultEmplaceBack(val); | ||
| 500 | } | ||
| 501 | } | ||
| 502 | |||
// Decodes one trit block (five values) from the stream selected by texel_flag
// and appends the five results with ResultEmplaceBack().
//   num_bits - number of plain bits stored with each value
// Stream layout: m0, T[0..1], m1, T[2..3], m2, T[4], m3, T[5..6], m4, T[7],
// where m* are the plain bit fields and T is an 8-bit field packing the five
// trit digits.
| 503 | void DecodeTritBlock(uint num_bits) { | ||
| 504 | uint m[5]; | ||
| 505 | uint t[5]; | ||
| 506 | uint T; | ||
| 507 | m[0] = StreamColorBits(num_bits); | ||
| 508 | T = StreamColorBits(2); | ||
| 509 | m[1] = StreamColorBits(num_bits); | ||
| 510 | T |= StreamColorBits(2) << 2; | ||
| 511 | m[2] = StreamColorBits(num_bits); | ||
| 512 | T |= StreamColorBits(1) << 4; | ||
| 513 | m[3] = StreamColorBits(num_bits); | ||
| 514 | T |= StreamColorBits(2) << 5; | ||
| 515 | m[4] = StreamColorBits(num_bits); | ||
| 516 | T |= StreamColorBits(1) << 7; | ||
// First stage: resolve t[4] and t[3] and derive the 5-bit intermediate C.
| 517 | uint C = 0; | ||
| 518 | if (BitsOp(T, 2, 4) == 7) { | ||
| 519 | C = (BitsOp(T, 5, 7) << 2) | BitsOp(T, 0, 1); | ||
| 520 | t[4] = 2; | ||
| 521 | t[3] = 2; | ||
| 522 | } else { | ||
| 523 | C = BitsOp(T, 0, 4); | ||
| 524 | if (BitsOp(T, 5, 6) == 3) { | ||
| 525 | t[4] = 2; | ||
| 526 | t[3] = BitsBracket(T, 7); | ||
| 527 | } else { | ||
| 528 | t[4] = BitsBracket(T, 7); | ||
| 529 | t[3] = BitsOp(T, 5, 6); | ||
| 530 | } | ||
| 531 | } | ||
// Second stage: resolve t[2], t[1] and t[0] from C.
| 532 | if (BitsOp(C, 0, 1) == 3) { | ||
| 533 | t[2] = 2; | ||
| 534 | t[1] = BitsBracket(C, 4); | ||
| 535 | t[0] = (BitsBracket(C, 3) << 1) | (BitsBracket(C, 2) & ~BitsBracket(C, 3)); | ||
| 536 | } else if (BitsOp(C, 2, 3) == 3) { | ||
| 537 | t[2] = 2; | ||
| 538 | t[1] = 2; | ||
| 539 | t[0] = BitsOp(C, 0, 1); | ||
| 540 | } else { | ||
| 541 | t[2] = BitsBracket(C, 4); | ||
| 542 | t[1] = BitsOp(C, 2, 3); | ||
| 543 | t[0] = (BitsBracket(C, 1) << 1) | (BitsBracket(C, 0) & ~BitsBracket(C, 1)); | ||
| 544 | } | ||
// Emit the five values in stream order.
| 545 | for (uint i = 0; i < 5; i++) { | ||
| 546 | EncodingData val; | ||
| 547 | val.encoding = Trit; | ||
| 548 | val.num_bits = num_bits; | ||
| 549 | val.bit_value = m[i]; | ||
| 550 | val.quint_trit_value = t[i]; | ||
| 551 | ResultEmplaceBack(val); | ||
| 552 | } | ||
| 553 | } | ||
// Decodes at least num_values values from the stream selected by texel_flag,
// using the encoding stored at encoding_values[max_range], and appends them
// with ResultEmplaceBack(). Quint and trit encodings are decoded in whole
// blocks of 3 and 5 values, so more than num_values entries may be appended.
// An encoding tag other than Quint, Trit or JustBits makes no progress.
void DecodeIntegerSequence(uint max_range, uint num_values) {
    EncodingData enc = encoding_values[max_range];
    for (uint decoded = 0; decoded < num_values;) {
        if (enc.encoding == Quint) {
            DecodeQuintBlock(enc.num_bits);
            decoded += 3;
        } else if (enc.encoding == Trit) {
            DecodeTritBlock(enc.num_bits);
            decoded += 5;
        } else if (enc.encoding == JustBits) {
            enc.bit_value = StreamColorBits(enc.num_bits);
            ResultEmplaceBack(enc);
            decoded += 1;
        }
    }
}
| 577 | |||
// Decodes and unquantizes the color endpoint values of a block.
//   color_values    - receives one unquantized value per decoded entry;
//                     entries past the number of values are not written
//   modes           - color endpoint mode of each partition
//   num_partitions  - number of entries of modes that are used
//   color_data_bits - number of bits available for the color data
// Leaves the decoded EncodingData in result_vector / result_index.
| 578 | void DecodeColorValues(out uint color_values[32], uvec4 modes, uint num_partitions, | ||
| 579 | uint color_data_bits) { | ||
// Each partition contributes ((mode >> 2) + 1) * 2 values.
| 580 | uint num_values = 0; | ||
| 581 | for (uint i = 0; i < num_partitions; i++) { | ||
| 582 | num_values += ((modes[i] >> 2) + 1) << 1; | ||
| 583 | } | ||
// Search downwards from 255 for the first range whose encoded size fits into
// color_data_bits, then keep walking down while the entries still look like
// the same encoding and step back up by one. If nothing fits, range ends at 0.
| 584 | int range = 256; | ||
| 585 | while (--range > 0) { | ||
| 586 | EncodingData val = encoding_values[range]; | ||
| 587 | uint bitLength = GetBitLength(num_values, range); | ||
| 588 | if (bitLength <= color_data_bits) { | ||
| 589 | while (--range > 0) { | ||
| 590 | EncodingData newval = encoding_values[range]; | ||
// NOTE(review): this stops only when BOTH the encoding and the bit count
// differ; "stop when either differs" (||) looks like the intent -- confirm.
| 591 | if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) { | ||
| 592 | break; | ||
| 593 | } | ||
| 594 | } | ||
| 595 | range++; | ||
| 596 | break; | ||
| 597 | } | ||
| 598 | } | ||
| 599 | DecodeIntegerSequence(range, num_values); | ||
// Unquantize each decoded entry to an 8-bit value. Block decoding may have
// produced more entries than needed; stop after num_values.
| 600 | uint out_index = 0; | ||
| 601 | for (int itr = 0; itr < result_index; itr++) { | ||
| 602 | if (out_index >= num_values) { | ||
| 603 | break; | ||
| 604 | } | ||
| 605 | EncodingData val = result_vector[itr]; | ||
| 606 | uint bitlen = val.num_bits; | ||
| 607 | uint bitval = val.bit_value; | ||
// A: lowest plain bit replicated to 9 bits; B: pattern built from the
// remaining plain bits; C: per-bit-count multiplier; D: the trit/quint digit.
| 608 | uint A = 0, B = 0, C = 0, D = 0; | ||
| 609 | A = ReplicateBitTo9((bitval & 1)); | ||
| 610 | switch (val.encoding) { | ||
| 611 | case JustBits: | ||
| 612 | color_values[out_index++] = FastReplicateTo8(bitval, bitlen); | ||
| 613 | break; | ||
| 614 | case Trit: { | ||
| 615 | D = val.quint_trit_value; | ||
| 616 | switch (bitlen) { | ||
| 617 | case 1: { | ||
| 618 | C = 204; | ||
| 619 | } break; | ||
| 620 | case 2: { | ||
| 621 | C = 93; | ||
| 622 | uint b = (bitval >> 1) & 1; | ||
| 623 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | ||
| 624 | } break; | ||
| 625 | |||
| 626 | case 3: { | ||
| 627 | C = 44; | ||
| 628 | uint cb = (bitval >> 1) & 3; | ||
| 629 | B = (cb << 7) | (cb << 2) | cb; | ||
| 630 | } break; | ||
| 631 | |||
| 632 | case 4: { | ||
| 633 | C = 22; | ||
| 634 | uint dcb = (bitval >> 1) & 7; | ||
| 635 | B = (dcb << 6) | dcb; | ||
| 636 | } break; | ||
| 637 | |||
| 638 | case 5: { | ||
| 639 | C = 11; | ||
| 640 | uint edcb = (bitval >> 1) & 0xF; | ||
| 641 | B = (edcb << 5) | (edcb >> 2); | ||
| 642 | } break; | ||
| 643 | |||
| 644 | case 6: { | ||
| 645 | C = 5; | ||
| 646 | uint fedcb = (bitval >> 1) & 0x1F; | ||
| 647 | B = (fedcb << 4) | (fedcb >> 4); | ||
| 648 | } break; | ||
| 649 | } | ||
| 650 | } break; | ||
| 651 | case Quint: { | ||
| 652 | D = val.quint_trit_value; | ||
| 653 | switch (bitlen) { | ||
| 654 | case 1: { | ||
| 655 | C = 113; | ||
| 656 | } break; | ||
| 657 | case 2: { | ||
| 658 | C = 54; | ||
| 659 | uint b = (bitval >> 1) & 1; | ||
| 660 | B = (b << 8) | (b << 3) | (b << 2); | ||
| 661 | } break; | ||
| 662 | case 3: { | ||
| 663 | C = 26; | ||
| 664 | uint cb = (bitval >> 1) & 3; | ||
| 665 | B = (cb << 7) | (cb << 1) | (cb >> 1); | ||
| 666 | } break; | ||
| 667 | case 4: { | ||
| 668 | C = 13; | ||
| 669 | uint dcb = (bitval >> 1) & 7; | ||
| 670 | B = (dcb << 6) | (dcb >> 1); | ||
| 671 | } break; | ||
| 672 | case 5: { | ||
| 673 | C = 6; | ||
| 674 | uint edcb = (bitval >> 1) & 0xF; | ||
| 675 | B = (edcb << 5) | (edcb >> 3); | ||
| 676 | } break; | ||
| 677 | } | ||
| 678 | } break; | ||
| 679 | } | ||
| 680 | |||
// Trit/quint entries: combine A..D into the final 8-bit value.
| 681 | if (val.encoding != JustBits) { | ||
| 682 | uint T = (D * C) + B; | ||
| 683 | T ^= A; | ||
| 684 | T = (A & 0x80) | (T >> 2); | ||
| 685 | color_values[out_index++] = T; | ||
| 686 | } | ||
| 687 | } | ||
| 688 | } | ||
// Moves bit 7 of a into bit 7 of (b >> 1) and turns bits 1..6 of a into a
// signed 6-bit value.
// Returns ivec2(new_a, new_b): new_a is in [-32, 31] and
// new_b is (b >> 1) | (a & 0x80).
ivec2 BitTransferSigned(int a, int b) {
    int signed_part = (a >> 1) & 0x3F;
    if ((signed_part & 0x20) > 0) {
        // Bit 5 is the sign bit of the 6-bit field.
        signed_part -= 0x40;
    }
    const int base_part = (b >> 1) | (a & 0x80);
    return ivec2(signed_part, base_part);
}
| 700 | |||
// Clamps every component of color to [0, 255] and returns it as unsigned.
uvec4 ClampByte(ivec4 color) {
    return uvec4(clamp(color, 0, 255));
}
// Averages red and green with blue (using an arithmetic shift) and leaves
// alpha and blue unchanged. The result is ordered (a, r, g, b).
ivec4 BlueContract(int a, int r, int g, int b) {
    const int red = (r + b) >> 1;
    const int green = (g + b) >> 1;
    return ivec4(a, red, green, b);
}
// Read cursor into the color_values array consumed by ComputeEndpoints().
int colvals_index = 0;

// Builds the two color endpoints of one partition.
//   ep1, ep2            - receive the endpoints, components ordered (A, R, G, B)
//   color_values        - unquantized color values; consumed from colvals_index
//   color_endpoint_mode - endpoint mode; 0, 1, 4, 5, 6, 8, 9, 10, 12 and 13 are
//                         handled
// For any other mode nothing is read and ep1/ep2 are left unwritten.
// colvals_index is advanced by the number of values the mode consumes.
void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_values[32],
                      uint color_endpoint_mode) {
#define READ_UINT_VALUES(N)                                                                        \
    uint v[N];                                                                                     \
    for (uint i = 0; i < N; i++) {                                                                 \
        v[i] = color_values[colvals_index++];                                                      \
    }

#define READ_INT_VALUES(N)                                                                         \
    int v[N];                                                                                      \
    for (uint i = 0; i < N; i++) {                                                                 \
        v[i] = int(color_values[colvals_index++]);                                                 \
    }

    switch (color_endpoint_mode) {
    // Luminance, direct.
    case 0: {
        READ_UINT_VALUES(2)
        ep1 = uvec4(0xFF, v[0], v[0], v[0]);
        ep2 = uvec4(0xFF, v[1], v[1], v[1]);
    } break;

    // Luminance, base + 6-bit delta.
    case 1: {
        READ_UINT_VALUES(2)
        uint L0 = (v[0] >> 2) | (v[1] & 0xC0);
        // Saturate at 0xFF. This used to be max(), which always produced at
        // least 0xFF and never clamped an overflowing sum.
        uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU);
        ep1 = uvec4(0xFF, L0, L0, L0);
        ep2 = uvec4(0xFF, L1, L1, L1);
    } break;

    // Luminance + alpha, direct.
    case 4: {
        READ_UINT_VALUES(4)
        ep1 = uvec4(v[2], v[0], v[0], v[0]);
        ep2 = uvec4(v[3], v[1], v[1], v[1]);
    } break;

    // Luminance + alpha, base + offset.
    case 5: {
        READ_INT_VALUES(4)
        ivec2 transferred = BitTransferSigned(v[1], v[0]);
        v[1] = transferred[0];
        v[0] = transferred[1];
        transferred = BitTransferSigned(v[3], v[2]);
        v[3] = transferred[0];
        v[2] = transferred[1];
        ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0]));
        ep2 = ClampByte(ivec4((v[2] + v[3]), v[0] + v[1], v[0] + v[1], v[0] + v[1]));
    } break;

    // RGB, base + scale.
    case 6: {
        READ_UINT_VALUES(4)
        ep1 = uvec4(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
        ep2 = uvec4(0xFF, v[0], v[1], v[2]);
    } break;

    // RGB, direct; endpoints are swapped and blue-contracted when the second
    // endpoint's channel sum is smaller than the first's.
    case 8: {
        READ_UINT_VALUES(6)
        if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
            ep1 = uvec4(0xFF, v[0], v[2], v[4]);
            ep2 = uvec4(0xFF, v[1], v[3], v[5]);
        } else {
            ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5])));
            ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4])));
        }
    } break;

    // RGB, base + offset.
    case 9: {
        READ_INT_VALUES(6)
        ivec2 transferred = BitTransferSigned(v[1], v[0]);
        v[1] = transferred[0];
        v[0] = transferred[1];
        transferred = BitTransferSigned(v[3], v[2]);
        v[3] = transferred[0];
        v[2] = transferred[1];
        transferred = BitTransferSigned(v[5], v[4]);
        v[5] = transferred[0];
        v[4] = transferred[1];
        if (v[1] + v[3] + v[5] >= 0) {
            ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4]));
            ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
        } else {
            ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
            ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4]));
        }
    } break;

    // RGB base + scale, plus two alpha values.
    case 10: {
        READ_UINT_VALUES(6)
        ep1 = uvec4(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
        ep2 = uvec4(v[5], v[0], v[1], v[2]);
    } break;

    // RGBA, direct.
    case 12: {
        READ_UINT_VALUES(8)
        if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
            ep1 = uvec4(v[6], v[0], v[2], v[4]);
            ep2 = uvec4(v[7], v[1], v[3], v[5]);
        } else {
            ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5])));
            ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4])));
        }
    } break;

    // RGBA, base + offset.
    case 13: {
        READ_INT_VALUES(8)
        ivec2 transferred = BitTransferSigned(v[1], v[0]);
        v[1] = transferred[0];
        v[0] = transferred[1];
        transferred = BitTransferSigned(v[3], v[2]);
        v[3] = transferred[0];
        v[2] = transferred[1];

        transferred = BitTransferSigned(v[5], v[4]);
        v[5] = transferred[0];
        v[4] = transferred[1];

        transferred = BitTransferSigned(v[7], v[6]);
        v[7] = transferred[0];
        v[6] = transferred[1];

        if (v[1] + v[3] + v[5] >= 0) {
            ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4]));
            ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
        } else {
            ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
            ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4]));
        }
    } break;
    }
#undef READ_UINT_VALUES
#undef READ_INT_VALUES
}
| 841 | |||
// Converts one decoded weight entry into its unquantized weight.
//   val - decoded entry (encoding kind, bit count, plain bits, trit/quint digit)
// Plain-bit entries are replicated to 6 bits; trit/quint entries are combined
// from the A/B/C/D terms below. Any result above 32 is then incremented by
// one, so the largest 6-bit value 63 maps to 64.
| 842 | uint UnquantizeTexelWeight(EncodingData val) { | ||
| 843 | uint bitval = val.bit_value; | ||
| 844 | uint bitlen = val.num_bits; | ||
// A: lowest plain bit replicated to 7 bits; B: pattern from the remaining
// plain bits; C: per-bit-count multiplier; D: the trit/quint digit.
| 845 | uint A = ReplicateBitTo7((bitval & 1)); | ||
| 846 | uint B = 0, C = 0, D = 0; | ||
| 847 | uint result = 0; | ||
| 848 | switch (val.encoding) { | ||
| 849 | case JustBits: | ||
| 850 | result = FastReplicateTo6(bitval, bitlen); | ||
| 851 | break; | ||
| 852 | case Trit: { | ||
| 853 | D = val.quint_trit_value; | ||
| 854 | switch (bitlen) { | ||
// No plain bits: the digit maps straight to a fixed weight.
| 855 | case 0: { | ||
| 856 | uint results[3] = {0, 32, 63}; | ||
| 857 | result = results[D]; | ||
| 858 | } break; | ||
| 859 | case 1: { | ||
| 860 | C = 50; | ||
| 861 | } break; | ||
| 862 | case 2: { | ||
| 863 | C = 23; | ||
| 864 | uint b = (bitval >> 1) & 1; | ||
| 865 | B = (b << 6) | (b << 2) | b; | ||
| 866 | } break; | ||
| 867 | case 3: { | ||
| 868 | C = 11; | ||
| 869 | uint cb = (bitval >> 1) & 3; | ||
| 870 | B = (cb << 5) | cb; | ||
| 871 | } break; | ||
| 872 | default: | ||
| 873 | break; | ||
| 874 | } | ||
| 875 | } break; | ||
| 876 | case Quint: { | ||
| 877 | D = val.quint_trit_value; | ||
| 878 | switch (bitlen) { | ||
// No plain bits: the digit maps straight to a fixed weight.
| 879 | case 0: { | ||
| 880 | uint results[5] = {0, 16, 32, 47, 63}; | ||
| 881 | result = results[D]; | ||
| 882 | } break; | ||
| 883 | case 1: { | ||
| 884 | C = 28; | ||
| 885 | } break; | ||
| 886 | case 2: { | ||
| 887 | C = 13; | ||
| 888 | uint b = (bitval >> 1) & 1; | ||
| 889 | B = (b << 6) | (b << 1); | ||
| 890 | } break; | ||
| 891 | } | ||
| 892 | } break; | ||
| 893 | } | ||
// Trit/quint entries with at least one plain bit: combine A..D. Bit counts
// without a case above leave B and C at 0.
| 894 | if (val.encoding != JustBits && bitlen > 0) { | ||
| 895 | result = D * C + B; | ||
| 896 | result ^= A; | ||
| 897 | result = (A & 0x20) | (result >> 2); | ||
| 898 | } | ||
// Stretch the upper half so the maximum weight becomes 64 instead of 63.
| 899 | if (result > 32) { | ||
| 900 | result += 1; | ||
| 901 | } | ||
| 902 | return result; | ||
| 903 | } | ||
| 904 | |||
// Unquantizes the decoded weight grid and bilinearly expands it to one weight
// per texel of the block.
//
// outbuffer  - receives block_dims.x * block_dims.y weights per plane
//              (plane 1 is only written when dual_plane is set).
// dual_plane - when set, texel_vector holds interleaved plane-0/plane-1 values.
// size       - dimensions of the stored weight grid.
void UnquantizeTexelWeights(out uint outbuffer[2][144], bool dual_plane, uvec2 size) {
    uint weight_idx = 0;
    uint unquantized[2][144];
    const uint area = size.x * size.y;
    for (uint itr = 0; itr < texel_vector_index; itr++) {
        unquantized[0][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]);
        if (dual_plane) {
            ++itr;
            // Stop before touching texel_vector when the sequence ends on a
            // plane-0 value; the entry at texel_vector_index was never decoded.
            if (itr >= texel_vector_index) {
                break;
            }
            unquantized[1][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]);
        }
        if (++weight_idx >= area) {
            break;
        }
    }

    // Scale factors mapping a texel position onto the weight grid. Pure integer
    // arithmetic: a truncated float quotient is not guaranteed to be exact in
    // GLSL, and e.g. 1026 / 3 must yield exactly 342.
    const uint dims_x = uint(block_dims.x);
    const uint dims_y = uint(block_dims.y);
    const uint Ds = (dims_x / 2u + 1024u) / (dims_x - 1u);
    const uint Dt = (dims_y / 2u + 1024u) / (dims_y - 1u);

    const uint kPlaneScale = dual_plane ? 2 : 1;
    for (uint plane = 0; plane < kPlaneScale; plane++) {
        for (uint t = 0; t < block_dims.y; t++) {
            for (uint s = 0; s < block_dims.x; s++) {
                const uint cs = Ds * s;
                const uint ct = Dt * t;
                // Grid position in 1/16 units: integer part j*, fraction f*.
                const uint gs = (cs * (size.x - 1) + 32) >> 6;
                const uint gt = (ct * (size.y - 1) + 32) >> 6;
                const uint js = gs >> 4;
                const uint fs = gs & 0xF;
                const uint jt = gt >> 4;
                const uint ft = gt & 0x0F;
                // Bilinear factors for the four surrounding grid weights (sum = 16).
                const uint w11 = (fs * ft + 8) >> 4;
                const uint w10 = ft - w11;
                const uint w01 = fs - w11;
                const uint w00 = 16 - fs - ft + w11;
                const uvec4 w = uvec4(w00, w01, w10, w11);
                const uint v0 = jt * size.x + js;

                // Neighbours that fall outside the grid contribute zero.
                uvec4 p = uvec4(0);
                if (v0 < area) {
                    p.x = unquantized[plane][v0];
                }
                if ((v0 + 1) < area) {
                    p.y = unquantized[plane][v0 + 1];
                }
                if ((v0 + size.x) < area) {
                    p.z = unquantized[plane][v0 + size.x];
                }
                if ((v0 + size.x + 1) < area) {
                    p.w = unquantized[plane][v0 + size.x + 1];
                }
                outbuffer[plane][t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4;
            }
        }
    }
}
| 958 | |||
// Classifies an 11-bit block mode into one of ten layout indices (0-9) by
// testing bits 0-3, 5, 7 and 8. Layouts 0-4 are the ones with either of the
// two lowest bits set; layouts 5-9 have both of them clear.
int FindLayout(uint mode) {
    if ((mode & 3u) != 0u) {
        const bool bit2 = (mode & 4u) != 0u;
        const bool bit3 = (mode & 8u) != 0u;
        if (!bit3) {
            return bit2 ? 1 : 0;
        }
        if (!bit2) {
            return 2;
        }
        return (mode & 0x100u) != 0u ? 4 : 3;
    }

    const bool bit7 = (mode & 0x80u) != 0u;
    const bool bit8 = (mode & 0x100u) != 0u;
    if (!bit8) {
        return bit7 ? 6 : 5;
    }
    if (!bit7) {
        return 9;
    }
    return (mode & 0x20u) != 0u ? 8 : 7;
}
| 989 | |||
// Reads the 11-bit block mode from the stream and derives the weight-grid
// parameters: grid size, dual-plane flag and maximum weight value.
// Void-extent modes and reserved encodings are reported through the
// VoidExtentLDR / VoidExtentHDR / Error flags. block_index is not used here.
TexelWeightParams DecodeBlockInfo(uint block_index) {
    TexelWeightParams params = TexelWeightParams(uvec2(0), false, 0, false, false, false);
    const uint mode = StreamBits(11);

    // Void-extent block: low nine bits are 1_1111_1100.
    if ((mode & 0x1ff) == 0x1fc) {
        const bool is_hdr = (mode & 0x200) != 0;
        params.VoidExtentHDR = is_hdr;
        params.VoidExtentLDR = !is_hdr;
        // Bit 10 and the following stream bit must both be set.
        if ((mode & 0x400) == 0 || StreamBits(1) == 0) {
            params.Error = true;
        }
        return params;
    }

    // Reserved encodings.
    const bool low_nibble_clear = (mode & 0xf) == 0;
    const bool reserved_pattern = (mode & 3) == 0 && (mode & 0x1c0) == 0x1c0;
    if (low_nibble_clear || reserved_pattern) {
        params.Error = true;
        return params;
    }

    const uint mode_layout = FindLayout(mode);
    const uint A = (mode >> 5) & 0x3;
    const uint B_two_bits = (mode >> 7) & 0x3;
    const uint B_one_bit = (mode >> 7) & 0x1;
    switch (mode_layout) {
    case 0:
        params.size = uvec2(B_two_bits + 4, A + 2);
        break;
    case 1:
        params.size = uvec2(B_two_bits + 8, A + 2);
        break;
    case 2:
        params.size = uvec2(A + 2, B_two_bits + 8);
        break;
    case 3:
        params.size = uvec2(A + 2, B_one_bit + 6);
        break;
    case 4:
        params.size = uvec2(B_one_bit + 2, A + 2);
        break;
    case 5:
        params.size = uvec2(12, A + 2);
        break;
    case 6:
        params.size = uvec2(A + 2, 12);
        break;
    case 7:
        params.size = uvec2(6, 10);
        break;
    case 8:
        params.size = uvec2(10, 6);
        break;
    case 9:
        // This layout stores B in bits 9-10 instead of bits 7-8.
        params.size = uvec2(A + 6, ((mode >> 9) & 0x3) + 6);
        break;
    default:
        params.Error = true;
        break;
    }

    // Layout 9 reuses bits 9-10 for B, so it has neither the dual-plane flag
    // nor the high-range flag.
    const bool has_flag_bits = mode_layout != 9;
    params.dual_plane = has_flag_bits && ((mode & 0x400) != 0);

    // Three-bit range selector: bit 4 is the low bit; the two upper bits come
    // from bits 0-1 (layouts 0-4) or bits 2-3 (layouts 5-9).
    const uint upper_range_bits = mode_layout < 5 ? (mode & 0x3) << 1 : (mode & 0xc) >> 1;
    const uint weight_index = (((mode >> 4) & 1) | upper_range_bits) - 2;

    const int low_range_max[6] = int[6](1, 2, 3, 4, 5, 7);
    const int high_range_max[6] = int[6](9, 11, 15, 19, 23, 31);
    const bool high_range = has_flag_bits && ((mode & 0x200) != 0);
    params.max_weight = high_range ? high_range_max[weight_index] : low_range_max[weight_index];
    return params;
}
| 1080 | |||
// Paints every texel of the block whose top-left texel is coord.xy with the
// error colour vec4(1, 1, 0, 1).
void FillError(ivec3 coord) {
    const vec4 error_color = vec4(1.0, 1.0, 0.0, 1.0);
    for (uint y = 0; y < block_dims.y; y++) {
        for (uint x = 0; x < block_dims.x; x++) {
            imageStore(dest_image, coord.xy + ivec2(x, y), error_color);
        }
    }
}
| 1089 | |||
// Decodes an LDR void-extent block: skips four 13-bit fields, reads four
// 16-bit channels in R, G, B, A order and fills the whole block with that
// colour normalised by 65535. block_index is not used here.
void FillVoidExtentLDR(ivec3 coord, uint block_index) {
    // The four 13-bit fields are consumed but ignored.
    for (int field = 0; field < 4; field++) {
        StreamBits(13);
    }

    // Separate statements keep the stream reads in R, G, B, A order.
    uvec4 channels;
    channels.r = StreamBits(16);
    channels.g = StreamBits(16);
    channels.b = StreamBits(16);
    channels.a = StreamBits(16);
    const vec4 color = vec4(channels) / 65535.0f;

    for (uint y = 0; y < block_dims.y; y++) {
        for (uint x = 0; x < block_dims.x; x++) {
            imageStore(dest_image, coord.xy + ivec2(x, y), color);
        }
    }
}
| 1109 | |||
// Decodes the 128-bit block held in local_buff and writes the resulting texels
// to dest_image, starting at coord.xy. Blocks that cannot be decoded (invalid
// mode, HDR void extent, weight grid larger than the block, four partitions
// with dual plane, or not enough bits left for colour data) are filled with
// the error colour.
void DecompressBlock(ivec3 coord, uint block_index) {
    TexelWeightParams params = DecodeBlockInfo(block_index);
    if (params.Error) {
        FillError(coord);
        return;
    }
    if (params.VoidExtentHDR) {
        // HDR void extents are reported as errors.
        FillError(coord);
        return;
    }
    if (params.VoidExtentLDR) {
        FillVoidExtentLDR(coord, block_index);
        return;
    }
    if (params.size.x > block_dims.x || params.size.y > block_dims.y) {
        FillError(coord);
        return;
    }
    uint num_partitions = StreamBits(2) + 1;
    if (num_partitions > 4 || (num_partitions == 4 && params.dual_plane)) {
        FillError(coord);
        return;
    }
    int plane_index = -1;
    uint partition_index = 1;
    uvec4 color_endpoint_mode = uvec4(0);
    uint ced_pointer = 0;
    uint base_cem = 0;
    if (num_partitions == 1) {
        color_endpoint_mode[0] = StreamBits(4);
        partition_index = 0;
    } else {
        partition_index = StreamBits(10);
        base_cem = StreamBits(6);
    }
    const uint base_mode = base_cem & 3;
    const uint weight_bits = GetPackedBitSize(params.size, params.dual_plane, params.max_weight);

    uint extra_cem_bits = 0;
    if (base_mode > 0) {
        switch (num_partitions) {
        case 2:
            extra_cem_bits += 2;
            break;
        case 3:
            extra_cem_bits += 5;
            break;
        case 4:
            extra_cem_bits += 8;
            break;
        default:
            // base_mode is only non-zero for multi-partition blocks; never
            // leave the block unwritten if that ever stops holding.
            FillError(coord);
            return;
        }
    }
    const uint plane_selector_bits = params.dual_plane ? 2u : 0u;

    // Bits left for colour endpoint data. Computed as a signed value: with
    // unsigned arithmetic a malformed block would wrap the budget to a huge
    // number and the loop below would run far past color_endpoint_data.
    // NOTE(review): assumes total_bitsread is the number of bits consumed from
    // the stream so far — confirm against StreamBits.
    const int color_bit_budget = 128 - int(weight_bits) - int(total_bitsread) -
                                 int(extra_cem_bits) - int(plane_selector_bits);
    if (color_bit_budget < 0) {
        FillError(coord);
        return;
    }

    // Read color data...
    const uint color_data_bits = uint(color_bit_budget);
    uint remaining_bits = color_data_bits;
    while (remaining_bits > 0) {
        const uint nb = min(remaining_bits, 8u);
        const uint b = StreamBits(nb);
        color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, 8));
        ced_pointer++;
        remaining_bits -= nb;
    }
    plane_index = int(StreamBits(plane_selector_bits));
    if (base_mode > 0) {
        // Per-partition modes: one class bit each (C), then two mode bits each (M).
        const uint extra_cem = StreamBits(extra_cem_bits);
        uint cem = (extra_cem << 6) | base_cem;
        cem >>= 2;
        uint C[4] = {0, 0, 0, 0};
        for (uint i = 0; i < num_partitions; i++) {
            C[i] = cem & 1;
            cem >>= 1;
        }
        uint M[4] = {0, 0, 0, 0};
        for (uint i = 0; i < num_partitions; i++) {
            M[i] = cem & 3;
            cem >>= 2;
        }
        for (uint i = 0; i < num_partitions; i++) {
            color_endpoint_mode[i] = base_mode;
            if ((C[i]) == 0) {
                color_endpoint_mode[i] -= 1;
            }
            color_endpoint_mode[i] <<= 2;
            color_endpoint_mode[i] |= M[i];
        }
    } else if (num_partitions > 1) {
        // All partitions share the same endpoint mode.
        const uint cem = base_cem >> 2;
        for (uint i = 0; i < num_partitions; i++) {
            color_endpoint_mode[i] = cem;
        }
    }

    uint color_values[32]; // Four values, two endpoints, four maximum partitions
    DecodeColorValues(color_values, color_endpoint_mode, num_partitions, color_data_bits);
    uvec4 endpoints[4][2];
    for (uint i = 0; i < num_partitions; i++) {
        ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_values, color_endpoint_mode[i]);
    }

    // Texel weights are read from the reversed block: swap byte i with byte
    // 15 - i and reverse the bit order inside every byte.
    for (uint i = 0; i < 16; i++) {
        texel_weight_data[i] = local_buff[i];
    }
    for (uint i = 0; i < 8; i++) {
#define REVERSE_BYTE(b) ((b * 0x0802U & 0x22110U) | (b * 0x8020U & 0x88440U)) * 0x10101U >> 16
        uint a = REVERSE_BYTE(texel_weight_data[i]);
        uint b = REVERSE_BYTE(texel_weight_data[15 - i]);
#undef REVERSE_BYTE
        texel_weight_data[i] = uint(bitfieldExtract(b, 0, 8));
        texel_weight_data[15 - i] = uint(bitfieldExtract(a, 0, 8));
    }

    // Mask off everything after the packed weight bits: the partial byte first,
    // then every following byte.
    const uint clear_byte_start = (weight_bits >> 3) + 1;
    texel_weight_data[clear_byte_start - 1] =
        texel_weight_data[clear_byte_start - 1] & uint(((1 << (weight_bits % 8)) - 1));
    for (uint i = 0; i < 16 - clear_byte_start; i++) {
        texel_weight_data[clear_byte_start + i] = uint(0U);
    }

    texel_flag = true; // use texel "vector" and bit stream in integer decoding
    DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
    uint weights[2][144];
    UnquantizeTexelWeights(weights, params.dual_plane, params.size);

    for (uint j = 0; j < block_dims.y; j++) {
        for (uint i = 0; i < block_dims.x; i++) {
            uint local_partition = Select2DPartition(partition_index, i, j, num_partitions,
                                                     (block_dims.y * block_dims.x) < 32);
            vec4 p;
            uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]);
            uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]);
            uvec4 plane_vec = uvec4(0);
            uvec4 weight_vec = uvec4(0);
            for (uint c = 0; c < 4; c++) {
                // In dual-plane mode one component takes its weight from plane 1.
                if (params.dual_plane && (((plane_index + 1) & 3) == c)) {
                    plane_vec[c] = 1;
                }
                weight_vec[c] = weights[plane_vec[c]][j * block_dims.x + i];
            }
            // Blend the endpoints with a 0..64 weight, then normalise by 65535.
            vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6);
            p = (Cf / 65535.0);
            // Rotate the components (x to the end) before storing.
            imageStore(dest_image, coord.xy + ivec2(i, j), p.gbar);
        }
    }
}
| 1261 | |||
// Entry point: one invocation decodes one block. Computes the offset of the
// invocation's block inside the block-linear input, copies its 16 bytes into
// local_buff and decodes them into dest_image.
void main() {
    uvec3 pos = gl_GlobalInvocationID + origin;
    pos.x <<= bytes_per_block_log2;

    // Read as soon as possible due to its latency
    const uint swizzle = SwizzleOffset(pos.xy);

    const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;

    // Offset of this block's data inside the input buffer.
    const uint layer_offset = layer * layer_stride;
    const uint block_row_offset = (block_y >> block_height) * block_size;
    const uint gob_row_offset = (block_y & block_height_mask) << GOB_SIZE_SHIFT;
    const uint gob_column_offset = (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
    const uint offset =
        layer_offset + block_row_offset + gob_row_offset + gob_column_offset + swizzle;

    // Top-left texel of the destination block.
    const ivec3 invocation_destination = ivec3(gl_GlobalInvocationID + destination);
    const ivec3 coord = ivec3(invocation_destination * uvec3(block_dims, 1.0));
    const uint block_index =
        layer * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x;

    // Reset the bit-stream state and load the block.
    current_index = 0;
    bitsread = 0;
    for (int byte_index = 0; byte_index < 16; byte_index++) {
        local_buff[byte_index] = ReadTexel(offset + byte_index);
    }
    DecompressBlock(coord, block_index);
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index e028677e9..29105ecad 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -307,7 +307,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 307 | 307 | ||
| 308 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, | 308 | [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, |
| 309 | const VideoCommon::ImageInfo& info) { | 309 | const VideoCommon::ImageInfo& info) { |
| 310 | // Disable accelerated uploads for now as they don't implement swizzled uploads | 310 | return (!runtime.HasNativeASTC() && IsPixelFormatASTC(info.format)); |
| 311 | // Disable other accelerated uploads for now as they don't implement swizzled uploads | ||
| 311 | return false; | 312 | return false; |
| 312 | switch (info.type) { | 313 | switch (info.type) { |
| 313 | case ImageType::e2D: | 314 | case ImageType::e2D: |
| @@ -567,6 +568,9 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | |||
| 567 | 568 | ||
| 568 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, | 569 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, |
| 569 | std::span<const SwizzleParameters> swizzles) { | 570 | std::span<const SwizzleParameters> swizzles) { |
| 571 | if (IsPixelFormatASTC(image.info.format)) { | ||
| 572 | return util_shaders.ASTCDecode(image, map, swizzles); | ||
| 573 | } | ||
| 570 | switch (image.info.type) { | 574 | switch (image.info.type) { |
| 571 | case ImageType::e2D: | 575 | case ImageType::e2D: |
| 572 | return util_shaders.BlockLinearUpload2D(image, map, swizzles); | 576 | return util_shaders.BlockLinearUpload2D(image, map, swizzles); |
| @@ -599,6 +603,10 @@ FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal | |||
| 599 | } | 603 | } |
| 600 | } | 604 | } |
| 601 | 605 | ||
// Forwards the result of device.HasASTC() to callers of the texture cache runtime.
| 606 | bool TextureCacheRuntime::HasNativeASTC() const noexcept { | ||
| 607 | return device.HasASTC(); | ||
| 608 | } | ||
| 609 | |||
| 602 | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) | 610 | TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_) |
| 603 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} | 611 | : storage_flags{storage_flags_}, map_flags{map_flags_} {} |
| 604 | 612 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3fbaa102f..3c871541b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -95,6 +95,8 @@ public: | |||
| 95 | return has_broken_texture_view_formats; | 95 | return has_broken_texture_view_formats; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | bool HasNativeASTC() const noexcept; | ||
| 99 | |||
| 98 | private: | 100 | private: |
| 99 | struct StagingBuffers { | 101 | struct StagingBuffers { |
| 100 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | 102 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 2fe4799bc..2a4220661 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -3,7 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <bit> | 5 | #include <bit> |
| 6 | #include <fstream> | ||
| 6 | #include <span> | 7 | #include <span> |
| 8 | #include <streambuf> | ||
| 9 | #include <string> | ||
| 7 | #include <string_view> | 10 | #include <string_view> |
| 8 | 11 | ||
| 9 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| @@ -24,11 +27,13 @@ | |||
| 24 | #include "video_core/texture_cache/accelerated_swizzle.h" | 27 | #include "video_core/texture_cache/accelerated_swizzle.h" |
| 25 | #include "video_core/texture_cache/types.h" | 28 | #include "video_core/texture_cache/types.h" |
| 26 | #include "video_core/texture_cache/util.h" | 29 | #include "video_core/texture_cache/util.h" |
| 30 | #include "video_core/textures/astc.h" | ||
| 27 | #include "video_core/textures/decoders.h" | 31 | #include "video_core/textures/decoders.h" |
| 28 | 32 | ||
| 29 | namespace OpenGL { | 33 | namespace OpenGL { |
| 30 | 34 | ||
| 31 | using namespace HostShaders; | 35 | using namespace HostShaders; |
| 36 | using namespace Tegra::Texture::ASTC; | ||
| 32 | 37 | ||
| 33 | using VideoCommon::Extent3D; | 38 | using VideoCommon::Extent3D; |
| 34 | using VideoCommon::ImageCopy; | 39 | using VideoCommon::ImageCopy; |
| @@ -63,12 +68,104 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 63 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | 68 | pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), |
| 64 | copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), | 69 | copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), |
| 65 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | 70 | copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { |
| 71 | // TODO: Load shader string as a header | ||
| 72 | std::string astc_path = "astc_decoder.comp"; | ||
| 73 | std::ifstream t(astc_path); | ||
| 74 | std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>()); | ||
| 75 | astc_decoder_program = MakeProgram(str); | ||
| 76 | MakeBuffers(); | ||
| 77 | } | ||
| 78 | |||
| 79 | UtilShaders::~UtilShaders() = default; | ||
| 80 | |||
| 81 | void UtilShaders::MakeBuffers() { | ||
| 66 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | 82 | const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); |
| 67 | swizzle_table_buffer.Create(); | 83 | swizzle_table_buffer.Create(); |
| 68 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | 84 | glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); |
| 85 | |||
| 86 | astc_encodings_buffer.Create(); | ||
| 87 | glNamedBufferStorage(astc_encodings_buffer.handle, sizeof(EncodingsValues), &EncodingsValues, | ||
| 88 | 0); | ||
| 89 | replicate_6_to_8_buffer.Create(); | ||
| 90 | glNamedBufferStorage(replicate_6_to_8_buffer.handle, sizeof(REPLICATE_6_BIT_TO_8_TABLE), | ||
| 91 | &REPLICATE_6_BIT_TO_8_TABLE, 0); | ||
| 92 | replicate_7_to_8_buffer.Create(); | ||
| 93 | glNamedBufferStorage(replicate_7_to_8_buffer.handle, sizeof(REPLICATE_7_BIT_TO_8_TABLE), | ||
| 94 | &REPLICATE_7_BIT_TO_8_TABLE, 0); | ||
| 95 | replicate_8_to_8_buffer.Create(); | ||
| 96 | glNamedBufferStorage(replicate_8_to_8_buffer.handle, sizeof(REPLICATE_8_BIT_TO_8_TABLE), | ||
| 97 | &REPLICATE_8_BIT_TO_8_TABLE, 0); | ||
| 98 | replicate_byte_to_16_buffer.Create(); | ||
| 99 | glNamedBufferStorage(replicate_byte_to_16_buffer.handle, sizeof(REPLICATE_BYTE_TO_16_TABLE), | ||
| 100 | &REPLICATE_BYTE_TO_16_TABLE, 0); | ||
| 69 | } | 101 | } |
| 70 | 102 | ||
| 71 | UtilShaders::~UtilShaders() = default; | 103 | void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, |
| 104 | std::span<const VideoCommon::SwizzleParameters> swizzles) { | ||
| 105 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | ||
| 106 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||
| 107 | static constexpr GLuint BINDING_ENC_BUFFER = 2; | ||
| 108 | |||
| 109 | static constexpr GLuint BINDING_6_TO_8_BUFFER = 3; | ||
| 110 | static constexpr GLuint BINDING_7_TO_8_BUFFER = 4; | ||
| 111 | static constexpr GLuint BINDING_8_TO_8_BUFFER = 5; | ||
| 112 | static constexpr GLuint BINDING_BYTE_TO_16_BUFFER = 6; | ||
| 113 | |||
| 114 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||
| 115 | static constexpr GLuint LOC_NUM_IMAGE_BLOCKS = 0; | ||
| 116 | static constexpr GLuint LOC_BLOCK_DIMS = 1; | ||
| 117 | static constexpr GLuint LOC_LAYER = 2; | ||
| 118 | |||
| 119 | const Extent3D tile_size = { | ||
| 120 | VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||
| 121 | VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||
| 122 | }; | ||
| 123 | program_manager.BindHostCompute(astc_decoder_program.handle); | ||
| 124 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||
| 125 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_encodings_buffer.handle); | ||
| 126 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_6_TO_8_BUFFER, | ||
| 127 | replicate_6_to_8_buffer.handle); | ||
| 128 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_7_TO_8_BUFFER, | ||
| 129 | replicate_7_to_8_buffer.handle); | ||
| 130 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_8_TO_8_BUFFER, | ||
| 131 | replicate_8_to_8_buffer.handle); | ||
| 132 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_BYTE_TO_16_BUFFER, | ||
| 133 | replicate_byte_to_16_buffer.handle); | ||
| 134 | |||
| 135 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||
| 136 | glUniform2ui(LOC_BLOCK_DIMS, tile_size.width, tile_size.height); | ||
| 137 | |||
| 138 | for (u32 layer = 0; layer < image.info.resources.layers; layer++) { | ||
| 139 | for (const SwizzleParameters& swizzle : swizzles) { | ||
| 140 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_FALSE, | ||
| 141 | layer, GL_WRITE_ONLY, GL_RGBA8); | ||
| 142 | const size_t input_offset = swizzle.buffer_offset + map.offset; | ||
| 143 | const auto num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U); | ||
| 144 | const auto num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U); | ||
| 145 | |||
| 146 | glUniform2ui(LOC_NUM_IMAGE_BLOCKS, swizzle.num_tiles.width, swizzle.num_tiles.height); | ||
| 147 | glUniform1ui(LOC_LAYER, layer); | ||
| 148 | |||
| 149 | // To unswizzle the ASTC data | ||
| 150 | const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info); | ||
| 151 | glUniform3uiv(3, 1, params.origin.data()); | ||
| 152 | glUniform3iv(4, 1, params.destination.data()); | ||
| 153 | glUniform1ui(5, params.bytes_per_block_log2); | ||
| 154 | glUniform1ui(6, params.layer_stride); | ||
| 155 | glUniform1ui(7, params.block_size); | ||
| 156 | glUniform1ui(8, params.x_shift); | ||
| 157 | glUniform1ui(9, params.block_height); | ||
| 158 | glUniform1ui(10, params.block_height_mask); | ||
| 159 | |||
| 160 | // ASTC texture data | ||
| 161 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, | ||
| 162 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | ||
| 163 | |||
| 164 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | ||
| 165 | } | ||
| 166 | } | ||
| 167 | program_manager.RestoreGuestCompute(); | ||
| 168 | } | ||
| 72 | 169 | ||
| 73 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | 170 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, |
| 74 | std::span<const SwizzleParameters> swizzles) { | 171 | std::span<const SwizzleParameters> swizzles) { |
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 93b009743..08a1cb9b2 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -40,6 +40,11 @@ public: | |||
| 40 | explicit UtilShaders(ProgramManager& program_manager); | 40 | explicit UtilShaders(ProgramManager& program_manager); |
| 41 | ~UtilShaders(); | 41 | ~UtilShaders(); |
| 42 | 42 | ||
| 43 | void MakeBuffers(); | ||
| 44 | |||
| 45 | void ASTCDecode(Image& image, const ImageBufferMap& map, | ||
| 46 | std::span<const VideoCommon::SwizzleParameters> swizzles); | ||
| 47 | |||
| 43 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | 48 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, |
| 44 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 49 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 45 | 50 | ||
| @@ -59,7 +64,13 @@ private: | |||
| 59 | ProgramManager& program_manager; | 64 | ProgramManager& program_manager; |
| 60 | 65 | ||
| 61 | OGLBuffer swizzle_table_buffer; | 66 | OGLBuffer swizzle_table_buffer; |
| 67 | OGLBuffer astc_encodings_buffer; | ||
| 68 | OGLBuffer replicate_6_to_8_buffer; | ||
| 69 | OGLBuffer replicate_7_to_8_buffer; | ||
| 70 | OGLBuffer replicate_8_to_8_buffer; | ||
| 71 | OGLBuffer replicate_byte_to_16_buffer; | ||
| 62 | 72 | ||
| 73 | OGLProgram astc_decoder_program; | ||
| 63 | OGLProgram block_linear_unswizzle_2d_program; | 74 | OGLProgram block_linear_unswizzle_2d_program; |
| 64 | OGLProgram block_linear_unswizzle_3d_program; | 75 | OGLProgram block_linear_unswizzle_3d_program; |
| 65 | OGLProgram pitch_unswizzle_program; | 76 | OGLProgram pitch_unswizzle_program; |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 9105119bc..bc8bddaec 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -8,6 +8,196 @@ | |||
| 8 | 8 | ||
| 9 | namespace Tegra::Texture::ASTC { | 9 | namespace Tegra::Texture::ASTC { |
| 10 | 10 | ||
| 11 | /// Count the number of bits set in a number. | ||
| 12 | constexpr u32 Popcnt(u32 n) { | ||
| 13 | u32 c = 0; | ||
| 14 | for (; n; c++) { | ||
| 15 | n &= n - 1; | ||
| 16 | } | ||
| 17 | return c; | ||
| 18 | } | ||
| 19 | |||
| 20 | enum class IntegerEncoding { JustBits, Qus32, Trit }; | ||
| 21 | |||
| 22 | struct IntegerEncodedValue { | ||
| 23 | constexpr IntegerEncodedValue() = default; | ||
| 24 | |||
| 25 | constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) | ||
| 26 | : encoding{encoding_}, num_bits{num_bits_} {} | ||
| 27 | |||
| 28 | constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { | ||
| 29 | return encoding == other.encoding && num_bits == other.num_bits; | ||
| 30 | } | ||
| 31 | |||
| 32 | // Returns the number of bits required to encode nVals values. | ||
| 33 | u32 GetBitLength(u32 nVals) const { | ||
| 34 | u32 totalBits = num_bits * nVals; | ||
| 35 | if (encoding == IntegerEncoding::Trit) { | ||
| 36 | totalBits += (nVals * 8 + 4) / 5; | ||
| 37 | } else if (encoding == IntegerEncoding::Qus32) { | ||
| 38 | totalBits += (nVals * 7 + 2) / 3; | ||
| 39 | } | ||
| 40 | return totalBits; | ||
| 41 | } | ||
| 42 | |||
| 43 | IntegerEncoding encoding{}; | ||
| 44 | u32 num_bits = 0; | ||
| 45 | u32 bit_value = 0; | ||
| 46 | union { | ||
| 47 | u32 qus32_value = 0; | ||
| 48 | u32 trit_value; | ||
| 49 | }; | ||
| 50 | }; | ||
| 51 | |||
| 52 | // Returns a new instance of this struct that corresponds to the | ||
| 53 | // can take no more than maxval values | ||
| 54 | static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { | ||
| 55 | while (maxVal > 0) { | ||
| 56 | u32 check = maxVal + 1; | ||
| 57 | |||
| 58 | // Is maxVal a power of two? | ||
| 59 | if (!(check & (check - 1))) { | ||
| 60 | return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); | ||
| 61 | } | ||
| 62 | |||
| 63 | // Is maxVal of the type 3*2^n - 1? | ||
| 64 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | ||
| 65 | return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); | ||
| 66 | } | ||
| 67 | |||
| 68 | // Is maxVal of the type 5*2^n - 1? | ||
| 69 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | ||
| 70 | return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); | ||
| 71 | } | ||
| 72 | |||
| 73 | // Apparently it can't be represented with a bounded integer sequence... | ||
| 74 | // just iterate. | ||
| 75 | maxVal--; | ||
| 76 | } | ||
| 77 | return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||
| 78 | } | ||
| 79 | |||
| 80 | static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { | ||
| 81 | std::array<IntegerEncodedValue, 256> encodings{}; | ||
| 82 | for (std::size_t i = 0; i < encodings.size(); ++i) { | ||
| 83 | encodings[i] = CreateEncoding(static_cast<u32>(i)); | ||
| 84 | } | ||
| 85 | return encodings; | ||
| 86 | } | ||
| 87 | |||
| 88 | static constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues(); | ||
| 89 | |||
// Replicates the low numBits bits of val until toBit bits are filled: bits
// [(toBit - 1):(toBit - numBits)] of the result equal bits [(numBits - 1):0] of val, and the
// pattern repeats all the way down. When toBit is not a multiple of numBits, the last repetition
// keeps only its topmost bits.
//
// Returns 0 when numBits or toBit is 0. When numBits >= toBit nothing is replicated and the
// masked value is returned as-is (it is not truncated to toBit bits).
// Shifts of the result are not guarded, so toBit should stay within the bit width of IntType.
template <typename IntType>
static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
    if (numBits == 0 || toBit == 0) {
        return 0;
    }

    // Build the mask in a wide unsigned type: shifting the int literal 1 is undefined (or
    // overflows) once numBits reaches the width of int, e.g. for 64-bit IntType.
    constexpr u32 wide_bits = static_cast<u32>(sizeof(unsigned long long) * 8);
    const unsigned long long mask = numBits >= wide_bits ? ~0ULL : (1ULL << numBits) - 1;
    const IntType v = static_cast<IntType>(val & static_cast<IntType>(mask));

    IntType res = v;
    u32 reslen = numBits;
    while (reslen < toBit) {
        // Append a whole copy of the pattern, or only its top bits if fewer bits remain.
        const u32 remaining = toBit - reslen;
        const u32 take = numBits < remaining ? numBits : remaining;
        res = static_cast<IntType>(res << take);
        res = static_cast<IntType>(res | (v >> (numBits - take)));
        reslen += take;
    }
    return res;
}
| 116 | |||
/// Returns the number of distinct values representable in num_bits bits, i.e. 2^num_bits.
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    constexpr std::size_t one = 1;
    return one << num_bits;
}
| 120 | |||
| 121 | template <typename IntType, u32 num_bits, u32 to_bit> | ||
| 122 | static constexpr auto MakeReplicateTable() { | ||
| 123 | std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||
| 124 | for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { | ||
| 125 | table[value] = Replicate(value, num_bits, to_bit); | ||
| 126 | } | ||
| 127 | return table; | ||
| 128 | } | ||
| 129 | |||
| 130 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 131 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | ||
| 132 | return REPLICATE_BYTE_TO_16_TABLE[value]; | ||
| 133 | } | ||
| 134 | |||
| 135 | static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); | ||
| 136 | static constexpr u32 ReplicateBitTo7(std::size_t value) { | ||
| 137 | return REPLICATE_BIT_TO_7_TABLE[value]; | ||
| 138 | } | ||
| 139 | |||
| 140 | static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); | ||
| 141 | static constexpr u32 ReplicateBitTo9(std::size_t value) { | ||
| 142 | return REPLICATE_BIT_TO_9_TABLE[value]; | ||
| 143 | } | ||
| 144 | |||
| 145 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 146 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 147 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 148 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 149 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 150 | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||
| 151 | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||
| 152 | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||
| 153 | /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback | ||
| 154 | /// to the runtime implementation | ||
| 155 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 156 | switch (num_bits) { | ||
| 157 | case 1: | ||
| 158 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 159 | case 2: | ||
| 160 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 161 | case 3: | ||
| 162 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 163 | case 4: | ||
| 164 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 165 | case 5: | ||
| 166 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 167 | case 6: | ||
| 168 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 169 | case 7: | ||
| 170 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 171 | case 8: | ||
| 172 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 173 | default: | ||
| 174 | return Replicate(value, num_bits, 8); | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 179 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 180 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 181 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 182 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 183 | |||
| 184 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 185 | switch (num_bits) { | ||
| 186 | case 1: | ||
| 187 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 188 | case 2: | ||
| 189 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 190 | case 3: | ||
| 191 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 192 | case 4: | ||
| 193 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 194 | case 5: | ||
| 195 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 196 | default: | ||
| 197 | return Replicate(value, num_bits, 6); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | |||
| 11 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 201 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 12 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); | 202 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |
| 13 | 203 | ||