diff options
| author | 2021-03-25 16:53:51 -0400 | |
|---|---|---|
| committer | 2021-03-25 16:53:51 -0400 | |
| commit | 2f83d9a61bca42d9ef24074beb2b11b19bd4cecd (patch) | |
| tree | 514e40eb750280c2e3025f9301befb6f8c9b46e9 /src/video_core/host_shaders | |
| parent | astc_decoder: Reimplement Layers (diff) | |
| download | yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.gz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.xz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.zip | |
astc_decoder: Refactor for style and more efficient memory use
Diffstat (limited to 'src/video_core/host_shaders')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 569 |
1 files changed, 307 insertions, 262 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index b903a2d37..703e34587 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -9,13 +9,13 @@ | |||
| 9 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { | 9 | #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { |
| 10 | #define END_PUSH_CONSTANTS }; | 10 | #define END_PUSH_CONSTANTS }; |
| 11 | #define UNIFORM(n) | 11 | #define UNIFORM(n) |
| 12 | #define BINDING_SWIZZLE_BUFFER 0 | 12 | #define BINDING_INPUT_BUFFER 0 |
| 13 | #define BINDING_INPUT_BUFFER 1 | 13 | #define BINDING_ENC_BUFFER 1 |
| 14 | #define BINDING_ENC_BUFFER 2 | 14 | #define BINDING_6_TO_8_BUFFER 2 |
| 15 | #define BINDING_6_TO_8_BUFFER 3 | 15 | #define BINDING_7_TO_8_BUFFER 3 |
| 16 | #define BINDING_7_TO_8_BUFFER 4 | 16 | #define BINDING_8_TO_8_BUFFER 4 |
| 17 | #define BINDING_8_TO_8_BUFFER 5 | 17 | #define BINDING_BYTE_TO_16_BUFFER 5 |
| 18 | #define BINDING_BYTE_TO_16_BUFFER 6 | 18 | #define BINDING_SWIZZLE_BUFFER 6 |
| 19 | #define BINDING_OUTPUT_IMAGE 7 | 19 | #define BINDING_OUTPUT_IMAGE 7 |
| 20 | 20 | ||
| 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv | 21 | #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv |
| @@ -37,28 +37,16 @@ | |||
| 37 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; | 37 | layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; |
| 38 | 38 | ||
| 39 | BEGIN_PUSH_CONSTANTS | 39 | BEGIN_PUSH_CONSTANTS |
| 40 | UNIFORM(0) uvec2 num_image_blocks; | ||
| 41 | UNIFORM(1) uvec2 block_dims; | 40 | UNIFORM(1) uvec2 block_dims; |
| 42 | 41 | ||
| 43 | UNIFORM(2) uvec3 origin; | 42 | UNIFORM(2) uint bytes_per_block_log2; |
| 44 | UNIFORM(3) ivec3 destination; | 43 | UNIFORM(3) uint layer_stride; |
| 45 | UNIFORM(4) uint bytes_per_block_log2; | 44 | UNIFORM(4) uint block_size; |
| 46 | UNIFORM(5) uint layer_stride; | 45 | UNIFORM(5) uint x_shift; |
| 47 | UNIFORM(6) uint block_size; | 46 | UNIFORM(6) uint block_height; |
| 48 | UNIFORM(7) uint x_shift; | 47 | UNIFORM(7) uint block_height_mask; |
| 49 | UNIFORM(8) uint block_height; | ||
| 50 | UNIFORM(9) uint block_height_mask; | ||
| 51 | END_PUSH_CONSTANTS | 48 | END_PUSH_CONSTANTS |
| 52 | 49 | ||
| 53 | uint current_index = 0; | ||
| 54 | int bitsread = 0; | ||
| 55 | uint total_bitsread = 0; | ||
| 56 | uint local_buff[16]; | ||
| 57 | |||
| 58 | const int JustBits = 0; | ||
| 59 | const int Quint = 1; | ||
| 60 | const int Trit = 2; | ||
| 61 | |||
| 62 | struct EncodingData { | 50 | struct EncodingData { |
| 63 | uint encoding; | 51 | uint encoding; |
| 64 | uint num_bits; | 52 | uint num_bits; |
| @@ -68,11 +56,11 @@ struct EncodingData { | |||
| 68 | 56 | ||
| 69 | struct TexelWeightParams { | 57 | struct TexelWeightParams { |
| 70 | uvec2 size; | 58 | uvec2 size; |
| 71 | bool dual_plane; | ||
| 72 | uint max_weight; | 59 | uint max_weight; |
| 73 | bool Error; | 60 | bool dual_plane; |
| 74 | bool VoidExtentLDR; | 61 | bool error_state; |
| 75 | bool VoidExtentHDR; | 62 | bool void_extent_ldr; |
| 63 | bool void_extent_hdr; | ||
| 76 | }; | 64 | }; |
| 77 | 65 | ||
| 78 | // Swizzle data | 66 | // Swizzle data |
| @@ -116,6 +104,75 @@ const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHI | |||
| 116 | 104 | ||
| 117 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); | 105 | const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1); |
| 118 | 106 | ||
| 107 | const int BLOCK_SIZE_IN_BYTES = 16; | ||
| 108 | |||
| 109 | const int BLOCK_INFO_ERROR = 0; | ||
| 110 | const int BLOCK_INFO_VOID_EXTENT_HDR = 1; | ||
| 111 | const int BLOCK_INFO_VOID_EXTENT_LDR = 2; | ||
| 112 | const int BLOCK_INFO_NORMAL = 3; | ||
| 113 | |||
| 114 | const int JUST_BITS = 0; | ||
| 115 | const int QUINT = 1; | ||
| 116 | const int TRIT = 2; | ||
| 117 | |||
| 118 | // The following constants are expanded variants of the Replicate() | ||
| 119 | // function calls corresponding to the following arguments: | ||
| 120 | // value: index into the generated table | ||
| 121 | // num_bits: the integer after "REPLICATE" in the table name, i.e. 4 is num_bits in REPLICATE_4. | ||
| 122 | // to_bit: the integer after "TO_" | ||
| 123 | const uint REPLICATE_BIT_TO_7_TABLE[2] = uint[](0, 127); | ||
| 124 | const uint REPLICATE_1_BIT_TO_9_TABLE[2] = uint[](0, 511); | ||
| 125 | |||
| 126 | const uint REPLICATE_1_BIT_TO_8_TABLE[2] = uint[](0, 255); | ||
| 127 | const uint REPLICATE_2_BIT_TO_8_TABLE[4] = uint[](0, 85, 170, 255); | ||
| 128 | const uint REPLICATE_3_BIT_TO_8_TABLE[8] = uint[](0, 36, 73, 109, 146, 182, 219, 255); | ||
| 129 | const uint REPLICATE_4_BIT_TO_8_TABLE[16] = | ||
| 130 | uint[](0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255); | ||
| 131 | const uint REPLICATE_5_BIT_TO_8_TABLE[32] = | ||
| 132 | uint[](0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, | ||
| 133 | 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255); | ||
| 134 | const uint REPLICATE_1_BIT_TO_6_TABLE[2] = uint[](0, 63); | ||
| 135 | const uint REPLICATE_2_BIT_TO_6_TABLE[4] = uint[](0, 21, 42, 63); | ||
| 136 | const uint REPLICATE_3_BIT_TO_6_TABLE[8] = uint[](0, 9, 18, 27, 36, 45, 54, 63); | ||
| 137 | const uint REPLICATE_4_BIT_TO_6_TABLE[16] = | ||
| 138 | uint[](0, 4, 8, 12, 17, 21, 25, 29, 34, 38, 42, 46, 51, 55, 59, 63); | ||
| 139 | const uint REPLICATE_5_BIT_TO_6_TABLE[32] = | ||
| 140 | uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, | ||
| 141 | 47, 49, 51, 53, 55, 57, 59, 61, 63); | ||
| 142 | |||
| 143 | // Input ASTC texture globals | ||
| 144 | uint current_index = 0; | ||
| 145 | int bitsread = 0; | ||
| 146 | uint total_bitsread = 0; | ||
| 147 | uint local_buff[16]; | ||
| 148 | |||
| 149 | // Color data globals | ||
| 150 | uint color_endpoint_data[16]; | ||
| 151 | int color_bitsread = 0; | ||
| 152 | uint total_color_bitsread = 0; | ||
| 153 | int color_index = 0; | ||
| 154 | |||
| 155 | // Four values, two endpoints, a maximum of four partitions | ||
| 156 | uint color_values[32]; | ||
| 157 | int colvals_index = 0; | ||
| 158 | |||
| 159 | // Weight data globals | ||
| 160 | uint texel_weight_data[16]; | ||
| 161 | int texel_bitsread = 0; | ||
| 162 | uint total_texel_bitsread = 0; | ||
| 163 | int texel_index = 0; | ||
| 164 | |||
| 165 | bool texel_flag = false; | ||
| 166 | |||
| 167 | // Global "vectors" to be pushed into when decoding | ||
| 168 | EncodingData result_vector[100]; | ||
| 169 | int result_index = 0; | ||
| 170 | |||
| 171 | EncodingData texel_vector[100]; | ||
| 172 | int texel_vector_index = 0; | ||
| 173 | |||
| 174 | uint unquantized_texel_weights[2][144]; | ||
| 175 | |||
| 119 | uint SwizzleOffset(uvec2 pos) { | 176 | uint SwizzleOffset(uvec2 pos) { |
| 120 | pos = pos & SWIZZLE_MASK; | 177 | pos = pos & SWIZZLE_MASK; |
| 121 | return swizzle_table[pos.y * 64 + pos.x]; | 178 | return swizzle_table[pos.y * 64 + pos.x]; |
| @@ -126,21 +183,10 @@ uint ReadTexel(uint offset) { | |||
| 126 | return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); | 183 | return bitfieldExtract(astc_data[offset / 4], int((offset * 8) & 24), 8); |
| 127 | } | 184 | } |
| 128 | 185 | ||
| 129 | 186 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | |
| 130 | const int BLOCK_SIZE_IN_BYTES = 16; | 187 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| 131 | |||
| 132 | const int BLOCK_INFO_ERROR = 0; | ||
| 133 | const int BLOCK_INFO_VOID_EXTENT_HDR = 1; | ||
| 134 | const int BLOCK_INFO_VOID_EXTENT_LDR = 2; | ||
| 135 | const int BLOCK_INFO_NORMAL = 3; | ||
| 136 | |||
| 137 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] | ||
| 138 | // is the same as [(numBits - 1):0] and repeats all the way down. | ||
| 139 | uint Replicate(uint val, uint num_bits, uint to_bit) { | 188 | uint Replicate(uint val, uint num_bits, uint to_bit) { |
| 140 | if (num_bits == 0) { | 189 | if (num_bits == 0 || to_bit == 0) { |
| 141 | return 0; | ||
| 142 | } | ||
| 143 | if (to_bit == 0) { | ||
| 144 | return 0; | 190 | return 0; |
| 145 | } | 191 | } |
| 146 | const uint v = val & uint((1 << num_bits) - 1); | 192 | const uint v = val & uint((1 << num_bits) - 1); |
| @@ -165,26 +211,14 @@ uvec4 ReplicateByteTo16(uvec4 value) { | |||
| 165 | REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); | 211 | REPLICATE_BYTE_TO_16_TABLE[value.z], REPLICATE_BYTE_TO_16_TABLE[value.w]); |
| 166 | } | 212 | } |
| 167 | 213 | ||
| 168 | const uint REPLICATE_BIT_TO_7_TABLE[2] = uint[](0, 127); | ||
| 169 | uint ReplicateBitTo7(uint value) { | 214 | uint ReplicateBitTo7(uint value) { |
| 170 | return REPLICATE_BIT_TO_7_TABLE[value]; | 215 | return REPLICATE_BIT_TO_7_TABLE[value]; |
| 171 | ; | ||
| 172 | } | 216 | } |
| 173 | 217 | ||
| 174 | const uint REPLICATE_1_BIT_TO_9_TABLE[2] = uint[](0, 511); | ||
| 175 | uint ReplicateBitTo9(uint value) { | 218 | uint ReplicateBitTo9(uint value) { |
| 176 | return REPLICATE_1_BIT_TO_9_TABLE[value]; | 219 | return REPLICATE_1_BIT_TO_9_TABLE[value]; |
| 177 | } | 220 | } |
| 178 | 221 | ||
| 179 | const uint REPLICATE_1_BIT_TO_8_TABLE[2] = uint[](0, 255); | ||
| 180 | const uint REPLICATE_2_BIT_TO_8_TABLE[4] = uint[](0, 85, 170, 255); | ||
| 181 | const uint REPLICATE_3_BIT_TO_8_TABLE[8] = uint[](0, 36, 73, 109, 146, 182, 219, 255); | ||
| 182 | const uint REPLICATE_4_BIT_TO_8_TABLE[16] = | ||
| 183 | uint[](0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255); | ||
| 184 | const uint REPLICATE_5_BIT_TO_8_TABLE[32] = | ||
| 185 | uint[](0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, | ||
| 186 | 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255); | ||
| 187 | |||
| 188 | uint FastReplicateTo8(uint value, uint num_bits) { | 222 | uint FastReplicateTo8(uint value, uint num_bits) { |
| 189 | switch (num_bits) { | 223 | switch (num_bits) { |
| 190 | case 1: | 224 | case 1: |
| @@ -207,15 +241,6 @@ uint FastReplicateTo8(uint value, uint num_bits) { | |||
| 207 | return Replicate(value, num_bits, 8); | 241 | return Replicate(value, num_bits, 8); |
| 208 | } | 242 | } |
| 209 | 243 | ||
| 210 | const uint REPLICATE_1_BIT_TO_6_TABLE[2] = uint[](0, 63); | ||
| 211 | const uint REPLICATE_2_BIT_TO_6_TABLE[4] = uint[](0, 21, 42, 63); | ||
| 212 | const uint REPLICATE_3_BIT_TO_6_TABLE[8] = uint[](0, 9, 18, 27, 36, 45, 54, 63); | ||
| 213 | const uint REPLICATE_4_BIT_TO_6_TABLE[16] = | ||
| 214 | uint[](0, 4, 8, 12, 17, 21, 25, 29, 34, 38, 42, 46, 51, 55, 59, 63); | ||
| 215 | const uint REPLICATE_5_BIT_TO_6_TABLE[32] = | ||
| 216 | uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 33, 35, 37, 39, 41, 43, 45, | ||
| 217 | 47, 49, 51, 53, 55, 57, 59, 61, 63); | ||
| 218 | |||
| 219 | uint FastReplicateTo6(uint value, uint num_bits) { | 244 | uint FastReplicateTo6(uint value, uint num_bits) { |
| 220 | switch (num_bits) { | 245 | switch (num_bits) { |
| 221 | case 1: | 246 | case 1: |
| @@ -232,7 +257,23 @@ uint FastReplicateTo6(uint value, uint num_bits) { | |||
| 232 | return Replicate(value, num_bits, 6); | 257 | return Replicate(value, num_bits, 6); |
| 233 | } | 258 | } |
| 234 | 259 | ||
| 235 | uint hash52(uint p) { | 260 | uint Div3Floor(uint v) { |
| 261 | return (v * 0x5556) >> 16; | ||
| 262 | } | ||
| 263 | |||
| 264 | uint Div3Ceil(uint v) { | ||
| 265 | return Div3Floor(v + 2); | ||
| 266 | } | ||
| 267 | |||
| 268 | uint Div5Floor(uint v) { | ||
| 269 | return (v * 0x3334) >> 16; | ||
| 270 | } | ||
| 271 | |||
| 272 | uint Div5Ceil(uint v) { | ||
| 273 | return Div5Floor(v + 4); | ||
| 274 | } | ||
| 275 | |||
| 276 | uint Hash52(uint p) { | ||
| 236 | p ^= p >> 15; | 277 | p ^= p >> 15; |
| 237 | p -= p << 17; | 278 | p -= p << 17; |
| 238 | p += p << 7; | 279 | p += p << 7; |
| @@ -247,9 +288,9 @@ uint hash52(uint p) { | |||
| 247 | } | 288 | } |
| 248 | 289 | ||
| 249 | uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) { | 290 | uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bool small_block) { |
| 250 | if (1 == partition_count) | 291 | if (partition_count == 1) { |
| 251 | return 0; | 292 | return 0; |
| 252 | 293 | } | |
| 253 | if (small_block) { | 294 | if (small_block) { |
| 254 | x <<= 1; | 295 | x <<= 1; |
| 255 | y <<= 1; | 296 | y <<= 1; |
| @@ -258,7 +299,7 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo | |||
| 258 | 299 | ||
| 259 | seed += (partition_count - 1) * 1024; | 300 | seed += (partition_count - 1) * 1024; |
| 260 | 301 | ||
| 261 | uint rnum = hash52(uint(seed)); | 302 | uint rnum = Hash52(uint(seed)); |
| 262 | uint seed1 = uint(rnum & 0xF); | 303 | uint seed1 = uint(rnum & 0xF); |
| 263 | uint seed2 = uint((rnum >> 4) & 0xF); | 304 | uint seed2 = uint((rnum >> 4) & 0xF); |
| 264 | uint seed3 = uint((rnum >> 8) & 0xF); | 305 | uint seed3 = uint((rnum >> 8) & 0xF); |
| @@ -318,18 +359,22 @@ uint SelectPartition(uint seed, uint x, uint y, uint z, uint partition_count, bo | |||
| 318 | c &= 0x3F; | 359 | c &= 0x3F; |
| 319 | d &= 0x3F; | 360 | d &= 0x3F; |
| 320 | 361 | ||
| 321 | if (partition_count < 4) | 362 | if (partition_count < 4) { |
| 322 | d = 0; | 363 | d = 0; |
| 323 | if (partition_count < 3) | 364 | } |
| 365 | if (partition_count < 3) { | ||
| 324 | c = 0; | 366 | c = 0; |
| 367 | } | ||
| 325 | 368 | ||
| 326 | if (a >= b && a >= c && a >= d) | 369 | if (a >= b && a >= c && a >= d) { |
| 327 | return 0; | 370 | return 0; |
| 328 | else if (b >= c && b >= d) | 371 | } else if (b >= c && b >= d) { |
| 329 | return 1; | 372 | return 1; |
| 330 | else if (c >= d) | 373 | } else if (c >= d) { |
| 331 | return 2; | 374 | return 2; |
| 332 | return 3; | 375 | } else { |
| 376 | return 3; | ||
| 377 | } | ||
| 333 | } | 378 | } |
| 334 | 379 | ||
| 335 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) { | 380 | uint Select2DPartition(uint seed, uint x, uint y, uint partition_count, bool small_block) { |
| @@ -341,10 +386,10 @@ uint ReadBit() { | |||
| 341 | return 0; | 386 | return 0; |
| 342 | } | 387 | } |
| 343 | uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1); | 388 | uint bit = bitfieldExtract(local_buff[current_index], bitsread, 1); |
| 344 | bitsread++; | 389 | ++bitsread; |
| 345 | total_bitsread++; | 390 | ++total_bitsread; |
| 346 | if (bitsread == 8) { | 391 | if (bitsread == 8) { |
| 347 | current_index++; | 392 | ++current_index; |
| 348 | bitsread = 0; | 393 | bitsread = 0; |
| 349 | } | 394 | } |
| 350 | return bit; | 395 | return bit; |
| @@ -358,36 +403,22 @@ uint StreamBits(uint num_bits) { | |||
| 358 | return ret; | 403 | return ret; |
| 359 | } | 404 | } |
| 360 | 405 | ||
| 361 | // Define color data. | ||
| 362 | uint color_endpoint_data[16]; | ||
| 363 | int color_bitsread = 0; | ||
| 364 | uint total_color_bitsread = 0; | ||
| 365 | int color_index = 0; | ||
| 366 | |||
| 367 | // Define color data. | ||
| 368 | uint texel_weight_data[16]; | ||
| 369 | int texel_bitsread = 0; | ||
| 370 | uint total_texel_bitsread = 0; | ||
| 371 | int texel_index = 0; | ||
| 372 | |||
| 373 | bool texel_flag = false; | ||
| 374 | |||
| 375 | uint ReadColorBit() { | 406 | uint ReadColorBit() { |
| 376 | uint bit = 0; | 407 | uint bit = 0; |
| 377 | if (texel_flag) { | 408 | if (texel_flag) { |
| 378 | bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1); | 409 | bit = bitfieldExtract(texel_weight_data[texel_index], texel_bitsread, 1); |
| 379 | texel_bitsread++; | 410 | ++texel_bitsread; |
| 380 | total_texel_bitsread++; | 411 | ++total_texel_bitsread; |
| 381 | if (texel_bitsread == 8) { | 412 | if (texel_bitsread == 8) { |
| 382 | texel_index++; | 413 | ++texel_index; |
| 383 | texel_bitsread = 0; | 414 | texel_bitsread = 0; |
| 384 | } | 415 | } |
| 385 | } else { | 416 | } else { |
| 386 | bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1); | 417 | bit = bitfieldExtract(color_endpoint_data[color_index], color_bitsread, 1); |
| 387 | color_bitsread++; | 418 | ++color_bitsread; |
| 388 | total_color_bitsread++; | 419 | ++total_color_bitsread; |
| 389 | if (color_bitsread == 8) { | 420 | if (color_bitsread == 8) { |
| 390 | color_index++; | 421 | ++color_index; |
| 391 | color_bitsread = 0; | 422 | color_bitsread = 0; |
| 392 | } | 423 | } |
| 393 | } | 424 | } |
| @@ -402,31 +433,25 @@ uint StreamColorBits(uint num_bits) { | |||
| 402 | return ret; | 433 | return ret; |
| 403 | } | 434 | } |
| 404 | 435 | ||
| 405 | EncodingData result_vector[100]; | ||
| 406 | int result_index = 0; | ||
| 407 | |||
| 408 | EncodingData texel_vector[100]; | ||
| 409 | int texel_vector_index = 0; | ||
| 410 | |||
| 411 | void ResultEmplaceBack(EncodingData val) { | 436 | void ResultEmplaceBack(EncodingData val) { |
| 412 | if (texel_flag) { | 437 | if (texel_flag) { |
| 413 | texel_vector[texel_vector_index] = val; | 438 | texel_vector[texel_vector_index] = val; |
| 414 | texel_vector_index++; | 439 | ++texel_vector_index; |
| 415 | } else { | 440 | } else { |
| 416 | result_vector[result_index] = val; | 441 | result_vector[result_index] = val; |
| 417 | result_index++; | 442 | ++result_index; |
| 418 | } | 443 | } |
| 419 | } | 444 | } |
| 420 | 445 | ||
| 421 | // Returns the number of bits required to encode n_vals values. | 446 | // Returns the number of bits required to encode n_vals values. |
| 422 | uint GetBitLength(uint n_vals, uint encoding_index) { | 447 | uint GetBitLength(uint n_vals, uint encoding_index) { |
| 423 | uint totalBits = encoding_values[encoding_index].num_bits * n_vals; | 448 | uint total_bits = encoding_values[encoding_index].num_bits * n_vals; |
| 424 | if (encoding_values[encoding_index].encoding == Trit) { | 449 | if (encoding_values[encoding_index].encoding == TRIT) { |
| 425 | totalBits += (n_vals * 8 + 4) / 5; | 450 | total_bits += Div5Ceil(n_vals * 8); |
| 426 | } else if (encoding_values[encoding_index].encoding == Quint) { | 451 | } else if (encoding_values[encoding_index].encoding == QUINT) { |
| 427 | totalBits += (n_vals * 7 + 2) / 3; | 452 | total_bits += Div3Ceil(n_vals * 7); |
| 428 | } | 453 | } |
| 429 | return totalBits; | 454 | return total_bits; |
| 430 | } | 455 | } |
| 431 | 456 | ||
| 432 | uint GetNumWeightValues(uvec2 size, bool dual_plane) { | 457 | uint GetNumWeightValues(uvec2 size, bool dual_plane) { |
| @@ -459,7 +484,7 @@ uint BitsOp(uint bits, uint start, uint end) { | |||
| 459 | return ((bits >> start) & mask); | 484 | return ((bits >> start) & mask); |
| 460 | } | 485 | } |
| 461 | 486 | ||
| 462 | void DecodeQuintBlock(uint num_bits) { // Value number of bits | 487 | void DecodeQuintBlock(uint num_bits) { |
| 463 | uint m[3]; | 488 | uint m[3]; |
| 464 | uint q[3]; | 489 | uint q[3]; |
| 465 | uint Q; | 490 | uint Q; |
| @@ -483,7 +508,6 @@ void DecodeQuintBlock(uint num_bits) { // Value number of bits | |||
| 483 | q[2] = BitsOp(Q, 5, 6); | 508 | q[2] = BitsOp(Q, 5, 6); |
| 484 | C = BitsOp(Q, 0, 4); | 509 | C = BitsOp(Q, 0, 4); |
| 485 | } | 510 | } |
| 486 | |||
| 487 | if (BitsOp(C, 0, 2) == 5) { | 511 | if (BitsOp(C, 0, 2) == 5) { |
| 488 | q[1] = 4; | 512 | q[1] = 4; |
| 489 | q[0] = BitsOp(C, 3, 4); | 513 | q[0] = BitsOp(C, 3, 4); |
| @@ -492,10 +516,9 @@ void DecodeQuintBlock(uint num_bits) { // Value number of bits | |||
| 492 | q[0] = BitsOp(C, 0, 2); | 516 | q[0] = BitsOp(C, 0, 2); |
| 493 | } | 517 | } |
| 494 | } | 518 | } |
| 495 | |||
| 496 | for (uint i = 0; i < 3; i++) { | 519 | for (uint i = 0; i < 3; i++) { |
| 497 | EncodingData val; | 520 | EncodingData val; |
| 498 | val.encoding = Quint; | 521 | val.encoding = QUINT; |
| 499 | val.num_bits = num_bits; | 522 | val.num_bits = num_bits; |
| 500 | val.bit_value = m[i]; | 523 | val.bit_value = m[i]; |
| 501 | val.quint_trit_value = q[i]; | 524 | val.quint_trit_value = q[i]; |
| @@ -547,29 +570,28 @@ void DecodeTritBlock(uint num_bits) { | |||
| 547 | } | 570 | } |
| 548 | for (uint i = 0; i < 5; i++) { | 571 | for (uint i = 0; i < 5; i++) { |
| 549 | EncodingData val; | 572 | EncodingData val; |
| 550 | val.encoding = Trit; | 573 | val.encoding = TRIT; |
| 551 | val.num_bits = num_bits; | 574 | val.num_bits = num_bits; |
| 552 | val.bit_value = m[i]; | 575 | val.bit_value = m[i]; |
| 553 | val.quint_trit_value = t[i]; | 576 | val.quint_trit_value = t[i]; |
| 554 | ResultEmplaceBack(val); | 577 | ResultEmplaceBack(val); |
| 555 | } | 578 | } |
| 556 | } | 579 | } |
| 580 | |||
| 557 | void DecodeIntegerSequence(uint max_range, uint num_values) { | 581 | void DecodeIntegerSequence(uint max_range, uint num_values) { |
| 558 | EncodingData val = encoding_values[max_range]; | 582 | EncodingData val = encoding_values[max_range]; |
| 559 | uint vals_decoded = 0; | 583 | uint vals_decoded = 0; |
| 560 | while (vals_decoded < num_values) { | 584 | while (vals_decoded < num_values) { |
| 561 | switch (val.encoding) { | 585 | switch (val.encoding) { |
| 562 | case Quint: | 586 | case QUINT: |
| 563 | DecodeQuintBlock(val.num_bits); | 587 | DecodeQuintBlock(val.num_bits); |
| 564 | vals_decoded += 3; | 588 | vals_decoded += 3; |
| 565 | break; | 589 | break; |
| 566 | 590 | case TRIT: | |
| 567 | case Trit: | ||
| 568 | DecodeTritBlock(val.num_bits); | 591 | DecodeTritBlock(val.num_bits); |
| 569 | vals_decoded += 5; | 592 | vals_decoded += 5; |
| 570 | break; | 593 | break; |
| 571 | 594 | case JUST_BITS: | |
| 572 | case JustBits: | ||
| 573 | val.bit_value = StreamColorBits(val.num_bits); | 595 | val.bit_value = StreamColorBits(val.num_bits); |
| 574 | ResultEmplaceBack(val); | 596 | ResultEmplaceBack(val); |
| 575 | vals_decoded++; | 597 | vals_decoded++; |
| @@ -578,8 +600,7 @@ void DecodeIntegerSequence(uint max_range, uint num_values) { | |||
| 578 | } | 600 | } |
| 579 | } | 601 | } |
| 580 | 602 | ||
| 581 | void DecodeColorValues(out uint color_values[32], uvec4 modes, uint num_partitions, | 603 | void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) { |
| 582 | uint color_data_bits) { | ||
| 583 | uint num_values = 0; | 604 | uint num_values = 0; |
| 584 | for (uint i = 0; i < num_partitions; i++) { | 605 | for (uint i = 0; i < num_partitions; i++) { |
| 585 | num_values += ((modes[i] >> 2) + 1) << 1; | 606 | num_values += ((modes[i] >> 2) + 1) << 1; |
| @@ -587,21 +608,21 @@ void DecodeColorValues(out uint color_values[32], uvec4 modes, uint num_partitio | |||
| 587 | int range = 256; | 608 | int range = 256; |
| 588 | while (--range > 0) { | 609 | while (--range > 0) { |
| 589 | EncodingData val = encoding_values[range]; | 610 | EncodingData val = encoding_values[range]; |
| 590 | uint bitLength = GetBitLength(num_values, range); | 611 | uint bit_length = GetBitLength(num_values, range); |
| 591 | if (bitLength <= color_data_bits) { | 612 | if (bit_length <= color_data_bits) { |
| 592 | while (--range > 0) { | 613 | while (--range > 0) { |
| 593 | EncodingData newval = encoding_values[range]; | 614 | EncodingData newval = encoding_values[range]; |
| 594 | if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) { | 615 | if (newval.encoding != val.encoding && newval.num_bits != val.num_bits) { |
| 595 | break; | 616 | break; |
| 596 | } | 617 | } |
| 597 | } | 618 | } |
| 598 | range++; | 619 | ++range; |
| 599 | break; | 620 | break; |
| 600 | } | 621 | } |
| 601 | } | 622 | } |
| 602 | DecodeIntegerSequence(range, num_values); | 623 | DecodeIntegerSequence(range, num_values); |
| 603 | uint out_index = 0; | 624 | uint out_index = 0; |
| 604 | for (int itr = 0; itr < result_index; itr++) { | 625 | for (int itr = 0; itr < result_index; ++itr) { |
| 605 | if (out_index >= num_values) { | 626 | if (out_index >= num_values) { |
| 606 | break; | 627 | break; |
| 607 | } | 628 | } |
| @@ -611,77 +632,83 @@ void DecodeColorValues(out uint color_values[32], uvec4 modes, uint num_partitio | |||
| 611 | uint A = 0, B = 0, C = 0, D = 0; | 632 | uint A = 0, B = 0, C = 0, D = 0; |
| 612 | A = ReplicateBitTo9((bitval & 1)); | 633 | A = ReplicateBitTo9((bitval & 1)); |
| 613 | switch (val.encoding) { | 634 | switch (val.encoding) { |
| 614 | case JustBits: | 635 | case JUST_BITS: |
| 615 | color_values[out_index++] = FastReplicateTo8(bitval, bitlen); | 636 | color_values[out_index++] = FastReplicateTo8(bitval, bitlen); |
| 616 | break; | 637 | break; |
| 617 | case Trit: { | 638 | case TRIT: { |
| 618 | D = val.quint_trit_value; | 639 | D = val.quint_trit_value; |
| 619 | switch (bitlen) { | 640 | switch (bitlen) { |
| 620 | case 1: { | 641 | case 1: |
| 621 | C = 204; | 642 | C = 204; |
| 622 | } break; | 643 | break; |
| 623 | case 2: { | 644 | case 2: { |
| 624 | C = 93; | 645 | C = 93; |
| 625 | uint b = (bitval >> 1) & 1; | 646 | uint b = (bitval >> 1) & 1; |
| 626 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | 647 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); |
| 627 | } break; | 648 | break; |
| 628 | 649 | } | |
| 629 | case 3: { | 650 | case 3: { |
| 630 | C = 44; | 651 | C = 44; |
| 631 | uint cb = (bitval >> 1) & 3; | 652 | uint cb = (bitval >> 1) & 3; |
| 632 | B = (cb << 7) | (cb << 2) | cb; | 653 | B = (cb << 7) | (cb << 2) | cb; |
| 633 | } break; | 654 | break; |
| 634 | 655 | } | |
| 635 | case 4: { | 656 | case 4: { |
| 636 | C = 22; | 657 | C = 22; |
| 637 | uint dcb = (bitval >> 1) & 7; | 658 | uint dcb = (bitval >> 1) & 7; |
| 638 | B = (dcb << 6) | dcb; | 659 | B = (dcb << 6) | dcb; |
| 639 | } break; | 660 | break; |
| 640 | 661 | } | |
| 641 | case 5: { | 662 | case 5: { |
| 642 | C = 11; | 663 | C = 11; |
| 643 | uint edcb = (bitval >> 1) & 0xF; | 664 | uint edcb = (bitval >> 1) & 0xF; |
| 644 | B = (edcb << 5) | (edcb >> 2); | 665 | B = (edcb << 5) | (edcb >> 2); |
| 645 | } break; | 666 | break; |
| 646 | 667 | } | |
| 647 | case 6: { | 668 | case 6: { |
| 648 | C = 5; | 669 | C = 5; |
| 649 | uint fedcb = (bitval >> 1) & 0x1F; | 670 | uint fedcb = (bitval >> 1) & 0x1F; |
| 650 | B = (fedcb << 4) | (fedcb >> 4); | 671 | B = (fedcb << 4) | (fedcb >> 4); |
| 651 | } break; | 672 | break; |
| 652 | } | 673 | } |
| 653 | } break; | 674 | } |
| 654 | case Quint: { | 675 | break; |
| 676 | } | ||
| 677 | case QUINT: { | ||
| 655 | D = val.quint_trit_value; | 678 | D = val.quint_trit_value; |
| 656 | switch (bitlen) { | 679 | switch (bitlen) { |
| 657 | case 1: { | 680 | case 1: |
| 658 | C = 113; | 681 | C = 113; |
| 659 | } break; | 682 | break; |
| 660 | case 2: { | 683 | case 2: { |
| 661 | C = 54; | 684 | C = 54; |
| 662 | uint b = (bitval >> 1) & 1; | 685 | uint b = (bitval >> 1) & 1; |
| 663 | B = (b << 8) | (b << 3) | (b << 2); | 686 | B = (b << 8) | (b << 3) | (b << 2); |
| 664 | } break; | 687 | break; |
| 688 | } | ||
| 665 | case 3: { | 689 | case 3: { |
| 666 | C = 26; | 690 | C = 26; |
| 667 | uint cb = (bitval >> 1) & 3; | 691 | uint cb = (bitval >> 1) & 3; |
| 668 | B = (cb << 7) | (cb << 1) | (cb >> 1); | 692 | B = (cb << 7) | (cb << 1) | (cb >> 1); |
| 669 | } break; | 693 | break; |
| 694 | } | ||
| 670 | case 4: { | 695 | case 4: { |
| 671 | C = 13; | 696 | C = 13; |
| 672 | uint dcb = (bitval >> 1) & 7; | 697 | uint dcb = (bitval >> 1) & 7; |
| 673 | B = (dcb << 6) | (dcb >> 1); | 698 | B = (dcb << 6) | (dcb >> 1); |
| 674 | } break; | 699 | break; |
| 700 | } | ||
| 675 | case 5: { | 701 | case 5: { |
| 676 | C = 6; | 702 | C = 6; |
| 677 | uint edcb = (bitval >> 1) & 0xF; | 703 | uint edcb = (bitval >> 1) & 0xF; |
| 678 | B = (edcb << 5) | (edcb >> 3); | 704 | B = (edcb << 5) | (edcb >> 3); |
| 679 | } break; | 705 | break; |
| 680 | } | 706 | } |
| 681 | } break; | 707 | } |
| 708 | break; | ||
| 682 | } | 709 | } |
| 683 | 710 | } | |
| 684 | if (val.encoding != JustBits) { | 711 | if (val.encoding != JUST_BITS) { |
| 685 | uint T = (D * C) + B; | 712 | uint T = (D * C) + B; |
| 686 | T ^= A; | 713 | T ^= A; |
| 687 | T = (A & 0x80) | (T >> 2); | 714 | T = (A & 0x80) | (T >> 2); |
| @@ -689,30 +716,31 @@ void DecodeColorValues(out uint color_values[32], uvec4 modes, uint num_partitio | |||
| 689 | } | 716 | } |
| 690 | } | 717 | } |
| 691 | } | 718 | } |
| 719 | |||
| 692 | ivec2 BitTransferSigned(int a, int b) { | 720 | ivec2 BitTransferSigned(int a, int b) { |
| 693 | ivec2 transferred; | 721 | ivec2 transferred; |
| 694 | transferred[1] = b >> 1; | 722 | transferred.y = b >> 1; |
| 695 | transferred[1] |= a & 0x80; | 723 | transferred.y |= a & 0x80; |
| 696 | transferred[0] = a >> 1; | 724 | transferred.x = a >> 1; |
| 697 | transferred[0] &= 0x3F; | 725 | transferred.x &= 0x3F; |
| 698 | if ((transferred[0] & 0x20) > 0) { | 726 | if ((transferred.x & 0x20) > 0) { |
| 699 | transferred[0] -= 0x40; | 727 | transferred.x -= 0x40; |
| 700 | } | 728 | } |
| 701 | return transferred; | 729 | return transferred; |
| 702 | } | 730 | } |
| 703 | 731 | ||
| 704 | uvec4 ClampByte(ivec4 color) { | 732 | uvec4 ClampByte(ivec4 color) { |
| 705 | for (uint i = 0; i < 4; i++) { | 733 | for (uint i = 0; i < 4; ++i) { |
| 706 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | 734 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); |
| 707 | } | 735 | } |
| 708 | return uvec4(color); | 736 | return uvec4(color); |
| 709 | } | 737 | } |
| 738 | |||
| 710 | ivec4 BlueContract(int a, int r, int g, int b) { | 739 | ivec4 BlueContract(int a, int r, int g, int b) { |
| 711 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); | 740 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); |
| 712 | } | 741 | } |
| 713 | int colvals_index = 0; | 742 | |
| 714 | void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_values[32], | 743 | void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode) { |
| 715 | uint color_endpoint_mode) { | ||
| 716 | #define READ_UINT_VALUES(N) \ | 744 | #define READ_UINT_VALUES(N) \ |
| 717 | uint v[N]; \ | 745 | uint v[N]; \ |
| 718 | for (uint i = 0; i < N; i++) { \ | 746 | for (uint i = 0; i < N; i++) { \ |
| @@ -730,113 +758,120 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_values[32], | |||
| 730 | READ_UINT_VALUES(2) | 758 | READ_UINT_VALUES(2) |
| 731 | ep1 = uvec4(0xFF, v[0], v[0], v[0]); | 759 | ep1 = uvec4(0xFF, v[0], v[0], v[0]); |
| 732 | ep2 = uvec4(0xFF, v[1], v[1], v[1]); | 760 | ep2 = uvec4(0xFF, v[1], v[1], v[1]); |
| 733 | } break; | 761 | break; |
| 734 | 762 | } | |
| 735 | case 1: { | 763 | case 1: { |
| 736 | READ_UINT_VALUES(2) | 764 | READ_UINT_VALUES(2) |
| 737 | uint L0 = (v[0] >> 2) | (v[1] & 0xC0); | 765 | uint L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 738 | uint L1 = max(L0 + (v[1] & 0x3F), 0xFFU); | 766 | uint L1 = max(L0 + (v[1] & 0x3F), 0xFFU); |
| 739 | ep1 = uvec4(0xFF, L0, L0, L0); | 767 | ep1 = uvec4(0xFF, L0, L0, L0); |
| 740 | ep2 = uvec4(0xFF, L1, L1, L1); | 768 | ep2 = uvec4(0xFF, L1, L1, L1); |
| 741 | } break; | 769 | break; |
| 742 | 770 | } | |
| 743 | case 4: { | 771 | case 4: { |
| 744 | READ_UINT_VALUES(4) | 772 | READ_UINT_VALUES(4) |
| 745 | ep1 = uvec4(v[2], v[0], v[0], v[0]); | 773 | ep1 = uvec4(v[2], v[0], v[0], v[0]); |
| 746 | ep2 = uvec4(v[3], v[1], v[1], v[1]); | 774 | ep2 = uvec4(v[3], v[1], v[1], v[1]); |
| 747 | } break; | 775 | break; |
| 748 | 776 | } | |
| 749 | case 5: { | 777 | case 5: { |
| 750 | READ_INT_VALUES(4) | 778 | READ_INT_VALUES(4) |
| 751 | ivec2 transferred = BitTransferSigned(v[1], v[0]); | 779 | ivec2 transferred = BitTransferSigned(v[1], v[0]); |
| 752 | v[1] = transferred[0]; | 780 | v[1] = transferred.x; |
| 753 | v[0] = transferred[1]; | 781 | v[0] = transferred.y; |
| 754 | transferred = BitTransferSigned(v[3], v[2]); | 782 | transferred = BitTransferSigned(v[3], v[2]); |
| 755 | v[3] = transferred[0]; | 783 | v[3] = transferred.x; |
| 756 | v[2] = transferred[1]; | 784 | v[2] = transferred.y; |
| 757 | ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); | 785 | ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); |
| 758 | ep2 = ClampByte(ivec4((v[2] + v[3]), v[0] + v[1], v[0] + v[1], v[0] + v[1])); | 786 | ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); |
| 759 | } break; | 787 | break; |
| 760 | 788 | } | |
| 761 | case 6: { | 789 | case 6: { |
| 762 | READ_UINT_VALUES(4) | 790 | READ_UINT_VALUES(4) |
| 763 | ep1 = uvec4(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | 791 | ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 764 | ep2 = uvec4(0xFF, v[0], v[1], v[2]); | 792 | ep2 = uvec4(0xFF, v[0], v[1], v[2]); |
| 765 | } break; | 793 | break; |
| 766 | 794 | } | |
| 767 | case 8: { | 795 | case 8: { |
| 768 | READ_UINT_VALUES(6) | 796 | READ_UINT_VALUES(6) |
| 769 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | 797 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 770 | ep1 = uvec4(0xFF, v[0], v[2], v[4]); | 798 | ep1 = uvec4(0xFF, v[0], v[2], v[4]); |
| 771 | ep2 = uvec4(0xFF, v[1], v[3], v[5]); | 799 | ep2 = uvec4(0xFF, v[1], v[3], v[5]); |
| 772 | } else { | 800 | } else { |
| 773 | ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); | 801 | ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); |
| 774 | ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); | 802 | ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); |
| 775 | } | 803 | } |
| 776 | } break; | 804 | break; |
| 777 | 805 | } | |
| 778 | case 9: { | 806 | case 9: { |
| 779 | READ_INT_VALUES(6) | 807 | READ_INT_VALUES(6) |
| 780 | ivec2 transferred = BitTransferSigned(v[1], v[0]); | 808 | ivec2 transferred = BitTransferSigned(v[1], v[0]); |
| 781 | v[1] = transferred[0]; | 809 | v[1] = transferred.x; |
| 782 | v[0] = transferred[1]; | 810 | v[0] = transferred.y; |
| 783 | transferred = BitTransferSigned(v[3], v[2]); | 811 | transferred = BitTransferSigned(v[3], v[2]); |
| 784 | v[3] = transferred[0]; | 812 | v[3] = transferred.x; |
| 785 | v[2] = transferred[1]; | 813 | v[2] = transferred.y; |
| 786 | transferred = BitTransferSigned(v[5], v[4]); | 814 | transferred = BitTransferSigned(v[5], v[4]); |
| 787 | v[5] = transferred[0]; | 815 | v[5] = transferred.x; |
| 788 | v[4] = transferred[1]; | 816 | v[4] = transferred.y; |
| 789 | if (v[1] + v[3] + v[5] >= 0) { | 817 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 790 | ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); | 818 | ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); |
| 791 | ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 819 | ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 792 | } else { | 820 | } else { |
| 793 | ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 821 | ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 794 | ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); | 822 | ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); |
| 795 | } | 823 | } |
| 796 | } break; | 824 | break; |
| 797 | 825 | } | |
| 798 | case 10: { | 826 | case 10: { |
| 799 | READ_UINT_VALUES(6) | 827 | READ_UINT_VALUES(6) |
| 800 | ep1 = uvec4(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | 828 | ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 801 | ep2 = uvec4(v[5], v[0], v[1], v[2]); | 829 | ep2 = uvec4(v[5], v[0], v[1], v[2]); |
| 802 | } break; | 830 | break; |
| 803 | 831 | } | |
| 804 | case 12: { | 832 | case 12: { |
| 805 | READ_UINT_VALUES(8) | 833 | READ_UINT_VALUES(8) |
| 806 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | 834 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 807 | ep1 = uvec4(v[6], v[0], v[2], v[4]); | 835 | ep1 = uvec4(v[6], v[0], v[2], v[4]); |
| 808 | ep2 = uvec4(v[7], v[1], v[3], v[5]); | 836 | ep2 = uvec4(v[7], v[1], v[3], v[5]); |
| 809 | } else { | 837 | } else { |
| 810 | ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); | 838 | ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); |
| 811 | ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); | 839 | ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); |
| 812 | } | 840 | } |
| 813 | } break; | 841 | break; |
| 814 | 842 | } | |
| 815 | case 13: { | 843 | case 13: { |
| 816 | READ_INT_VALUES(8) | 844 | READ_INT_VALUES(8) |
| 817 | ivec2 transferred = BitTransferSigned(v[1], v[0]); | 845 | ivec2 transferred = BitTransferSigned(v[1], v[0]); |
| 818 | v[1] = transferred[0]; | 846 | v[1] = transferred.x; |
| 819 | v[0] = transferred[1]; | 847 | v[0] = transferred.y; |
| 820 | transferred = BitTransferSigned(v[3], v[2]); | 848 | transferred = BitTransferSigned(v[3], v[2]); |
| 821 | v[3] = transferred[0]; | 849 | v[3] = transferred.x; |
| 822 | v[2] = transferred[1]; | 850 | v[2] = transferred.y; |
| 823 | 851 | ||
| 824 | transferred = BitTransferSigned(v[5], v[4]); | 852 | transferred = BitTransferSigned(v[5], v[4]); |
| 825 | v[5] = transferred[0]; | 853 | v[5] = transferred.x; |
| 826 | v[4] = transferred[1]; | 854 | v[4] = transferred.y; |
| 827 | 855 | ||
| 828 | transferred = BitTransferSigned(v[7], v[6]); | 856 | transferred = BitTransferSigned(v[7], v[6]); |
| 829 | v[7] = transferred[0]; | 857 | v[7] = transferred.x; |
| 830 | v[6] = transferred[1]; | 858 | v[6] = transferred.y; |
| 831 | 859 | ||
| 832 | if (v[1] + v[3] + v[5] >= 0) { | 860 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 833 | ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); | 861 | ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); |
| 834 | ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 862 | ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 835 | } else { | 863 | } else { |
| 836 | ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 864 | ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 837 | ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); | 865 | ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); |
| 838 | } | 866 | } |
| 839 | } break; | 867 | break; |
| 868 | } | ||
| 869 | default: { | ||
| 870 | // HDR mode, or more likely a bug computing the color_endpoint_mode | ||
| 871 | ep1 = uvec4(0xFF, 0xFF, 0, 0); | ||
| 872 | ep2 = uvec4(0xFF, 0xFF, 0, 0); | ||
| 873 | break; | ||
| 874 | } | ||
| 840 | } | 875 | } |
| 841 | #undef READ_UINT_VALUES | 876 | #undef READ_UINT_VALUES |
| 842 | #undef READ_INT_VALUES | 877 | #undef READ_INT_VALUES |
| @@ -849,52 +884,61 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 849 | uint B = 0, C = 0, D = 0; | 884 | uint B = 0, C = 0, D = 0; |
| 850 | uint result = 0; | 885 | uint result = 0; |
| 851 | switch (val.encoding) { | 886 | switch (val.encoding) { |
| 852 | case JustBits: | 887 | case JUST_BITS: |
| 853 | result = FastReplicateTo6(bitval, bitlen); | 888 | result = FastReplicateTo6(bitval, bitlen); |
| 854 | break; | 889 | break; |
| 855 | case Trit: { | 890 | case TRIT: { |
| 856 | D = val.quint_trit_value; | 891 | D = val.quint_trit_value; |
| 857 | switch (bitlen) { | 892 | switch (bitlen) { |
| 858 | case 0: { | 893 | case 0: { |
| 859 | uint results[3] = {0, 32, 63}; | 894 | uint results[3] = {0, 32, 63}; |
| 860 | result = results[D]; | 895 | result = results[D]; |
| 861 | } break; | 896 | break; |
| 897 | } | ||
| 862 | case 1: { | 898 | case 1: { |
| 863 | C = 50; | 899 | C = 50; |
| 864 | } break; | 900 | break; |
| 901 | } | ||
| 865 | case 2: { | 902 | case 2: { |
| 866 | C = 23; | 903 | C = 23; |
| 867 | uint b = (bitval >> 1) & 1; | 904 | uint b = (bitval >> 1) & 1; |
| 868 | B = (b << 6) | (b << 2) | b; | 905 | B = (b << 6) | (b << 2) | b; |
| 869 | } break; | 906 | break; |
| 907 | } | ||
| 870 | case 3: { | 908 | case 3: { |
| 871 | C = 11; | 909 | C = 11; |
| 872 | uint cb = (bitval >> 1) & 3; | 910 | uint cb = (bitval >> 1) & 3; |
| 873 | B = (cb << 5) | cb; | 911 | B = (cb << 5) | cb; |
| 874 | } break; | 912 | break; |
| 913 | } | ||
| 875 | default: | 914 | default: |
| 876 | break; | 915 | break; |
| 877 | } | 916 | } |
| 878 | } break; | 917 | break; |
| 879 | case Quint: { | 918 | } |
| 919 | case QUINT: { | ||
| 880 | D = val.quint_trit_value; | 920 | D = val.quint_trit_value; |
| 881 | switch (bitlen) { | 921 | switch (bitlen) { |
| 882 | case 0: { | 922 | case 0: { |
| 883 | uint results[5] = {0, 16, 32, 47, 63}; | 923 | uint results[5] = {0, 16, 32, 47, 63}; |
| 884 | result = results[D]; | 924 | result = results[D]; |
| 885 | } break; | 925 | break; |
| 926 | } | ||
| 886 | case 1: { | 927 | case 1: { |
| 887 | C = 28; | 928 | C = 28; |
| 888 | } break; | 929 | break; |
| 930 | } | ||
| 889 | case 2: { | 931 | case 2: { |
| 890 | C = 13; | 932 | C = 13; |
| 891 | uint b = (bitval >> 1) & 1; | 933 | uint b = (bitval >> 1) & 1; |
| 892 | B = (b << 6) | (b << 1); | 934 | B = (b << 6) | (b << 1); |
| 893 | } break; | 935 | break; |
| 894 | } | 936 | } |
| 895 | } break; | 937 | } |
| 938 | break; | ||
| 896 | } | 939 | } |
| 897 | if (val.encoding != JustBits && bitlen > 0) { | 940 | } |
| 941 | if (val.encoding != JUST_BITS && bitlen > 0) { | ||
| 898 | result = D * C + B; | 942 | result = D * C + B; |
| 899 | result ^= A; | 943 | result ^= A; |
| 900 | result = (A & 0x20) | (result >> 2); | 944 | result = (A & 0x20) | (result >> 2); |
| @@ -905,7 +949,7 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 905 | return result; | 949 | return result; |
| 906 | } | 950 | } |
| 907 | 951 | ||
| 908 | void UnquantizeTexelWeights(out uint outbuffer[2][144], bool dual_plane, uvec2 size) { | 952 | void UnquantizeTexelWeights(bool dual_plane, uvec2 size) { |
| 909 | uint weight_idx = 0; | 953 | uint weight_idx = 0; |
| 910 | uint unquantized[2][144]; | 954 | uint unquantized[2][144]; |
| 911 | uint area = size.x * size.y; | 955 | uint area = size.x * size.y; |
| @@ -921,11 +965,12 @@ void UnquantizeTexelWeights(out uint outbuffer[2][144], bool dual_plane, uvec2 s | |||
| 921 | if (++weight_idx >= (area)) | 965 | if (++weight_idx >= (area)) |
| 922 | break; | 966 | break; |
| 923 | } | 967 | } |
| 924 | uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); | 968 | |
| 925 | uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); | 969 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); |
| 926 | uint kPlaneScale = dual_plane ? 2 : 1; | 970 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); |
| 927 | for (uint plane = 0; plane < kPlaneScale; plane++) | 971 | const uint k_plane_scale = dual_plane ? 2 : 1; |
| 928 | for (uint t = 0; t < block_dims.y; t++) | 972 | for (uint plane = 0; plane < k_plane_scale; plane++) { |
| 973 | for (uint t = 0; t < block_dims.y; t++) { | ||
| 929 | for (uint s = 0; s < block_dims.x; s++) { | 974 | for (uint s = 0; s < block_dims.x; s++) { |
| 930 | uint cs = Ds * s; | 975 | uint cs = Ds * s; |
| 931 | uint ct = Dt * t; | 976 | uint ct = Dt * t; |
| @@ -955,8 +1000,10 @@ void UnquantizeTexelWeights(out uint outbuffer[2][144], bool dual_plane, uvec2 s | |||
| 955 | if ((v0 + size.x + 1) < (area)) { | 1000 | if ((v0 + size.x + 1) < (area)) { |
| 956 | p.w = unquantized[plane][(v0 + size.x + 1)]; | 1001 | p.w = unquantized[plane][(v0 + size.x + 1)]; |
| 957 | } | 1002 | } |
| 958 | outbuffer[plane][t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4; | 1003 | unquantized_texel_weights[plane][t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4; |
| 959 | } | 1004 | } |
| 1005 | } | ||
| 1006 | } | ||
| 960 | } | 1007 | } |
| 961 | 1008 | ||
| 962 | int FindLayout(uint mode) { | 1009 | int FindLayout(uint mode) { |
| @@ -991,25 +1038,25 @@ int FindLayout(uint mode) { | |||
| 991 | } | 1038 | } |
| 992 | 1039 | ||
| 993 | TexelWeightParams DecodeBlockInfo(uint block_index) { | 1040 | TexelWeightParams DecodeBlockInfo(uint block_index) { |
| 994 | TexelWeightParams params = TexelWeightParams(uvec2(0), false, 0, false, false, false); | 1041 | TexelWeightParams params = TexelWeightParams(uvec2(0), 0, false, false, false, false); |
| 995 | uint mode = StreamBits(11); | 1042 | uint mode = StreamBits(11); |
| 996 | if ((mode & 0x1ff) == 0x1fc) { | 1043 | if ((mode & 0x1ff) == 0x1fc) { |
| 997 | if ((mode & 0x200) != 0) { | 1044 | if ((mode & 0x200) != 0) { |
| 998 | params.VoidExtentHDR = true; | 1045 | params.void_extent_hdr = true; |
| 999 | } else { | 1046 | } else { |
| 1000 | params.VoidExtentLDR = true; | 1047 | params.void_extent_ldr = true; |
| 1001 | } | 1048 | } |
| 1002 | if ((mode & 0x400) == 0 || StreamBits(1) == 0) { | 1049 | if ((mode & 0x400) == 0 || StreamBits(1) == 0) { |
| 1003 | params.Error = true; | 1050 | params.error_state = true; |
| 1004 | } | 1051 | } |
| 1005 | return params; | 1052 | return params; |
| 1006 | } | 1053 | } |
| 1007 | if ((mode & 0xf) == 0) { | 1054 | if ((mode & 0xf) == 0) { |
| 1008 | params.Error = true; | 1055 | params.error_state = true; |
| 1009 | return params; | 1056 | return params; |
| 1010 | } | 1057 | } |
| 1011 | if ((mode & 3) == 0 && (mode & 0x1c0) == 0x1c0) { | 1058 | if ((mode & 3) == 0 && (mode & 0x1c0) == 0x1c0) { |
| 1012 | params.Error = true; | 1059 | params.error_state = true; |
| 1013 | return params; | 1060 | return params; |
| 1014 | } | 1061 | } |
| 1015 | uint A, B; | 1062 | uint A, B; |
| @@ -1060,7 +1107,7 @@ TexelWeightParams DecodeBlockInfo(uint block_index) { | |||
| 1060 | params.size = uvec2(A + 6, B + 6); | 1107 | params.size = uvec2(A + 6, B + 6); |
| 1061 | break; | 1108 | break; |
| 1062 | default: | 1109 | default: |
| 1063 | params.Error = true; | 1110 | params.error_state = true; |
| 1064 | break; | 1111 | break; |
| 1065 | } | 1112 | } |
| 1066 | params.dual_plane = (mode_layout != 9) && ((mode & 0x400) != 0); | 1113 | params.dual_plane = (mode_layout != 9) && ((mode & 0x400) != 0); |
| @@ -1089,11 +1136,8 @@ void FillError(ivec3 coord) { | |||
| 1089 | } | 1136 | } |
| 1090 | } | 1137 | } |
| 1091 | 1138 | ||
| 1092 | void FillVoidExtentLDR(ivec3 coord, uint block_index) { | 1139 | void FillVoidExtentLDR(ivec3 coord) { |
| 1093 | for (int i = 0; i < 4; i++) { | 1140 | StreamBits(52); |
| 1094 | StreamBits(13); | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | uint r_u = StreamBits(16); | 1141 | uint r_u = StreamBits(16); |
| 1098 | uint g_u = StreamBits(16); | 1142 | uint g_u = StreamBits(16); |
| 1099 | uint b_u = StreamBits(16); | 1143 | uint b_u = StreamBits(16); |
| @@ -1110,21 +1154,20 @@ void FillVoidExtentLDR(ivec3 coord, uint block_index) { | |||
| 1110 | } | 1154 | } |
| 1111 | 1155 | ||
| 1112 | void DecompressBlock(ivec3 coord, uint block_index) { | 1156 | void DecompressBlock(ivec3 coord, uint block_index) { |
| 1113 | TexelWeightParams params; | 1157 | TexelWeightParams params = DecodeBlockInfo(block_index); |
| 1114 | params = DecodeBlockInfo(block_index); | 1158 | if (params.error_state) { |
| 1115 | if (params.Error) { | ||
| 1116 | FillError(coord); | 1159 | FillError(coord); |
| 1117 | return; | 1160 | return; |
| 1118 | } | 1161 | } |
| 1119 | if (params.VoidExtentHDR) { | 1162 | if (params.void_extent_hdr) { |
| 1120 | FillError(coord); | 1163 | FillError(coord); |
| 1121 | return; | 1164 | return; |
| 1122 | } | 1165 | } |
| 1123 | if (params.VoidExtentLDR) { | 1166 | if (params.void_extent_ldr) { |
| 1124 | FillVoidExtentLDR(coord, block_index); | 1167 | FillVoidExtentLDR(coord); |
| 1125 | return; | 1168 | return; |
| 1126 | } | 1169 | } |
| 1127 | if (params.size.x > block_dims.x || params.size.y > block_dims.y) { | 1170 | if ((params.size.x > block_dims.x) || (params.size.y > block_dims.y)) { |
| 1128 | FillError(coord); | 1171 | FillError(coord); |
| 1129 | return; | 1172 | return; |
| 1130 | } | 1173 | } |
| @@ -1139,7 +1182,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1139 | uint ced_pointer = 0; | 1182 | uint ced_pointer = 0; |
| 1140 | uint base_cem = 0; | 1183 | uint base_cem = 0; |
| 1141 | if (num_partitions == 1) { | 1184 | if (num_partitions == 1) { |
| 1142 | color_endpoint_mode[0] = StreamBits(4); | 1185 | color_endpoint_mode.x = StreamBits(4); |
| 1143 | partition_index = 0; | 1186 | partition_index = 0; |
| 1144 | } else { | 1187 | } else { |
| 1145 | partition_index = StreamBits(10); | 1188 | partition_index = StreamBits(10); |
| @@ -1181,7 +1224,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1181 | int nb = int(min(remaining_bits, 8U)); | 1224 | int nb = int(min(remaining_bits, 8U)); |
| 1182 | uint b = StreamBits(nb); | 1225 | uint b = StreamBits(nb); |
| 1183 | color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); | 1226 | color_endpoint_data[ced_pointer] = uint(bitfieldExtract(b, 0, nb)); |
| 1184 | ced_pointer++; | 1227 | ++ced_pointer; |
| 1185 | remaining_bits -= nb; | 1228 | remaining_bits -= nb; |
| 1186 | } | 1229 | } |
| 1187 | plane_index = int(StreamBits(plane_selector_bits)); | 1230 | plane_index = int(StreamBits(plane_selector_bits)); |
| @@ -1189,20 +1232,20 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1189 | uint extra_cem = StreamBits(extra_cem_bits); | 1232 | uint extra_cem = StreamBits(extra_cem_bits); |
| 1190 | uint cem = (extra_cem << 6) | base_cem; | 1233 | uint cem = (extra_cem << 6) | base_cem; |
| 1191 | cem >>= 2; | 1234 | cem >>= 2; |
| 1192 | uint C[4] = {0, 0, 0, 0}; | 1235 | uvec4 C = uvec4(0); |
| 1193 | for (uint i = 0; i < num_partitions; i++) { | 1236 | for (uint i = 0; i < num_partitions; i++) { |
| 1194 | C[i] = cem & 1; | 1237 | C[i] = (cem & 1); |
| 1195 | cem >>= 1; | 1238 | cem >>= 1; |
| 1196 | } | 1239 | } |
| 1197 | uint M[4] = {0, 0, 0, 0}; | 1240 | uvec4 M = uvec4(0); |
| 1198 | for (uint i = 0; i < num_partitions; i++) { | 1241 | for (uint i = 0; i < num_partitions; i++) { |
| 1199 | M[i] = cem & 3; | 1242 | M[i] = cem & 3; |
| 1200 | cem >>= 2; | 1243 | cem >>= 2; |
| 1201 | } | 1244 | } |
| 1202 | for (uint i = 0; i < num_partitions; i++) { | 1245 | for (uint i = 0; i < num_partitions; i++) { |
| 1203 | color_endpoint_mode[i] = base_mode; | 1246 | color_endpoint_mode[i] = base_mode; |
| 1204 | if ((C[i]) == 0) { | 1247 | if (C[i] == 0) { |
| 1205 | color_endpoint_mode[i] -= 1; | 1248 | --color_endpoint_mode[i]; |
| 1206 | } | 1249 | } |
| 1207 | color_endpoint_mode[i] <<= 2; | 1250 | color_endpoint_mode[i] <<= 2; |
| 1208 | color_endpoint_mode[i] |= M[i]; | 1251 | color_endpoint_mode[i] |= M[i]; |
| @@ -1213,13 +1256,13 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1213 | color_endpoint_mode[i] = cem; | 1256 | color_endpoint_mode[i] = cem; |
| 1214 | } | 1257 | } |
| 1215 | } | 1258 | } |
| 1259 | DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); | ||
| 1216 | 1260 | ||
| 1217 | uint color_values[32]; // Four values, two endpoints, four maximum paritions | ||
| 1218 | DecodeColorValues(color_values, color_endpoint_mode, num_partitions, color_data_bits); | ||
| 1219 | uvec4 endpoints[4][2]; | 1261 | uvec4 endpoints[4][2]; |
| 1220 | for (uint i = 0; i < num_partitions; i++) { | 1262 | for (uint i = 0; i < num_partitions; i++) { |
| 1221 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_values, color_endpoint_mode[i]); | 1263 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); |
| 1222 | } | 1264 | } |
| 1265 | |||
| 1223 | for (uint i = 0; i < 16; i++) { | 1266 | for (uint i = 0; i < 16; i++) { |
| 1224 | texel_weight_data[i] = local_buff[i]; | 1267 | texel_weight_data[i] = local_buff[i]; |
| 1225 | } | 1268 | } |
| @@ -1238,12 +1281,13 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1238 | uint( | 1281 | uint( |
| 1239 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); | 1282 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); |
| 1240 | for (uint i = 0; i < 16 - clear_byte_start; i++) { | 1283 | for (uint i = 0; i < 16 - clear_byte_start; i++) { |
| 1241 | texel_weight_data[clear_byte_start + i] = uint(0U); | 1284 | texel_weight_data[clear_byte_start + i] = 0U; |
| 1242 | } | 1285 | } |
| 1243 | texel_flag = true; // use texel "vector" and bit stream in integer decoding | 1286 | texel_flag = true; // use texel "vector" and bit stream in integer decoding |
| 1244 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); | 1287 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); |
| 1245 | uint weights[2][144]; | 1288 | |
| 1246 | UnquantizeTexelWeights(weights, params.dual_plane, params.size); | 1289 | UnquantizeTexelWeights(params.dual_plane, params.size); |
| 1290 | |||
| 1247 | for (uint j = 0; j < block_dims.y; j++) { | 1291 | for (uint j = 0; j < block_dims.y; j++) { |
| 1248 | for (uint i = 0; i < block_dims.x; i++) { | 1292 | for (uint i = 0; i < block_dims.x; i++) { |
| 1249 | uint local_partition = Select2DPartition(partition_index, i, j, num_partitions, | 1293 | uint local_partition = Select2DPartition(partition_index, i, j, num_partitions, |
| @@ -1257,9 +1301,9 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1257 | if (params.dual_plane && (((plane_index + 1) & 3) == c)) { | 1301 | if (params.dual_plane && (((plane_index + 1) & 3) == c)) { |
| 1258 | plane_vec[c] = 1; | 1302 | plane_vec[c] = 1; |
| 1259 | } | 1303 | } |
| 1260 | weight_vec[c] = weights[plane_vec[c]][j * block_dims.x + i]; | 1304 | weight_vec[c] = unquantized_texel_weights[plane_vec[c]][j * block_dims.x + i]; |
| 1261 | } | 1305 | } |
| 1262 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6); | 1306 | vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); |
| 1263 | p = (Cf / 65535.0); | 1307 | p = (Cf / 65535.0); |
| 1264 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); | 1308 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); |
| 1265 | } | 1309 | } |
| @@ -1267,7 +1311,7 @@ void DecompressBlock(ivec3 coord, uint block_index) { | |||
| 1267 | } | 1311 | } |
| 1268 | 1312 | ||
| 1269 | void main() { | 1313 | void main() { |
| 1270 | uvec3 pos = gl_GlobalInvocationID + origin; | 1314 | uvec3 pos = gl_GlobalInvocationID; |
| 1271 | pos.x <<= bytes_per_block_log2; | 1315 | pos.x <<= bytes_per_block_log2; |
| 1272 | 1316 | ||
| 1273 | // Read as soon as possible due to its latency | 1317 | // Read as soon as possible due to its latency |
| @@ -1282,9 +1326,10 @@ void main() { | |||
| 1282 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; | 1326 | offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 1283 | offset += swizzle; | 1327 | offset += swizzle; |
| 1284 | 1328 | ||
| 1285 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1.0)); | 1329 | const ivec3 coord = ivec3(gl_GlobalInvocationID * uvec3(block_dims, 1)); |
| 1286 | uint block_index = | 1330 | uint block_index = |
| 1287 | pos.z * num_image_blocks.x * num_image_blocks.y + pos.y * num_image_blocks.x + pos.x; | 1331 | pos.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + pos.y * gl_WorkGroupSize.x + pos.x; |
| 1332 | |||
| 1288 | current_index = 0; | 1333 | current_index = 0; |
| 1289 | bitsread = 0; | 1334 | bitsread = 0; |
| 1290 | for (int i = 0; i < 16; i++) { | 1335 | for (int i = 0; i < 16; i++) { |