diff options
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 105 |
1 files changed, 20 insertions, 85 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 9d9532a98..5e922d1fe 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -140,98 +140,33 @@ uint ReplicateBitTo9(uint value) { | |||
| 140 | return value * 511; | 140 | return value * 511; |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | uint FastReplicateTo8(uint value, uint num_bits) { | 143 | uint ReplicateBits(uint value, uint num_bits, uint to_bit) { |
| 144 | if (value == 0) { | 144 | if (value == 0 || num_bits == 0) { |
| 145 | return 0; | 145 | return 0; |
| 146 | } | 146 | } |
| 147 | const uint array_index = value / 4; | 147 | if (num_bits >= to_bit) { |
| 148 | const uint vector_index = bitfieldExtract(value, 0, 2); | 148 | return value; |
| 149 | switch (num_bits) { | ||
| 150 | case 1: | ||
| 151 | return 255; | ||
| 152 | case 2: { | ||
| 153 | const uvec4 REPLICATE_2_BIT_TO_8_TABLE = (uvec4(0, 85, 170, 255)); | ||
| 154 | return REPLICATE_2_BIT_TO_8_TABLE[vector_index]; | ||
| 155 | } | ||
| 156 | case 3: { | ||
| 157 | const uvec4 REPLICATE_3_BIT_TO_8_TABLE[2] = | ||
| 158 | uvec4[](uvec4(0, 36, 73, 109), uvec4(146, 182, 219, 255)); | ||
| 159 | return REPLICATE_3_BIT_TO_8_TABLE[array_index][vector_index]; | ||
| 160 | } | 149 | } |
| 161 | case 4: { | 150 | const uint v = value & uint((1 << num_bits) - 1); |
| 162 | const uvec4 REPLICATE_4_BIT_TO_8_TABLE[4] = | 151 | uint res = v; |
| 163 | uvec4[](uvec4(0, 17, 34, 51), uvec4(68, 85, 102, 119), uvec4(136, 153, 170, 187), | 152 | uint reslen = num_bits; |
| 164 | uvec4(204, 221, 238, 255)); | 153 | while (reslen < to_bit) { |
| 165 | return REPLICATE_4_BIT_TO_8_TABLE[array_index][vector_index]; | 154 | const uint num_dst_bits_to_shift_up = min(num_bits, to_bit - reslen); |
| 166 | } | 155 | const uint num_src_bits_to_shift_down = num_bits - num_dst_bits_to_shift_up; |
| 167 | case 5: { | 156 | |
| 168 | const uvec4 REPLICATE_5_BIT_TO_8_TABLE[8] = | 157 | res <<= num_dst_bits_to_shift_up; |
| 169 | uvec4[](uvec4(0, 8, 16, 24), uvec4(33, 41, 49, 57), uvec4(66, 74, 82, 90), | 158 | res |= (v >> num_src_bits_to_shift_down); |
| 170 | uvec4(99, 107, 115, 123), uvec4(132, 140, 148, 156), uvec4(165, 173, 181, 189), | 159 | reslen += num_bits; |
| 171 | uvec4(198, 206, 214, 222), uvec4(231, 239, 247, 255)); | ||
| 172 | return REPLICATE_5_BIT_TO_8_TABLE[array_index][vector_index]; | ||
| 173 | } | 160 | } |
| 174 | case 6: { | 161 | return res; |
| 175 | const uvec4 REPLICATE_6_BIT_TO_8_TABLE[16] = uvec4[]( | 162 | } |
| 176 | uvec4(0, 4, 8, 12), uvec4(16, 20, 24, 28), uvec4(32, 36, 40, 44), uvec4(48, 52, 56, 60), | 163 | |
| 177 | uvec4(65, 69, 73, 77), uvec4(81, 85, 89, 93), uvec4(97, 101, 105, 109), | 164 | uint FastReplicateTo8(uint value, uint num_bits) { |
| 178 | uvec4(113, 117, 121, 125), uvec4(130, 134, 138, 142), uvec4(146, 150, 154, 158), | 165 | return ReplicateBits(value, num_bits, 8); |
| 179 | uvec4(162, 166, 170, 174), uvec4(178, 182, 186, 190), uvec4(195, 199, 203, 207), | ||
| 180 | uvec4(211, 215, 219, 223), uvec4(227, 231, 235, 239), uvec4(243, 247, 251, 255)); | ||
| 181 | return REPLICATE_6_BIT_TO_8_TABLE[array_index][vector_index]; | ||
| 182 | } | ||
| 183 | case 7: { | ||
| 184 | const uvec4 REPLICATE_7_BIT_TO_8_TABLE[32] = | ||
| 185 | uvec4[](uvec4(0, 2, 4, 6), uvec4(8, 10, 12, 14), uvec4(16, 18, 20, 22), | ||
| 186 | uvec4(24, 26, 28, 30), uvec4(32, 34, 36, 38), uvec4(40, 42, 44, 46), | ||
| 187 | uvec4(48, 50, 52, 54), uvec4(56, 58, 60, 62), uvec4(64, 66, 68, 70), | ||
| 188 | uvec4(72, 74, 76, 78), uvec4(80, 82, 84, 86), uvec4(88, 90, 92, 94), | ||
| 189 | uvec4(96, 98, 100, 102), uvec4(104, 106, 108, 110), uvec4(112, 114, 116, 118), | ||
| 190 | uvec4(120, 122, 124, 126), uvec4(129, 131, 133, 135), uvec4(137, 139, 141, 143), | ||
| 191 | uvec4(145, 147, 149, 151), uvec4(153, 155, 157, 159), uvec4(161, 163, 165, 167), | ||
| 192 | uvec4(169, 171, 173, 175), uvec4(177, 179, 181, 183), uvec4(185, 187, 189, 191), | ||
| 193 | uvec4(193, 195, 197, 199), uvec4(201, 203, 205, 207), uvec4(209, 211, 213, 215), | ||
| 194 | uvec4(217, 219, 221, 223), uvec4(225, 227, 229, 231), uvec4(233, 235, 237, 239), | ||
| 195 | uvec4(241, 243, 245, 247), uvec4(249, 251, 253, 255)); | ||
| 196 | return REPLICATE_7_BIT_TO_8_TABLE[array_index][vector_index]; | ||
| 197 | } | ||
| 198 | } | ||
| 199 | return value; | ||
| 200 | } | 166 | } |
| 201 | 167 | ||
| 202 | uint FastReplicateTo6(uint value, uint num_bits) { | 168 | uint FastReplicateTo6(uint value, uint num_bits) { |
| 203 | if (value == 0) { | 169 | return ReplicateBits(value, num_bits, 6); |
| 204 | return 0; | ||
| 205 | } | ||
| 206 | const uint array_index = value / 4; | ||
| 207 | const uint vector_index = bitfieldExtract(value, 0, 2); | ||
| 208 | switch (num_bits) { | ||
| 209 | case 1: | ||
| 210 | return 63; | ||
| 211 | case 2: { | ||
| 212 | const uvec4 REPLICATE_2_BIT_TO_6_TABLE = uvec4(0, 21, 42, 63); | ||
| 213 | return REPLICATE_2_BIT_TO_6_TABLE[vector_index]; | ||
| 214 | } | ||
| 215 | case 3: { | ||
| 216 | const uvec4 REPLICATE_3_BIT_TO_6_TABLE[2] = | ||
| 217 | uvec4[](uvec4(0, 9, 18, 27), uvec4(36, 45, 54, 63)); | ||
| 218 | return REPLICATE_3_BIT_TO_6_TABLE[array_index][vector_index]; | ||
| 219 | } | ||
| 220 | case 4: { | ||
| 221 | const uvec4 REPLICATE_4_BIT_TO_6_TABLE[4] = | ||
| 222 | uvec4[](uvec4(0, 4, 8, 12), uvec4(17, 21, 25, 29), uvec4(34, 38, 42, 46), | ||
| 223 | uvec4(51, 55, 59, 63)); | ||
| 224 | return REPLICATE_4_BIT_TO_6_TABLE[array_index][vector_index]; | ||
| 225 | } | ||
| 226 | case 5: { | ||
| 227 | const uvec4 REPLICATE_5_BIT_TO_6_TABLE[8] = | ||
| 228 | uvec4[](uvec4(0, 2, 4, 6), uvec4(8, 10, 12, 14), uvec4(16, 18, 20, 22), | ||
| 229 | uvec4(24, 26, 28, 30), uvec4(33, 35, 37, 39), uvec4(41, 43, 45, 47), | ||
| 230 | uvec4(49, 51, 53, 55), uvec4(57, 59, 61, 63)); | ||
| 231 | return REPLICATE_5_BIT_TO_6_TABLE[array_index][vector_index]; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | return value; | ||
| 235 | } | 170 | } |
| 236 | 171 | ||
| 237 | uint Div3Floor(uint v) { | 172 | uint Div3Floor(uint v) { |