summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Ameer J2023-08-06 13:32:35 -0400
committerGravatar Ameer J2023-08-06 14:54:58 -0400
commit913803bf653989d2d08ddd39caf2216b55275dfc (patch)
treefe16d174b54119a593b43c01f89c8ff1d4e1f3cf /src
parentminor (diff)
downloadyuzu-913803bf653989d2d08ddd39caf2216b55275dfc.tar.gz
yuzu-913803bf653989d2d08ddd39caf2216b55275dfc.tar.xz
yuzu-913803bf653989d2d08ddd39caf2216b55275dfc.zip
Compute Replicate
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp105
1 files changed, 20 insertions, 85 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 9d9532a98..5e922d1fe 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -140,98 +140,33 @@ uint ReplicateBitTo9(uint value) {
140 return value * 511; 140 return value * 511;
141} 141}
142 142
143uint FastReplicateTo8(uint value, uint num_bits) { 143uint ReplicateBits(uint value, uint num_bits, uint to_bit) {
144 if (value == 0) { 144 if (value == 0 || num_bits == 0) {
145 return 0; 145 return 0;
146 } 146 }
147 const uint array_index = value / 4; 147 if (num_bits >= to_bit) {
148 const uint vector_index = bitfieldExtract(value, 0, 2); 148 return value;
149 switch (num_bits) {
150 case 1:
151 return 255;
152 case 2: {
153 const uvec4 REPLICATE_2_BIT_TO_8_TABLE = (uvec4(0, 85, 170, 255));
154 return REPLICATE_2_BIT_TO_8_TABLE[vector_index];
155 }
156 case 3: {
157 const uvec4 REPLICATE_3_BIT_TO_8_TABLE[2] =
158 uvec4[](uvec4(0, 36, 73, 109), uvec4(146, 182, 219, 255));
159 return REPLICATE_3_BIT_TO_8_TABLE[array_index][vector_index];
160 } 149 }
161 case 4: { 150 const uint v = value & uint((1 << num_bits) - 1);
162 const uvec4 REPLICATE_4_BIT_TO_8_TABLE[4] = 151 uint res = v;
163 uvec4[](uvec4(0, 17, 34, 51), uvec4(68, 85, 102, 119), uvec4(136, 153, 170, 187), 152 uint reslen = num_bits;
164 uvec4(204, 221, 238, 255)); 153 while (reslen < to_bit) {
165 return REPLICATE_4_BIT_TO_8_TABLE[array_index][vector_index]; 154 const uint num_dst_bits_to_shift_up = min(num_bits, to_bit - reslen);
166 } 155 const uint num_src_bits_to_shift_down = num_bits - num_dst_bits_to_shift_up;
167 case 5: { 156
168 const uvec4 REPLICATE_5_BIT_TO_8_TABLE[8] = 157 res <<= num_dst_bits_to_shift_up;
169 uvec4[](uvec4(0, 8, 16, 24), uvec4(33, 41, 49, 57), uvec4(66, 74, 82, 90), 158 res |= (v >> num_src_bits_to_shift_down);
170 uvec4(99, 107, 115, 123), uvec4(132, 140, 148, 156), uvec4(165, 173, 181, 189), 159 reslen += num_bits;
171 uvec4(198, 206, 214, 222), uvec4(231, 239, 247, 255));
172 return REPLICATE_5_BIT_TO_8_TABLE[array_index][vector_index];
173 } 160 }
174 case 6: { 161 return res;
175 const uvec4 REPLICATE_6_BIT_TO_8_TABLE[16] = uvec4[]( 162}
176 uvec4(0, 4, 8, 12), uvec4(16, 20, 24, 28), uvec4(32, 36, 40, 44), uvec4(48, 52, 56, 60), 163
177 uvec4(65, 69, 73, 77), uvec4(81, 85, 89, 93), uvec4(97, 101, 105, 109), 164uint FastReplicateTo8(uint value, uint num_bits) {
178 uvec4(113, 117, 121, 125), uvec4(130, 134, 138, 142), uvec4(146, 150, 154, 158), 165 return ReplicateBits(value, num_bits, 8);
179 uvec4(162, 166, 170, 174), uvec4(178, 182, 186, 190), uvec4(195, 199, 203, 207),
180 uvec4(211, 215, 219, 223), uvec4(227, 231, 235, 239), uvec4(243, 247, 251, 255));
181 return REPLICATE_6_BIT_TO_8_TABLE[array_index][vector_index];
182 }
183 case 7: {
184 const uvec4 REPLICATE_7_BIT_TO_8_TABLE[32] =
185 uvec4[](uvec4(0, 2, 4, 6), uvec4(8, 10, 12, 14), uvec4(16, 18, 20, 22),
186 uvec4(24, 26, 28, 30), uvec4(32, 34, 36, 38), uvec4(40, 42, 44, 46),
187 uvec4(48, 50, 52, 54), uvec4(56, 58, 60, 62), uvec4(64, 66, 68, 70),
188 uvec4(72, 74, 76, 78), uvec4(80, 82, 84, 86), uvec4(88, 90, 92, 94),
189 uvec4(96, 98, 100, 102), uvec4(104, 106, 108, 110), uvec4(112, 114, 116, 118),
190 uvec4(120, 122, 124, 126), uvec4(129, 131, 133, 135), uvec4(137, 139, 141, 143),
191 uvec4(145, 147, 149, 151), uvec4(153, 155, 157, 159), uvec4(161, 163, 165, 167),
192 uvec4(169, 171, 173, 175), uvec4(177, 179, 181, 183), uvec4(185, 187, 189, 191),
193 uvec4(193, 195, 197, 199), uvec4(201, 203, 205, 207), uvec4(209, 211, 213, 215),
194 uvec4(217, 219, 221, 223), uvec4(225, 227, 229, 231), uvec4(233, 235, 237, 239),
195 uvec4(241, 243, 245, 247), uvec4(249, 251, 253, 255));
196 return REPLICATE_7_BIT_TO_8_TABLE[array_index][vector_index];
197 }
198 }
199 return value;
200} 166}
201 167
202uint FastReplicateTo6(uint value, uint num_bits) { 168uint FastReplicateTo6(uint value, uint num_bits) {
203 if (value == 0) { 169 return ReplicateBits(value, num_bits, 6);
204 return 0;
205 }
206 const uint array_index = value / 4;
207 const uint vector_index = bitfieldExtract(value, 0, 2);
208 switch (num_bits) {
209 case 1:
210 return 63;
211 case 2: {
212 const uvec4 REPLICATE_2_BIT_TO_6_TABLE = uvec4(0, 21, 42, 63);
213 return REPLICATE_2_BIT_TO_6_TABLE[vector_index];
214 }
215 case 3: {
216 const uvec4 REPLICATE_3_BIT_TO_6_TABLE[2] =
217 uvec4[](uvec4(0, 9, 18, 27), uvec4(36, 45, 54, 63));
218 return REPLICATE_3_BIT_TO_6_TABLE[array_index][vector_index];
219 }
220 case 4: {
221 const uvec4 REPLICATE_4_BIT_TO_6_TABLE[4] =
222 uvec4[](uvec4(0, 4, 8, 12), uvec4(17, 21, 25, 29), uvec4(34, 38, 42, 46),
223 uvec4(51, 55, 59, 63));
224 return REPLICATE_4_BIT_TO_6_TABLE[array_index][vector_index];
225 }
226 case 5: {
227 const uvec4 REPLICATE_5_BIT_TO_6_TABLE[8] =
228 uvec4[](uvec4(0, 2, 4, 6), uvec4(8, 10, 12, 14), uvec4(16, 18, 20, 22),
229 uvec4(24, 26, 28, 30), uvec4(33, 35, 37, 39), uvec4(41, 43, 45, 47),
230 uvec4(49, 51, 53, 55), uvec4(57, 59, 61, 63));
231 return REPLICATE_5_BIT_TO_6_TABLE[array_index][vector_index];
232 }
233 }
234 return value;
235} 170}
236 171
237uint Div3Floor(uint v) { 172uint Div3Floor(uint v) {