summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Ameer J, 2023-07-30 13:04:41 -0400
committer: Ameer J, 2023-08-06 14:54:57 -0400
commit: 0078e5a33822d0e15cc7fab2809e5bc4883cff26 (patch)
tree: f487d8954b16e585be0c3d5c3560d73f93f00bf6 /src
parent: EncodingData pack (diff)
download: yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.gz
          yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.xz
          yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.zip
reuse vectors memory
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp 50
1 files changed, 17 insertions, 33 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 37b502324..4277b0756 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -154,19 +154,10 @@ int color_bitsread = 0;
154uint color_values[32]; 154uint color_values[32];
155int colvals_index = 0; 155int colvals_index = 0;
156 156
157// Weight data globals
158uvec4 texel_weight_data;
159int texel_bitsread = 0;
160
161bool texel_flag = false;
162
163// Global "vectors" to be pushed into when decoding 157// Global "vectors" to be pushed into when decoding
164EncodingData result_vector[144]; 158EncodingData result_vector[144];
165int result_index = 0; 159int result_index = 0;
166 160
167EncodingData texel_vector[144];
168int texel_vector_index = 0;
169
170// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 161// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
171// is the same as [(num_bits - 1):0] and repeats all the way down. 162// is the same as [(num_bits - 1):0] and repeats all the way down.
172uint Replicate(uint val, uint num_bits, uint to_bit) { 163uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -382,26 +373,15 @@ void SkipBits(uint num_bits) {
382} 373}
383 374
384uint StreamColorBits(uint num_bits) { 375uint StreamColorBits(uint num_bits) {
385 uint ret = 0; 376 const int int_bits = int(num_bits);
386 int int_bits = int(num_bits); 377 const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
387 if (texel_flag) { 378 color_bitsread += int_bits;
388 ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits);
389 texel_bitsread += int_bits;
390 } else {
391 ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits);
392 color_bitsread += int_bits;
393 }
394 return ret; 379 return ret;
395} 380}
396 381
397void ResultEmplaceBack(EncodingData val) { 382void ResultEmplaceBack(EncodingData val) {
398 if (texel_flag) { 383 result_vector[result_index] = val;
399 texel_vector[texel_vector_index] = val; 384 ++result_index;
400 ++texel_vector_index;
401 } else {
402 result_vector[result_index] = val;
403 ++result_index;
404 }
405} 385}
406 386
407// Returns the number of bits required to encode n_vals values. 387// Returns the number of bits required to encode n_vals values.
@@ -910,7 +890,7 @@ void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized
910 const uint loop_count = min(result_index, area * num_planes); 890 const uint loop_count = min(result_index, area * num_planes);
911 uint unquantized[2 * 144]; 891 uint unquantized[2 * 144];
912 for (uint itr = 0; itr < loop_count; ++itr) { 892 for (uint itr = 0; itr < loop_count; ++itr) {
913 unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]); 893 unquantized[itr] = UnquantizeTexelWeight(result_vector[itr]);
914 } 894 }
915 for (uint plane = 0; plane < num_planes; ++plane) { 895 for (uint plane = 0; plane < num_planes; ++plane) {
916 for (uint t = 0; t < block_dims.y; t++) { 896 for (uint t = 0; t < block_dims.y; t++) {
@@ -1215,22 +1195,26 @@ void DecompressBlock(ivec3 coord) {
1215 ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); 1195 ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]);
1216 } 1196 }
1217 1197
1218 texel_weight_data = local_buff; 1198 color_endpoint_data = local_buff;
1219 texel_weight_data = bitfieldReverse(texel_weight_data).wzyx; 1199 color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
1220 uint clear_byte_start = 1200 uint clear_byte_start =
1221 (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; 1201 (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1;
1222 1202
1223 uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) & 1203 uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) &
1224 uint( 1204 uint(
1225 ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); 1205 ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1));
1226 uint vec_index = (clear_byte_start - 1) >> 2; 1206 uint vec_index = (clear_byte_start - 1) >> 2;
1227 texel_weight_data[vec_index] = 1207 color_endpoint_data[vec_index] =
1228 bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); 1208 bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8);
1229 for (uint i = clear_byte_start; i < 16; ++i) { 1209 for (uint i = clear_byte_start; i < 16; ++i) {
1230 uint idx = i >> 2; 1210 uint idx = i >> 2;
1231 texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8); 1211 color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8);
1232 } 1212 }
1233 texel_flag = true; // use texel "vector" and bit stream in integer decoding 1213
1214 // Re-init vector variables for next decode phase
1215 result_index = 0;
1216 color_bitsread = 0;
1217
1234 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); 1218 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
1235 1219
1236 uint unquantized_texel_weights[2 * 144]; 1220 uint unquantized_texel_weights[2 * 144];