diff options
| author | 2023-07-30 13:04:41 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:57 -0400 | |
| commit | 0078e5a33822d0e15cc7fab2809e5bc4883cff26 (patch) | |
| tree | f487d8954b16e585be0c3d5c3560d73f93f00bf6 /src | |
| parent | EncodingData pack (diff) | |
| download | yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.gz yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.tar.xz yuzu-0078e5a33822d0e15cc7fab2809e5bc4883cff26.zip | |
reuse vectors memory
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 50 |
1 file changed, 17 insertions, 33 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 37b502324..4277b0756 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -154,19 +154,10 @@ int color_bitsread = 0; | |||
| 154 | uint color_values[32]; | 154 | uint color_values[32]; |
| 155 | int colvals_index = 0; | 155 | int colvals_index = 0; |
| 156 | 156 | ||
| 157 | // Weight data globals | ||
| 158 | uvec4 texel_weight_data; | ||
| 159 | int texel_bitsread = 0; | ||
| 160 | |||
| 161 | bool texel_flag = false; | ||
| 162 | |||
| 163 | // Global "vectors" to be pushed into when decoding | 157 | // Global "vectors" to be pushed into when decoding |
| 164 | EncodingData result_vector[144]; | 158 | EncodingData result_vector[144]; |
| 165 | int result_index = 0; | 159 | int result_index = 0; |
| 166 | 160 | ||
| 167 | EncodingData texel_vector[144]; | ||
| 168 | int texel_vector_index = 0; | ||
| 169 | |||
| 170 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | 161 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] |
| 171 | // is the same as [(num_bits - 1):0] and repeats all the way down. | 162 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| 172 | uint Replicate(uint val, uint num_bits, uint to_bit) { | 163 | uint Replicate(uint val, uint num_bits, uint to_bit) { |
| @@ -382,26 +373,15 @@ void SkipBits(uint num_bits) { | |||
| 382 | } | 373 | } |
| 383 | 374 | ||
| 384 | uint StreamColorBits(uint num_bits) { | 375 | uint StreamColorBits(uint num_bits) { |
| 385 | uint ret = 0; | 376 | const int int_bits = int(num_bits); |
| 386 | int int_bits = int(num_bits); | 377 | const uint ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits); |
| 387 | if (texel_flag) { | 378 | color_bitsread += int_bits; |
| 388 | ret = ExtractBits(texel_weight_data, texel_bitsread, int_bits); | ||
| 389 | texel_bitsread += int_bits; | ||
| 390 | } else { | ||
| 391 | ret = ExtractBits(color_endpoint_data, color_bitsread, int_bits); | ||
| 392 | color_bitsread += int_bits; | ||
| 393 | } | ||
| 394 | return ret; | 379 | return ret; |
| 395 | } | 380 | } |
| 396 | 381 | ||
| 397 | void ResultEmplaceBack(EncodingData val) { | 382 | void ResultEmplaceBack(EncodingData val) { |
| 398 | if (texel_flag) { | 383 | result_vector[result_index] = val; |
| 399 | texel_vector[texel_vector_index] = val; | 384 | ++result_index; |
| 400 | ++texel_vector_index; | ||
| 401 | } else { | ||
| 402 | result_vector[result_index] = val; | ||
| 403 | ++result_index; | ||
| 404 | } | ||
| 405 | } | 385 | } |
| 406 | 386 | ||
| 407 | // Returns the number of bits required to encode n_vals values. | 387 | // Returns the number of bits required to encode n_vals values. |
| @@ -910,7 +890,7 @@ void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized | |||
| 910 | const uint loop_count = min(result_index, area * num_planes); | 890 | const uint loop_count = min(result_index, area * num_planes); |
| 911 | uint unquantized[2 * 144]; | 891 | uint unquantized[2 * 144]; |
| 912 | for (uint itr = 0; itr < loop_count; ++itr) { | 892 | for (uint itr = 0; itr < loop_count; ++itr) { |
| 913 | unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]); | 893 | unquantized[itr] = UnquantizeTexelWeight(result_vector[itr]); |
| 914 | } | 894 | } |
| 915 | for (uint plane = 0; plane < num_planes; ++plane) { | 895 | for (uint plane = 0; plane < num_planes; ++plane) { |
| 916 | for (uint t = 0; t < block_dims.y; t++) { | 896 | for (uint t = 0; t < block_dims.y; t++) { |
| @@ -1215,22 +1195,26 @@ void DecompressBlock(ivec3 coord) { | |||
| 1215 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); | 1195 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], color_endpoint_mode[i]); |
| 1216 | } | 1196 | } |
| 1217 | 1197 | ||
| 1218 | texel_weight_data = local_buff; | 1198 | color_endpoint_data = local_buff; |
| 1219 | texel_weight_data = bitfieldReverse(texel_weight_data).wzyx; | 1199 | color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; |
| 1220 | uint clear_byte_start = | 1200 | uint clear_byte_start = |
| 1221 | (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; | 1201 | (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) >> 3) + 1; |
| 1222 | 1202 | ||
| 1223 | uint byte_insert = ExtractBits(texel_weight_data, int(clear_byte_start - 1) * 8, 8) & | 1203 | uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) & |
| 1224 | uint( | 1204 | uint( |
| 1225 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); | 1205 | ((1 << (GetPackedBitSize(params.size, params.dual_plane, params.max_weight) % 8)) - 1)); |
| 1226 | uint vec_index = (clear_byte_start - 1) >> 2; | 1206 | uint vec_index = (clear_byte_start - 1) >> 2; |
| 1227 | texel_weight_data[vec_index] = | 1207 | color_endpoint_data[vec_index] = |
| 1228 | bitfieldInsert(texel_weight_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); | 1208 | bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); |
| 1229 | for (uint i = clear_byte_start; i < 16; ++i) { | 1209 | for (uint i = clear_byte_start; i < 16; ++i) { |
| 1230 | uint idx = i >> 2; | 1210 | uint idx = i >> 2; |
| 1231 | texel_weight_data[idx] = bitfieldInsert(texel_weight_data[idx], 0, int(i % 4) * 8, 8); | 1211 | color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i % 4) * 8, 8); |
| 1232 | } | 1212 | } |
| 1233 | texel_flag = true; // use texel "vector" and bit stream in integer decoding | 1213 | |
| 1214 | // Re-init vector variables for next decode phase | ||
| 1215 | result_index = 0; | ||
| 1216 | color_bitsread = 0; | ||
| 1217 | |||
| 1234 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); | 1218 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); |
| 1235 | 1219 | ||
| 1236 | uint unquantized_texel_weights[2 * 144]; | 1220 | uint unquantized_texel_weights[2 * 144]; |