summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Ameer J2023-07-30 12:41:52 -0400
committerGravatar Ameer J2023-08-06 14:54:57 -0400
commit27c8bb9615039eefc837ade5d91d5733c738f4a0 (patch)
treec6e219b7e8d89de06b64bf416fab07184863d68b /src
parentweights refactor (diff)
downloadyuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.tar.gz
yuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.tar.xz
yuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.zip
flattening
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp87
1 files changed, 44 insertions, 43 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index b84ddd67d..f720df6d2 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -140,8 +140,6 @@ int result_index = 0;
140EncodingData texel_vector[144]; 140EncodingData texel_vector[144];
141int texel_vector_index = 0; 141int texel_vector_index = 0;
142 142
143uint unquantized_texel_weights[2][144];
144
145// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] 143// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
146// is the same as [(num_bits - 1):0] and repeats all the way down. 144// is the same as [(num_bits - 1):0] and repeats all the way down.
147uint Replicate(uint val, uint num_bits, uint to_bit) { 145uint Replicate(uint val, uint num_bits, uint to_bit) {
@@ -879,58 +877,60 @@ uint UnquantizeTexelWeight(EncodingData val) {
879 return result; 877 return result;
880} 878}
881 879
882void UnquantizeTexelWeights(bool dual_plane, uvec2 size) { 880void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized_texel_weights[2 * 144]) {
883 uint weight_idx = 0;
884 uint unquantized[2][144];
885 uint area = size.x * size.y;
886 for (uint itr = 0; itr < texel_vector_index; itr++) {
887 unquantized[0][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]);
888 if (dual_plane) {
889 ++itr;
890 unquantized[1][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]);
891 if (itr == texel_vector_index) {
892 break;
893 }
894 }
895 if (++weight_idx >= (area))
896 break;
897 }
898
899 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); 881 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
900 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); 882 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
901 const uint k_plane_scale = dual_plane ? 2 : 1; 883 const uint num_planes = is_dual_plane ? 2 : 1;
902 for (uint plane = 0; plane < k_plane_scale; plane++) { 884 const uint area = size.x * size.y;
885 const uint loop_count = min(result_index, area * num_planes);
886 uint unquantized[2 * 144];
887 for (uint itr = 0; itr < loop_count; ++itr) {
888 unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]);
889 }
890 for (uint plane = 0; plane < num_planes; ++plane) {
903 for (uint t = 0; t < block_dims.y; t++) { 891 for (uint t = 0; t < block_dims.y; t++) {
904 for (uint s = 0; s < block_dims.x; s++) { 892 for (uint s = 0; s < block_dims.x; s++) {
905 uint cs = Ds * s; 893 const uint cs = Ds * s;
906 uint ct = Dt * t; 894 const uint ct = Dt * t;
907 uint gs = (cs * (size.x - 1) + 32) >> 6; 895 const uint gs = (cs * (size.x - 1) + 32) >> 6;
908 uint gt = (ct * (size.y - 1) + 32) >> 6; 896 const uint gt = (ct * (size.y - 1) + 32) >> 6;
909 uint js = gs >> 4; 897 const uint js = gs >> 4;
910 uint fs = gs & 0xF; 898 const uint fs = gs & 0xF;
911 uint jt = gt >> 4; 899 const uint jt = gt >> 4;
912 uint ft = gt & 0x0F; 900 const uint ft = gt & 0x0F;
913 uint w11 = (fs * ft + 8) >> 4; 901 const uint w11 = (fs * ft + 8) >> 4;
914 uint w10 = ft - w11; 902 const uint w10 = ft - w11;
915 uint w01 = fs - w11; 903 const uint w01 = fs - w11;
916 uint w00 = 16 - fs - ft + w11; 904 const uint w00 = 16 - fs - ft + w11;
917 uvec4 w = uvec4(w00, w01, w10, w11); 905 const uvec4 w = uvec4(w00, w01, w10, w11);
918 uint v0 = jt * size.x + js; 906 const uint v0 = jt * size.x + js;
919 907
920 uvec4 p = uvec4(0); 908 uvec4 p = uvec4(0);
909
910#define VectorIndicesFromBase(offset_base) \
911 const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \
912
921 if (v0 < area) { 913 if (v0 < area) {
922 p.x = unquantized[plane][v0]; 914 const uint offset_base = v0;
915 VectorIndicesFromBase(offset_base);
916 p.x = unquantized[offset];
923 } 917 }
924 if ((v0 + 1) < (area)) { 918 if ((v0 + 1) < (area)) {
925 p.y = unquantized[plane][v0 + 1]; 919 const uint offset_base = v0 + 1;
920 VectorIndicesFromBase(offset_base);
921 p.y = unquantized[offset];
926 } 922 }
927 if ((v0 + size.x) < (area)) { 923 if ((v0 + size.x) < (area)) {
928 p.z = unquantized[plane][(v0 + size.x)]; 924 const uint offset_base = v0 + size.x;
925 VectorIndicesFromBase(offset_base);
926 p.z = unquantized[offset];
929 } 927 }
930 if ((v0 + size.x + 1) < (area)) { 928 if ((v0 + size.x + 1) < (area)) {
931 p.w = unquantized[plane][(v0 + size.x + 1)]; 929 const uint offset_base = v0 + size.x + 1;
930 VectorIndicesFromBase(offset_base);
931 p.w = unquantized[offset];
932 } 932 }
933 unquantized_texel_weights[plane][t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4; 933 unquantized_texel_weights[plane * 144 + t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4;
934 } 934 }
935 } 935 }
936 } 936 }
@@ -1208,7 +1208,8 @@ void DecompressBlock(ivec3 coord) {
1208 texel_flag = true; // use texel "vector" and bit stream in integer decoding 1208 texel_flag = true; // use texel "vector" and bit stream in integer decoding
1209 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); 1209 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
1210 1210
1211 UnquantizeTexelWeights(params.dual_plane, params.size); 1211 uint unquantized_texel_weights[2 * 144];
1212 UnquantizeTexelWeights(params.dual_plane, params.size, unquantized_texel_weights);
1212 1213
1213 for (uint j = 0; j < block_dims.y; j++) { 1214 for (uint j = 0; j < block_dims.y; j++) {
1214 for (uint i = 0; i < block_dims.x; i++) { 1215 for (uint i = 0; i < block_dims.x; i++) {
@@ -1220,10 +1221,10 @@ void DecompressBlock(ivec3 coord) {
1220 const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); 1221 const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]);
1221 const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); 1222 const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]);
1222 const uint weight_offset = (j * block_dims.x + i); 1223 const uint weight_offset = (j * block_dims.x + i);
1223 const uint primary_weight = unquantized_texel_weights[weight_offset][0]; 1224 const uint primary_weight = unquantized_texel_weights[weight_offset];
1224 uvec4 weight_vec = uvec4(primary_weight); 1225 uvec4 weight_vec = uvec4(primary_weight);
1225 if (params.dual_plane) { 1226 if (params.dual_plane) {
1226 const uint secondary_weight = unquantized_texel_weights[weight_offset][1]; 1227 const uint secondary_weight = unquantized_texel_weights[weight_offset + 144];
1227 for (uint c = 0; c < 4; c++) { 1228 for (uint c = 0; c < 4; c++) {
1228 const bool is_secondary = ((plane_index + 1u) & 3u) == c; 1229 const bool is_secondary = ((plane_index + 1u) & 3u) == c;
1229 weight_vec[c] = is_secondary ? secondary_weight : primary_weight; 1230 weight_vec[c] = is_secondary ? secondary_weight : primary_weight;