diff options
| author | 2023-07-30 12:41:52 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:57 -0400 | |
| commit | 27c8bb9615039eefc837ade5d91d5733c738f4a0 (patch) | |
| tree | c6e219b7e8d89de06b64bf416fab07184863d68b /src | |
| parent | weights refactor (diff) | |
| download | yuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.tar.gz yuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.tar.xz yuzu-27c8bb9615039eefc837ade5d91d5733c738f4a0.zip | |
flattening
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 87 |
1 file changed, 44 insertions, 43 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index b84ddd67d..f720df6d2 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -140,8 +140,6 @@ int result_index = 0; | |||
| 140 | EncodingData texel_vector[144]; | 140 | EncodingData texel_vector[144]; |
| 141 | int texel_vector_index = 0; | 141 | int texel_vector_index = 0; |
| 142 | 142 | ||
| 143 | uint unquantized_texel_weights[2][144]; | ||
| 144 | |||
| 145 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] | 143 | // Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] |
| 146 | // is the same as [(num_bits - 1):0] and repeats all the way down. | 144 | // is the same as [(num_bits - 1):0] and repeats all the way down. |
| 147 | uint Replicate(uint val, uint num_bits, uint to_bit) { | 145 | uint Replicate(uint val, uint num_bits, uint to_bit) { |
| @@ -879,58 +877,60 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 879 | return result; | 877 | return result; |
| 880 | } | 878 | } |
| 881 | 879 | ||
| 882 | void UnquantizeTexelWeights(bool dual_plane, uvec2 size) { | 880 | void UnquantizeTexelWeights(bool is_dual_plane, uvec2 size, out uint unquantized_texel_weights[2 * 144]) { |
| 883 | uint weight_idx = 0; | ||
| 884 | uint unquantized[2][144]; | ||
| 885 | uint area = size.x * size.y; | ||
| 886 | for (uint itr = 0; itr < texel_vector_index; itr++) { | ||
| 887 | unquantized[0][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]); | ||
| 888 | if (dual_plane) { | ||
| 889 | ++itr; | ||
| 890 | unquantized[1][weight_idx] = UnquantizeTexelWeight(texel_vector[itr]); | ||
| 891 | if (itr == texel_vector_index) { | ||
| 892 | break; | ||
| 893 | } | ||
| 894 | } | ||
| 895 | if (++weight_idx >= (area)) | ||
| 896 | break; | ||
| 897 | } | ||
| 898 | |||
| 899 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); | 881 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); |
| 900 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); | 882 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); |
| 901 | const uint k_plane_scale = dual_plane ? 2 : 1; | 883 | const uint num_planes = is_dual_plane ? 2 : 1; |
| 902 | for (uint plane = 0; plane < k_plane_scale; plane++) { | 884 | const uint area = size.x * size.y; |
| 885 | const uint loop_count = min(result_index, area * num_planes); | ||
| 886 | uint unquantized[2 * 144]; | ||
| 887 | for (uint itr = 0; itr < loop_count; ++itr) { | ||
| 888 | unquantized[itr] = UnquantizeTexelWeight(texel_vector[itr]); | ||
| 889 | } | ||
| 890 | for (uint plane = 0; plane < num_planes; ++plane) { | ||
| 903 | for (uint t = 0; t < block_dims.y; t++) { | 891 | for (uint t = 0; t < block_dims.y; t++) { |
| 904 | for (uint s = 0; s < block_dims.x; s++) { | 892 | for (uint s = 0; s < block_dims.x; s++) { |
| 905 | uint cs = Ds * s; | 893 | const uint cs = Ds * s; |
| 906 | uint ct = Dt * t; | 894 | const uint ct = Dt * t; |
| 907 | uint gs = (cs * (size.x - 1) + 32) >> 6; | 895 | const uint gs = (cs * (size.x - 1) + 32) >> 6; |
| 908 | uint gt = (ct * (size.y - 1) + 32) >> 6; | 896 | const uint gt = (ct * (size.y - 1) + 32) >> 6; |
| 909 | uint js = gs >> 4; | 897 | const uint js = gs >> 4; |
| 910 | uint fs = gs & 0xF; | 898 | const uint fs = gs & 0xF; |
| 911 | uint jt = gt >> 4; | 899 | const uint jt = gt >> 4; |
| 912 | uint ft = gt & 0x0F; | 900 | const uint ft = gt & 0x0F; |
| 913 | uint w11 = (fs * ft + 8) >> 4; | 901 | const uint w11 = (fs * ft + 8) >> 4; |
| 914 | uint w10 = ft - w11; | 902 | const uint w10 = ft - w11; |
| 915 | uint w01 = fs - w11; | 903 | const uint w01 = fs - w11; |
| 916 | uint w00 = 16 - fs - ft + w11; | 904 | const uint w00 = 16 - fs - ft + w11; |
| 917 | uvec4 w = uvec4(w00, w01, w10, w11); | 905 | const uvec4 w = uvec4(w00, w01, w10, w11); |
| 918 | uint v0 = jt * size.x + js; | 906 | const uint v0 = jt * size.x + js; |
| 919 | 907 | ||
| 920 | uvec4 p = uvec4(0); | 908 | uvec4 p = uvec4(0); |
| 909 | |||
| 910 | #define VectorIndicesFromBase(offset_base) \ | ||
| 911 | const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \ | ||
| 912 | |||
| 921 | if (v0 < area) { | 913 | if (v0 < area) { |
| 922 | p.x = unquantized[plane][v0]; | 914 | const uint offset_base = v0; |
| 915 | VectorIndicesFromBase(offset_base); | ||
| 916 | p.x = unquantized[offset]; | ||
| 923 | } | 917 | } |
| 924 | if ((v0 + 1) < (area)) { | 918 | if ((v0 + 1) < (area)) { |
| 925 | p.y = unquantized[plane][v0 + 1]; | 919 | const uint offset_base = v0 + 1; |
| 920 | VectorIndicesFromBase(offset_base); | ||
| 921 | p.y = unquantized[offset]; | ||
| 926 | } | 922 | } |
| 927 | if ((v0 + size.x) < (area)) { | 923 | if ((v0 + size.x) < (area)) { |
| 928 | p.z = unquantized[plane][(v0 + size.x)]; | 924 | const uint offset_base = v0 + size.x; |
| 925 | VectorIndicesFromBase(offset_base); | ||
| 926 | p.z = unquantized[offset]; | ||
| 929 | } | 927 | } |
| 930 | if ((v0 + size.x + 1) < (area)) { | 928 | if ((v0 + size.x + 1) < (area)) { |
| 931 | p.w = unquantized[plane][(v0 + size.x + 1)]; | 929 | const uint offset_base = v0 + size.x + 1; |
| 930 | VectorIndicesFromBase(offset_base); | ||
| 931 | p.w = unquantized[offset]; | ||
| 932 | } | 932 | } |
| 933 | unquantized_texel_weights[plane][t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4; | 933 | unquantized_texel_weights[plane * 144 + t * block_dims.x + s] = (uint(dot(p, w)) + 8) >> 4; |
| 934 | } | 934 | } |
| 935 | } | 935 | } |
| 936 | } | 936 | } |
| @@ -1208,7 +1208,8 @@ void DecompressBlock(ivec3 coord) { | |||
| 1208 | texel_flag = true; // use texel "vector" and bit stream in integer decoding | 1208 | texel_flag = true; // use texel "vector" and bit stream in integer decoding |
| 1209 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); | 1209 | DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); |
| 1210 | 1210 | ||
| 1211 | UnquantizeTexelWeights(params.dual_plane, params.size); | 1211 | uint unquantized_texel_weights[2 * 144]; |
| 1212 | UnquantizeTexelWeights(params.dual_plane, params.size, unquantized_texel_weights); | ||
| 1212 | 1213 | ||
| 1213 | for (uint j = 0; j < block_dims.y; j++) { | 1214 | for (uint j = 0; j < block_dims.y; j++) { |
| 1214 | for (uint i = 0; i < block_dims.x; i++) { | 1215 | for (uint i = 0; i < block_dims.x; i++) { |
| @@ -1220,10 +1221,10 @@ void DecompressBlock(ivec3 coord) { | |||
| 1220 | const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); | 1221 | const uvec4 C0 = ReplicateByteTo16(endpoints[local_partition][0]); |
| 1221 | const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); | 1222 | const uvec4 C1 = ReplicateByteTo16(endpoints[local_partition][1]); |
| 1222 | const uint weight_offset = (j * block_dims.x + i); | 1223 | const uint weight_offset = (j * block_dims.x + i); |
| 1223 | const uint primary_weight = unquantized_texel_weights[weight_offset][0]; | 1224 | const uint primary_weight = unquantized_texel_weights[weight_offset]; |
| 1224 | uvec4 weight_vec = uvec4(primary_weight); | 1225 | uvec4 weight_vec = uvec4(primary_weight); |
| 1225 | if (params.dual_plane) { | 1226 | if (params.dual_plane) { |
| 1226 | const uint secondary_weight = unquantized_texel_weights[weight_offset][1]; | 1227 | const uint secondary_weight = unquantized_texel_weights[weight_offset + 144]; |
| 1227 | for (uint c = 0; c < 4; c++) { | 1228 | for (uint c = 0; c < 4; c++) { |
| 1228 | const bool is_secondary = ((plane_index + 1u) & 3u) == c; | 1229 | const bool is_secondary = ((plane_index + 1u) & 3u) == c; |
| 1229 | weight_vec[c] = is_secondary ? secondary_weight : primary_weight; | 1230 | weight_vec[c] = is_secondary ? secondary_weight : primary_weight; |