diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 132 |
1 files changed, 63 insertions, 69 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 5e922d1fe..4014d4bfe 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -804,11 +804,7 @@ uint UnquantizeTexelWeight(EncodingData val) { | |||
| 804 | return result; | 804 | return result; |
| 805 | } | 805 | } |
| 806 | 806 | ||
| 807 | uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; | ||
| 808 | |||
| 809 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | 807 | void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { |
| 810 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); | ||
| 811 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); | ||
| 812 | const uint num_planes = is_dual_plane ? 2 : 1; | 808 | const uint num_planes = is_dual_plane ? 2 : 1; |
| 813 | const uint area = size.x * size.y; | 809 | const uint area = size.x * size.y; |
| 814 | const uint loop_count = min(result_index, area * num_planes); | 810 | const uint loop_count = min(result_index, area * num_planes); |
| @@ -818,58 +814,71 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { | |||
| 818 | result_vector[array_index][vector_index] = | 814 | result_vector[array_index][vector_index] = |
| 819 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); | 815 | UnquantizeTexelWeight(GetEncodingFromVector(itr)); |
| 820 | } | 816 | } |
| 821 | for (uint plane = 0; plane < num_planes; ++plane) { | 817 | } |
| 822 | for (uint t = 0; t < block_dims.y; t++) { | 818 | |
| 823 | for (uint s = 0; s < block_dims.x; s++) { | 819 | uint GetUnquantizedTexelWieght(uint offset_base, uint plane, bool is_dual_plane) { |
| 824 | const uint cs = Ds * s; | 820 | const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; |
| 825 | const uint ct = Dt * t; | 821 | const uint array_index = offset / 4; |
| 826 | const uint gs = (cs * (size.x - 1) + 32) >> 6; | ||
| 827 | const uint gt = (ct * (size.y - 1) + 32) >> 6; | ||
| 828 | const uint js = gs >> 4; | ||
| 829 | const uint fs = gs & 0xF; | ||
| 830 | const uint jt = gt >> 4; | ||
| 831 | const uint ft = gt & 0x0F; | ||
| 832 | const uint w11 = (fs * ft + 8) >> 4; | ||
| 833 | const uint w10 = ft - w11; | ||
| 834 | const uint w01 = fs - w11; | ||
| 835 | const uint w00 = 16 - fs - ft + w11; | ||
| 836 | const uvec4 w = uvec4(w00, w01, w10, w11); | ||
| 837 | const uint v0 = jt * size.x + js; | ||
| 838 | |||
| 839 | uvec4 p = uvec4(0); | ||
| 840 | |||
| 841 | #define VectorIndicesFromBase(offset_base) \ | ||
| 842 | const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \ | ||
| 843 | const uint array_index = offset / 4; \ | ||
| 844 | const uint vector_index = offset % 4; | 822 | const uint vector_index = offset % 4; |
| 823 | return result_vector[array_index][vector_index]; | ||
| 824 | } | ||
| 845 | 825 | ||
| 846 | if (v0 < area) { | 826 | uvec4 GetUnquantizedWeightVector(uint t, uint s, uvec2 size, uint plane_index, bool is_dual_plane) { |
| 847 | const uint offset_base = v0; | 827 | const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); |
| 848 | VectorIndicesFromBase(offset_base); | 828 | const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); |
| 849 | p.x = result_vector[array_index][vector_index]; | 829 | const uint area = size.x * size.y; |
| 850 | } | 830 | |
| 851 | if ((v0 + 1) < (area)) { | 831 | const uint cs = Ds * s; |
| 852 | const uint offset_base = v0 + 1; | 832 | const uint ct = Dt * t; |
| 853 | VectorIndicesFromBase(offset_base); | 833 | const uint gs = (cs * (size.x - 1) + 32) >> 6; |
| 854 | p.y = result_vector[array_index][vector_index]; | 834 | const uint gt = (ct * (size.y - 1) + 32) >> 6; |
| 855 | } | 835 | const uint js = gs >> 4; |
| 856 | if ((v0 + size.x) < (area)) { | 836 | const uint fs = gs & 0xF; |
| 857 | const uint offset_base = v0 + size.x; | 837 | const uint jt = gt >> 4; |
| 858 | VectorIndicesFromBase(offset_base); | 838 | const uint ft = gt & 0x0F; |
| 859 | p.z = result_vector[array_index][vector_index]; | 839 | const uint w11 = (fs * ft + 8) >> 4; |
| 860 | } | 840 | const uint w10 = ft - w11; |
| 861 | if ((v0 + size.x + 1) < (area)) { | 841 | const uint w01 = fs - w11; |
| 862 | const uint offset_base = v0 + size.x + 1; | 842 | const uint w00 = 16 - fs - ft + w11; |
| 863 | VectorIndicesFromBase(offset_base); | 843 | const uvec4 w = uvec4(w00, w01, w10, w11); |
| 864 | p.w = result_vector[array_index][vector_index]; | 844 | const uint v0 = jt * size.x + js; |
| 865 | } | 845 | |
| 866 | const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; | 846 | uvec4 p0 = uvec4(0); |
| 867 | const uint array_index = offset / 4; | 847 | uvec4 p1 = uvec4(0); |
| 868 | const uint vector_index = offset % 4; | 848 | |
| 869 | unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; | 849 | if (v0 < area) { |
| 870 | } | 850 | const uint offset_base = v0; |
| 851 | p0.x = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); | ||
| 852 | p1.x = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); | ||
| 853 | } | ||
| 854 | if ((v0 + 1) < (area)) { | ||
| 855 | const uint offset_base = v0 + 1; | ||
| 856 | p0.y = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); | ||
| 857 | p1.y = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); | ||
| 858 | } | ||
| 859 | if ((v0 + size.x) < (area)) { | ||
| 860 | const uint offset_base = v0 + size.x; | ||
| 861 | p0.z = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); | ||
| 862 | p1.z = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); | ||
| 863 | } | ||
| 864 | if ((v0 + size.x + 1) < (area)) { | ||
| 865 | const uint offset_base = v0 + size.x + 1; | ||
| 866 | p0.w = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane); | ||
| 867 | p1.w = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane); | ||
| 868 | } | ||
| 869 | |||
| 870 | const uint primary_weight = (uint(dot(p0, w)) + 8) >> 4; | ||
| 871 | |||
| 872 | uvec4 weight_vec = uvec4(primary_weight); | ||
| 873 | |||
| 874 | if (is_dual_plane) { | ||
| 875 | const uint secondary_weight = (uint(dot(p1, w)) + 8) >> 4; | ||
| 876 | for (uint c = 0; c < 4; c++) { | ||
| 877 | const bool is_secondary = ((plane_index + 1u) & 3u) == c; | ||
| 878 | weight_vec[c] = is_secondary ? secondary_weight : primary_weight; | ||
| 871 | } | 879 | } |
| 872 | } | 880 | } |
| 881 | return weight_vec; | ||
| 873 | } | 882 | } |
| 874 | 883 | ||
| 875 | int FindLayout(uint mode) { | 884 | int FindLayout(uint mode) { |
| @@ -1155,25 +1164,10 @@ void DecompressBlock(ivec3 coord) { | |||
| 1155 | } | 1164 | } |
| 1156 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); | 1165 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |
| 1157 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); | 1166 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |
| 1158 | const uint weight_offset = (j * block_dims.x + i); | 1167 | const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane); |
| 1159 | const uint array_index = weight_offset / 4; | ||
| 1160 | const uint vector_index = weight_offset % 4; | ||
| 1161 | const uint primary_weight = unquantized_texel_weights[array_index][vector_index]; | ||
| 1162 | uvec4 weight_vec = uvec4(primary_weight); | ||
| 1163 | if (dual_plane) { | ||
| 1164 | const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS; | ||
| 1165 | const uint secondary_array_index = secondary_weight_offset / 4; | ||
| 1166 | const uint secondary_vector_index = secondary_weight_offset % 4; | ||
| 1167 | const uint secondary_weight = | ||
| 1168 | unquantized_texel_weights[secondary_array_index][secondary_vector_index]; | ||
| 1169 | for (uint c = 0; c < 4; c++) { | ||
| 1170 | const bool is_secondary = ((plane_index + 1u) & 3u) == c; | ||
| 1171 | weight_vec[c] = is_secondary ? secondary_weight : primary_weight; | ||
| 1172 | } | ||
| 1173 | } | ||
| 1174 | const vec4 Cf = | 1168 | const vec4 Cf = |
| 1175 | vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); | 1169 | vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); |
| 1176 | const vec4 p = (Cf / 65535.0); | 1170 | const vec4 p = (Cf / 65535.0f); |
| 1177 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); | 1171 | imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); |
| 1178 | } | 1172 | } |
| 1179 | } | 1173 | } |