summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp132
1 files changed, 63 insertions, 69 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 5e922d1fe..4014d4bfe 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -804,11 +804,7 @@ uint UnquantizeTexelWeight(EncodingData val) {
804 return result; 804 return result;
805} 805}
806 806
807uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
808
809void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) { 807void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
810 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
811 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
812 const uint num_planes = is_dual_plane ? 2 : 1; 808 const uint num_planes = is_dual_plane ? 2 : 1;
813 const uint area = size.x * size.y; 809 const uint area = size.x * size.y;
814 const uint loop_count = min(result_index, area * num_planes); 810 const uint loop_count = min(result_index, area * num_planes);
@@ -818,58 +814,71 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
818 result_vector[array_index][vector_index] = 814 result_vector[array_index][vector_index] =
819 UnquantizeTexelWeight(GetEncodingFromVector(itr)); 815 UnquantizeTexelWeight(GetEncodingFromVector(itr));
820 } 816 }
821 for (uint plane = 0; plane < num_planes; ++plane) { 817}
822 for (uint t = 0; t < block_dims.y; t++) { 818
823 for (uint s = 0; s < block_dims.x; s++) { 819uint GetUnquantizedTexelWieght(uint offset_base, uint plane, bool is_dual_plane) {
824 const uint cs = Ds * s; 820 const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base;
825 const uint ct = Dt * t; 821 const uint array_index = offset / 4;
826 const uint gs = (cs * (size.x - 1) + 32) >> 6;
827 const uint gt = (ct * (size.y - 1) + 32) >> 6;
828 const uint js = gs >> 4;
829 const uint fs = gs & 0xF;
830 const uint jt = gt >> 4;
831 const uint ft = gt & 0x0F;
832 const uint w11 = (fs * ft + 8) >> 4;
833 const uint w10 = ft - w11;
834 const uint w01 = fs - w11;
835 const uint w00 = 16 - fs - ft + w11;
836 const uvec4 w = uvec4(w00, w01, w10, w11);
837 const uint v0 = jt * size.x + js;
838
839 uvec4 p = uvec4(0);
840
841#define VectorIndicesFromBase(offset_base) \
842 const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base; \
843 const uint array_index = offset / 4; \
844 const uint vector_index = offset % 4; 822 const uint vector_index = offset % 4;
823 return result_vector[array_index][vector_index];
824}
845 825
846 if (v0 < area) { 826uvec4 GetUnquantizedWeightVector(uint t, uint s, uvec2 size, uint plane_index, bool is_dual_plane) {
847 const uint offset_base = v0; 827 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
848 VectorIndicesFromBase(offset_base); 828 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
849 p.x = result_vector[array_index][vector_index]; 829 const uint area = size.x * size.y;
850 } 830
851 if ((v0 + 1) < (area)) { 831 const uint cs = Ds * s;
852 const uint offset_base = v0 + 1; 832 const uint ct = Dt * t;
853 VectorIndicesFromBase(offset_base); 833 const uint gs = (cs * (size.x - 1) + 32) >> 6;
854 p.y = result_vector[array_index][vector_index]; 834 const uint gt = (ct * (size.y - 1) + 32) >> 6;
855 } 835 const uint js = gs >> 4;
856 if ((v0 + size.x) < (area)) { 836 const uint fs = gs & 0xF;
857 const uint offset_base = v0 + size.x; 837 const uint jt = gt >> 4;
858 VectorIndicesFromBase(offset_base); 838 const uint ft = gt & 0x0F;
859 p.z = result_vector[array_index][vector_index]; 839 const uint w11 = (fs * ft + 8) >> 4;
860 } 840 const uint w10 = ft - w11;
861 if ((v0 + size.x + 1) < (area)) { 841 const uint w01 = fs - w11;
862 const uint offset_base = v0 + size.x + 1; 842 const uint w00 = 16 - fs - ft + w11;
863 VectorIndicesFromBase(offset_base); 843 const uvec4 w = uvec4(w00, w01, w10, w11);
864 p.w = result_vector[array_index][vector_index]; 844 const uint v0 = jt * size.x + js;
865 } 845
866 const uint offset = (t * block_dims.x + s) + ARRAY_NUM_ELEMENTS * plane; 846 uvec4 p0 = uvec4(0);
867 const uint array_index = offset / 4; 847 uvec4 p1 = uvec4(0);
868 const uint vector_index = offset % 4; 848
869 unquantized_texel_weights[array_index][vector_index] = (uint(dot(p, w)) + 8) >> 4; 849 if (v0 < area) {
870 } 850 const uint offset_base = v0;
851 p0.x = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
852 p1.x = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
853 }
854 if ((v0 + 1) < (area)) {
855 const uint offset_base = v0 + 1;
856 p0.y = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
857 p1.y = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
858 }
859 if ((v0 + size.x) < (area)) {
860 const uint offset_base = v0 + size.x;
861 p0.z = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
862 p1.z = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
863 }
864 if ((v0 + size.x + 1) < (area)) {
865 const uint offset_base = v0 + size.x + 1;
866 p0.w = GetUnquantizedTexelWieght(offset_base, 0, is_dual_plane);
867 p1.w = GetUnquantizedTexelWieght(offset_base, 1, is_dual_plane);
868 }
869
870 const uint primary_weight = (uint(dot(p0, w)) + 8) >> 4;
871
872 uvec4 weight_vec = uvec4(primary_weight);
873
874 if (is_dual_plane) {
875 const uint secondary_weight = (uint(dot(p1, w)) + 8) >> 4;
876 for (uint c = 0; c < 4; c++) {
877 const bool is_secondary = ((plane_index + 1u) & 3u) == c;
878 weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
871 } 879 }
872 } 880 }
881 return weight_vec;
873} 882}
874 883
875int FindLayout(uint mode) { 884int FindLayout(uint mode) {
@@ -1155,25 +1164,10 @@ void DecompressBlock(ivec3 coord) {
1155 } 1164 }
1156 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); 1165 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
1157 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); 1166 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
1158 const uint weight_offset = (j * block_dims.x + i); 1167 const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
1159 const uint array_index = weight_offset / 4;
1160 const uint vector_index = weight_offset % 4;
1161 const uint primary_weight = unquantized_texel_weights[array_index][vector_index];
1162 uvec4 weight_vec = uvec4(primary_weight);
1163 if (dual_plane) {
1164 const uint secondary_weight_offset = (j * block_dims.x + i) + ARRAY_NUM_ELEMENTS;
1165 const uint secondary_array_index = secondary_weight_offset / 4;
1166 const uint secondary_vector_index = secondary_weight_offset % 4;
1167 const uint secondary_weight =
1168 unquantized_texel_weights[secondary_array_index][secondary_vector_index];
1169 for (uint c = 0; c < 4; c++) {
1170 const bool is_secondary = ((plane_index + 1u) & 3u) == c;
1171 weight_vec[c] = is_secondary ? secondary_weight : primary_weight;
1172 }
1173 }
1174 const vec4 Cf = 1168 const vec4 Cf =
1175 vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); 1169 vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64);
1176 const vec4 p = (Cf / 65535.0); 1170 const vec4 p = (Cf / 65535.0f);
1177 imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); 1171 imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
1178 } 1172 }
1179 } 1173 }