summary | refs | log | tree | commit | diff
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
author: Ameer J, 2023-08-01 19:48:19 -0400
committer: Ameer J, 2023-08-06 14:54:57 -0400
commit: 05ee37a1f0641a2e1a15e9052371a5ce0cd20058 (patch)
tree: 6ae5e2fb18f3f6916b8220e6c0debdbd7c3ddeb2 /src/video_core/host_shaders
parent: gl (diff)
download: yuzu-05ee37a1f0641a2e1a15e9052371a5ce0cd20058.tar.gz
yuzu-05ee37a1f0641a2e1a15e9052371a5ce0cd20058.tar.xz
yuzu-05ee37a1f0641a2e1a15e9052371a5ce0cd20058.zip
amd opts
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 29
1 files changed, 13 insertions, 16 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index e8801b0ff..ca93dc2a2 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -154,7 +154,7 @@ uint FastReplicateTo8(uint value, uint num_bits) {
154 return 0; 154 return 0;
155 } 155 }
156 const uint array_index = value / 4; 156 const uint array_index = value / 4;
157 const uint vector_index = value % 4; 157 const uint vector_index = bitfieldExtract(value, 0, 2);
158 switch (num_bits) { 158 switch (num_bits) {
159 case 1: 159 case 1:
160 return 255; 160 return 255;
@@ -213,7 +213,7 @@ uint FastReplicateTo6(uint value, uint num_bits) {
213 return 0; 213 return 0;
214 } 214 }
215 const uint array_index = value / 4; 215 const uint array_index = value / 4;
216 const uint vector_index = value % 4; 216 const uint vector_index = bitfieldExtract(value, 0, 2);
217 switch (num_bits) { 217 switch (num_bits) {
218 case 1: 218 case 1:
219 return 63; 219 return 63;
@@ -536,8 +536,8 @@ void DecodeIntegerSequence(uint max_range, uint num_values) {
536 } 536 }
537} 537}
538 538
539void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, 539uvec4 color_values[8];
540 out uvec4 color_values[8]) { 540void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits) {
541 uint num_values = 0; 541 uint num_values = 0;
542 for (uint i = 0; i < num_partitions; i++) { 542 for (uint i = 0; i < num_partitions; i++) {
543 num_values += ((modes[i] >> 2) + 1) << 1; 543 num_values += ((modes[i] >> 2) + 1) << 1;
@@ -664,10 +664,7 @@ ivec2 BitTransferSigned(int a, int b) {
664} 664}
665 665
666uvec4 ClampByte(ivec4 color) { 666uvec4 ClampByte(ivec4 color) {
667 for (uint i = 0; i < 4; ++i) { 667 return uvec4(clamp(color, 0, 255));
668 color[i] = clamp(color[i], 0, 255);
669 }
670 return uvec4(color);
671} 668}
672 669
673ivec4 BlueContract(int a, int r, int g, int b) { 670ivec4 BlueContract(int a, int r, int g, int b) {
@@ -675,7 +672,7 @@ ivec4 BlueContract(int a, int r, int g, int b) {
675} 672}
676 673
677void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, 674void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
678 in uvec4 color_values[8], inout uint colvals_index) { 675 inout uint colvals_index) {
679#define READ_UINT_VALUES(N) \ 676#define READ_UINT_VALUES(N) \
680 uint v[N]; \ 677 uint v[N]; \
681 for (uint i = 0; i < N; i++) { \ 678 for (uint i = 0; i < N; i++) { \
@@ -887,8 +884,9 @@ uint UnquantizeTexelWeight(EncodingData val) {
887 return result; 884 return result;
888} 885}
889 886
890void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane, 887uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE];
891 out uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]) { 888
889void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
892 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1)); 890 const uint Ds = uint((block_dims.x * 0.5f + 1024) / (block_dims.x - 1));
893 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1)); 891 const uint Dt = uint((block_dims.y * 0.5f + 1024) / (block_dims.y - 1));
894 const uint num_planes = is_dual_plane ? 2 : 1; 892 const uint num_planes = is_dual_plane ? 2 : 1;
@@ -1205,11 +1203,11 @@ void DecompressBlock(ivec3 coord) {
1205 // This decode phase should at most push 32 elements into the vector 1203 // This decode phase should at most push 32 elements into the vector
1206 result_vector_max_index = 32; 1204 result_vector_max_index = 32;
1207 1205
1208 uvec4 color_values[8]; 1206 // uvec4 color_values[8];
1209 uint colvals_index = 0; 1207 uint colvals_index = 0;
1210 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits, color_values); 1208 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
1211 for (uint i = 0; i < num_partitions; i++) { 1209 for (uint i = 0; i < num_partitions; i++) {
1212 ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values, 1210 ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i],
1213 colvals_index); 1211 colvals_index);
1214 } 1212 }
1215 } 1213 }
@@ -1239,8 +1237,7 @@ void DecompressBlock(ivec3 coord) {
1239 } 1237 }
1240 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane)); 1238 DecodeIntegerSequence(params.max_weight, GetNumWeightValues(params.size, params.dual_plane));
1241 1239
1242 uvec4 unquantized_texel_weights[VECTOR_ARRAY_SIZE]; 1240 UnquantizeTexelWeights(params.size, params.dual_plane);
1243 UnquantizeTexelWeights(params.size, params.dual_plane, unquantized_texel_weights);
1244 for (uint j = 0; j < block_dims.y; j++) { 1241 for (uint j = 0; j < block_dims.y; j++) {
1245 for (uint i = 0; i < block_dims.x; i++) { 1242 for (uint i = 0; i < block_dims.x; i++) {
1246 uint local_partition = 0; 1243 uint local_partition = 0;