summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Ameer J, 2023-08-01 20:19:45 -0400
committer: Ameer J, 2023-08-06 14:54:57 -0400
commit: c077e467c4fb10ef92c1369e9b9116ed653af1de (patch)
tree: a3fdf014a9204fce9fc6b5c267f6d05909df439c /src
parent: bfe instead of mod (diff)
download: yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.tar.gz
yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.tar.xz
yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.zip
global endpoints
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 76
1 file changed, 36 insertions, 40 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 5346cba0c..077bec576 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -94,6 +94,8 @@ uint result_index = 0;
94uint result_vector_max_index; 94uint result_vector_max_index;
95bool result_limit_reached = false; 95bool result_limit_reached = false;
96 96
97uvec4 endpoints[2][4];
98
97// EncodingData helpers 99// EncodingData helpers
98uint Encoding(EncodingData val) { 100uint Encoding(EncodingData val) {
99 return bitfieldExtract(val.data, 0, 8); 101 return bitfieldExtract(val.data, 0, 8);
@@ -673,7 +675,7 @@ ivec4 BlueContract(int a, int r, int g, int b) {
673 return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); 675 return ivec4(a, (r + b) >> 1, (g + b) >> 1, b);
674} 676}
675 677
676void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, 678void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
677 inout uint colvals_index) { 679 inout uint colvals_index) {
678#define READ_UINT_VALUES(N) \ 680#define READ_UINT_VALUES(N) \
679 uint v[N]; \ 681 uint v[N]; \
@@ -692,22 +694,22 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
692 switch (color_endpoint_mode) { 694 switch (color_endpoint_mode) {
693 case 0: { 695 case 0: {
694 READ_UINT_VALUES(2) 696 READ_UINT_VALUES(2)
695 ep1 = uvec4(0xFF, v[0], v[0], v[0]); 697 endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]);
696 ep2 = uvec4(0xFF, v[1], v[1], v[1]); 698 endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]);
697 break; 699 break;
698 } 700 }
699 case 1: { 701 case 1: {
700 READ_UINT_VALUES(2) 702 READ_UINT_VALUES(2)
701 const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); 703 const uint L0 = (v[0] >> 2) | (v[1] & 0xC0);
702 const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); 704 const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU);
703 ep1 = uvec4(0xFF, L0, L0, L0); 705 endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0);
704 ep2 = uvec4(0xFF, L1, L1, L1); 706 endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1);
705 break; 707 break;
706 } 708 }
707 case 4: { 709 case 4: {
708 READ_UINT_VALUES(4) 710 READ_UINT_VALUES(4)
709 ep1 = uvec4(v[2], v[0], v[0], v[0]); 711 endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]);
710 ep2 = uvec4(v[3], v[1], v[1], v[1]); 712 endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]);
711 break; 713 break;
712 } 714 }
713 case 5: { 715 case 5: {
@@ -718,24 +720,24 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
718 transferred = BitTransferSigned(v[3], v[2]); 720 transferred = BitTransferSigned(v[3], v[2]);
719 v[3] = transferred.x; 721 v[3] = transferred.x;
720 v[2] = transferred.y; 722 v[2] = transferred.y;
721 ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); 723 endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0]));
722 ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); 724 endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]));
723 break; 725 break;
724 } 726 }
725 case 6: { 727 case 6: {
726 READ_UINT_VALUES(4) 728 READ_UINT_VALUES(4)
727 ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 729 endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
728 ep2 = uvec4(0xFF, v[0], v[1], v[2]); 730 endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]);
729 break; 731 break;
730 } 732 }
731 case 8: { 733 case 8: {
732 READ_UINT_VALUES(6) 734 READ_UINT_VALUES(6)
733 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 735 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
734 ep1 = uvec4(0xFF, v[0], v[2], v[4]); 736 endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]);
735 ep2 = uvec4(0xFF, v[1], v[3], v[5]); 737 endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]);
736 } else { 738 } else {
737 ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); 739 endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5])));
738 ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); 740 endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4])));
739 } 741 }
740 break; 742 break;
741 } 743 }
@@ -751,28 +753,28 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
751 v[5] = transferred.x; 753 v[5] = transferred.x;
752 v[4] = transferred.y; 754 v[4] = transferred.y;
753 if ((v[1] + v[3] + v[5]) >= 0) { 755 if ((v[1] + v[3] + v[5]) >= 0) {
754 ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); 756 endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4]));
755 ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 757 endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
756 } else { 758 } else {
757 ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 759 endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
758 ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); 760 endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4]));
759 } 761 }
760 break; 762 break;
761 } 763 }
762 case 10: { 764 case 10: {
763 READ_UINT_VALUES(6) 765 READ_UINT_VALUES(6)
764 ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 766 endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
765 ep2 = uvec4(v[5], v[0], v[1], v[2]); 767 endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]);
766 break; 768 break;
767 } 769 }
768 case 12: { 770 case 12: {
769 READ_UINT_VALUES(8) 771 READ_UINT_VALUES(8)
770 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 772 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
771 ep1 = uvec4(v[6], v[0], v[2], v[4]); 773 endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]);
772 ep2 = uvec4(v[7], v[1], v[3], v[5]); 774 endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]);
773 } else { 775 } else {
774 ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); 776 endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5])));
775 ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); 777 endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4])));
776 } 778 }
777 break; 779 break;
778 } 780 }
@@ -794,18 +796,18 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
794 v[6] = transferred.y; 796 v[6] = transferred.y;
795 797
796 if ((v[1] + v[3] + v[5]) >= 0) { 798 if ((v[1] + v[3] + v[5]) >= 0) {
797 ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); 799 endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4]));
798 ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 800 endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
799 } else { 801 } else {
800 ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 802 endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
801 ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); 803 endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4]));
802 } 804 }
803 break; 805 break;
804 } 806 }
805 default: { 807 default: {
806 // HDR mode, or more likely a bug computing the color_endpoint_mode 808 // HDR mode, or more likely a bug computing the color_endpoint_mode
807 ep1 = uvec4(0xFF, 0xFF, 0, 0); 809 endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0);
808 ep2 = uvec4(0xFF, 0xFF, 0, 0); 810 endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0);
809 break; 811 break;
810 } 812 }
811 } 813 }
@@ -1198,10 +1200,6 @@ void DecompressBlock(ivec3 coord) {
1198 color_endpoint_mode[i] = cem; 1200 color_endpoint_mode[i] = cem;
1199 } 1201 }
1200 } 1202 }
1201
1202 uvec4 endpoints0[4];
1203 uvec4 endpoints1[4];
1204 {
1205 // This decode phase should at most push 32 elements into the vector 1203 // This decode phase should at most push 32 elements into the vector
1206 result_vector_max_index = 32; 1204 result_vector_max_index = 32;
1207 1205
@@ -1209,10 +1207,8 @@ void DecompressBlock(ivec3 coord) {
1209 uint colvals_index = 0; 1207 uint colvals_index = 0;
1210 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); 1208 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
1211 for (uint i = 0; i < num_partitions; i++) { 1209 for (uint i = 0; i < num_partitions; i++) {
1212 ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], 1210 ComputeEndpoints(i, color_endpoint_mode[i], colvals_index);
1213 colvals_index);
1214 } 1211 }
1215 }
1216 color_endpoint_data = local_buff; 1212 color_endpoint_data = local_buff;
1217 color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; 1213 color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
1218 const uint clear_byte_start = (weight_bits >> 3) + 1; 1214 const uint clear_byte_start = (weight_bits >> 3) + 1;
@@ -1247,8 +1243,8 @@ void DecompressBlock(ivec3 coord) {
1247 local_partition = Select2DPartition(partition_index, i, j, num_partitions, 1243 local_partition = Select2DPartition(partition_index, i, j, num_partitions,
1248 (block_dims.y * block_dims.x) < 32); 1244 (block_dims.y * block_dims.x) < 32);
1249 } 1245 }
1250 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); 1246 const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]);
1251 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); 1247 const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]);
1252 const uint weight_offset = (j * block_dims.x + i); 1248 const uint weight_offset = (j * block_dims.x + i);
1253 const uint array_index = weight_offset / 4; 1249 const uint array_index = weight_offset / 4;
1254 const uint vector_index = bfe(weight_offset, 0, 2); 1250 const uint vector_index = bfe(weight_offset, 0, 2);