summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Ameer J, 2023-08-01 20:53:25 -0400
committer: Ameer J, 2023-08-06 14:54:57 -0400
commit: 553dd3e1202638fa8bad9fed56110ee447208ecf (patch)
tree: d6e40442bc051edd92b62a3440593beedcc739a2 /src
parent: global endpoints (diff)
download: yuzu-553dd3e1202638fa8bad9fed56110ee447208ecf.tar.gz
yuzu-553dd3e1202638fa8bad9fed56110ee447208ecf.tar.xz
yuzu-553dd3e1202638fa8bad9fed56110ee447208ecf.zip
Revert "global endpoints"
This reverts commit d8f5bfd1df2b7469ef6abcee182aa110602d1751.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/host_shaders/astc_decoder.comp | 76
1 file changed, 40 insertions, 36 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 077bec576..5346cba0c 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -94,8 +94,6 @@ uint result_index = 0;
94uint result_vector_max_index; 94uint result_vector_max_index;
95bool result_limit_reached = false; 95bool result_limit_reached = false;
96 96
97uvec4 endpoints[2][4];
98
99// EncodingData helpers 97// EncodingData helpers
100uint Encoding(EncodingData val) { 98uint Encoding(EncodingData val) {
101 return bitfieldExtract(val.data, 0, 8); 99 return bitfieldExtract(val.data, 0, 8);
@@ -675,7 +673,7 @@ ivec4 BlueContract(int a, int r, int g, int b) {
675 return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); 673 return ivec4(a, (r + b) >> 1, (g + b) >> 1, b);
676} 674}
677 675
678void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, 676void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
679 inout uint colvals_index) { 677 inout uint colvals_index) {
680#define READ_UINT_VALUES(N) \ 678#define READ_UINT_VALUES(N) \
681 uint v[N]; \ 679 uint v[N]; \
@@ -694,22 +692,22 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
694 switch (color_endpoint_mode) { 692 switch (color_endpoint_mode) {
695 case 0: { 693 case 0: {
696 READ_UINT_VALUES(2) 694 READ_UINT_VALUES(2)
697 endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]); 695 ep1 = uvec4(0xFF, v[0], v[0], v[0]);
698 endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]); 696 ep2 = uvec4(0xFF, v[1], v[1], v[1]);
699 break; 697 break;
700 } 698 }
701 case 1: { 699 case 1: {
702 READ_UINT_VALUES(2) 700 READ_UINT_VALUES(2)
703 const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); 701 const uint L0 = (v[0] >> 2) | (v[1] & 0xC0);
704 const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); 702 const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU);
705 endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0); 703 ep1 = uvec4(0xFF, L0, L0, L0);
706 endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1); 704 ep2 = uvec4(0xFF, L1, L1, L1);
707 break; 705 break;
708 } 706 }
709 case 4: { 707 case 4: {
710 READ_UINT_VALUES(4) 708 READ_UINT_VALUES(4)
711 endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]); 709 ep1 = uvec4(v[2], v[0], v[0], v[0]);
712 endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]); 710 ep2 = uvec4(v[3], v[1], v[1], v[1]);
713 break; 711 break;
714 } 712 }
715 case 5: { 713 case 5: {
@@ -720,24 +718,24 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
720 transferred = BitTransferSigned(v[3], v[2]); 718 transferred = BitTransferSigned(v[3], v[2]);
721 v[3] = transferred.x; 719 v[3] = transferred.x;
722 v[2] = transferred.y; 720 v[2] = transferred.y;
723 endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0])); 721 ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0]));
724 endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); 722 ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]));
725 break; 723 break;
726 } 724 }
727 case 6: { 725 case 6: {
728 READ_UINT_VALUES(4) 726 READ_UINT_VALUES(4)
729 endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 727 ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
730 endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]); 728 ep2 = uvec4(0xFF, v[0], v[1], v[2]);
731 break; 729 break;
732 } 730 }
733 case 8: { 731 case 8: {
734 READ_UINT_VALUES(6) 732 READ_UINT_VALUES(6)
735 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 733 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
736 endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]); 734 ep1 = uvec4(0xFF, v[0], v[2], v[4]);
737 endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]); 735 ep2 = uvec4(0xFF, v[1], v[3], v[5]);
738 } else { 736 } else {
739 endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); 737 ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5])));
740 endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); 738 ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4])));
741 } 739 }
742 break; 740 break;
743 } 741 }
@@ -753,28 +751,28 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
753 v[5] = transferred.x; 751 v[5] = transferred.x;
754 v[4] = transferred.y; 752 v[4] = transferred.y;
755 if ((v[1] + v[3] + v[5]) >= 0) { 753 if ((v[1] + v[3] + v[5]) >= 0) {
756 endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); 754 ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4]));
757 endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 755 ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
758 } else { 756 } else {
759 endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 757 ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]));
760 endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); 758 ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4]));
761 } 759 }
762 break; 760 break;
763 } 761 }
764 case 10: { 762 case 10: {
765 READ_UINT_VALUES(6) 763 READ_UINT_VALUES(6)
766 endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 764 ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8);
767 endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]); 765 ep2 = uvec4(v[5], v[0], v[1], v[2]);
768 break; 766 break;
769 } 767 }
770 case 12: { 768 case 12: {
771 READ_UINT_VALUES(8) 769 READ_UINT_VALUES(8)
772 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 770 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) {
773 endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]); 771 ep1 = uvec4(v[6], v[0], v[2], v[4]);
774 endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]); 772 ep2 = uvec4(v[7], v[1], v[3], v[5]);
775 } else { 773 } else {
776 endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); 774 ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5])));
777 endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); 775 ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4])));
778 } 776 }
779 break; 777 break;
780 } 778 }
@@ -796,18 +794,18 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode,
796 v[6] = transferred.y; 794 v[6] = transferred.y;
797 795
798 if ((v[1] + v[3] + v[5]) >= 0) { 796 if ((v[1] + v[3] + v[5]) >= 0) {
799 endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4])); 797 ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4]));
800 endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 798 ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
801 } else { 799 } else {
802 endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 800 ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]));
803 endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); 801 ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4]));
804 } 802 }
805 break; 803 break;
806 } 804 }
807 default: { 805 default: {
808 // HDR mode, or more likely a bug computing the color_endpoint_mode 806 // HDR mode, or more likely a bug computing the color_endpoint_mode
809 endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0); 807 ep1 = uvec4(0xFF, 0xFF, 0, 0);
810 endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0); 808 ep2 = uvec4(0xFF, 0xFF, 0, 0);
811 break; 809 break;
812 } 810 }
813 } 811 }
@@ -1200,6 +1198,10 @@ void DecompressBlock(ivec3 coord) {
1200 color_endpoint_mode[i] = cem; 1198 color_endpoint_mode[i] = cem;
1201 } 1199 }
1202 } 1200 }
1201
1202 uvec4 endpoints0[4];
1203 uvec4 endpoints1[4];
1204 {
1203 // This decode phase should at most push 32 elements into the vector 1205 // This decode phase should at most push 32 elements into the vector
1204 result_vector_max_index = 32; 1206 result_vector_max_index = 32;
1205 1207
@@ -1207,8 +1209,10 @@ void DecompressBlock(ivec3 coord) {
1207 uint colvals_index = 0; 1209 uint colvals_index = 0;
1208 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); 1210 DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits);
1209 for (uint i = 0; i < num_partitions; i++) { 1211 for (uint i = 0; i < num_partitions; i++) {
1210 ComputeEndpoints(i, color_endpoint_mode[i], colvals_index); 1212 ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i],
1213 colvals_index);
1211 } 1214 }
1215 }
1212 color_endpoint_data = local_buff; 1216 color_endpoint_data = local_buff;
1213 color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; 1217 color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
1214 const uint clear_byte_start = (weight_bits >> 3) + 1; 1218 const uint clear_byte_start = (weight_bits >> 3) + 1;
@@ -1243,8 +1247,8 @@ void DecompressBlock(ivec3 coord) {
1243 local_partition = Select2DPartition(partition_index, i, j, num_partitions, 1247 local_partition = Select2DPartition(partition_index, i, j, num_partitions,
1244 (block_dims.y * block_dims.x) < 32); 1248 (block_dims.y * block_dims.x) < 32);
1245 } 1249 }
1246 const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]); 1250 const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
1247 const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]); 1251 const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
1248 const uint weight_offset = (j * block_dims.x + i); 1252 const uint weight_offset = (j * block_dims.x + i);
1249 const uint array_index = weight_offset / 4; 1253 const uint array_index = weight_offset / 4;
1250 const uint vector_index = bfe(weight_offset, 0, 2); 1254 const uint vector_index = bfe(weight_offset, 0, 2);