diff options
| author | 2023-08-01 20:19:45 -0400 | |
|---|---|---|
| committer | 2023-08-06 14:54:57 -0400 | |
| commit | c077e467c4fb10ef92c1369e9b9116ed653af1de (patch) | |
| tree | a3fdf014a9204fce9fc6b5c267f6d05909df439c /src | |
| parent | bfe instead of mod (diff) | |
| download | yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.tar.gz yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.tar.xz yuzu-c077e467c4fb10ef92c1369e9b9116ed653af1de.zip | |
global endpoints
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 76 |
1 file changed, 36 insertions, 40 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index 5346cba0c..077bec576 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
| @@ -94,6 +94,8 @@ uint result_index = 0; | |||
| 94 | uint result_vector_max_index; | 94 | uint result_vector_max_index; |
| 95 | bool result_limit_reached = false; | 95 | bool result_limit_reached = false; |
| 96 | 96 | ||
| 97 | uvec4 endpoints[2][4]; | ||
| 98 | |||
| 97 | // EncodingData helpers | 99 | // EncodingData helpers |
| 98 | uint Encoding(EncodingData val) { | 100 | uint Encoding(EncodingData val) { |
| 99 | return bitfieldExtract(val.data, 0, 8); | 101 | return bitfieldExtract(val.data, 0, 8); |
| @@ -673,7 +675,7 @@ ivec4 BlueContract(int a, int r, int g, int b) { | |||
| 673 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); | 675 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); |
| 674 | } | 676 | } |
| 675 | 677 | ||
| 676 | void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | 678 | void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, |
| 677 | inout uint colvals_index) { | 679 | inout uint colvals_index) { |
| 678 | #define READ_UINT_VALUES(N) \ | 680 | #define READ_UINT_VALUES(N) \ |
| 679 | uint v[N]; \ | 681 | uint v[N]; \ |
| @@ -692,22 +694,22 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||
| 692 | switch (color_endpoint_mode) { | 694 | switch (color_endpoint_mode) { |
| 693 | case 0: { | 695 | case 0: { |
| 694 | READ_UINT_VALUES(2) | 696 | READ_UINT_VALUES(2) |
| 695 | ep1 = uvec4(0xFF, v[0], v[0], v[0]); | 697 | endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]); |
| 696 | ep2 = uvec4(0xFF, v[1], v[1], v[1]); | 698 | endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]); |
| 697 | break; | 699 | break; |
| 698 | } | 700 | } |
| 699 | case 1: { | 701 | case 1: { |
| 700 | READ_UINT_VALUES(2) | 702 | READ_UINT_VALUES(2) |
| 701 | const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); | 703 | const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 702 | const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); | 704 | const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); |
| 703 | ep1 = uvec4(0xFF, L0, L0, L0); | 705 | endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0); |
| 704 | ep2 = uvec4(0xFF, L1, L1, L1); | 706 | endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1); |
| 705 | break; | 707 | break; |
| 706 | } | 708 | } |
| 707 | case 4: { | 709 | case 4: { |
| 708 | READ_UINT_VALUES(4) | 710 | READ_UINT_VALUES(4) |
| 709 | ep1 = uvec4(v[2], v[0], v[0], v[0]); | 711 | endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]); |
| 710 | ep2 = uvec4(v[3], v[1], v[1], v[1]); | 712 | endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]); |
| 711 | break; | 713 | break; |
| 712 | } | 714 | } |
| 713 | case 5: { | 715 | case 5: { |
| @@ -718,24 +720,24 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||
| 718 | transferred = BitTransferSigned(v[3], v[2]); | 720 | transferred = BitTransferSigned(v[3], v[2]); |
| 719 | v[3] = transferred.x; | 721 | v[3] = transferred.x; |
| 720 | v[2] = transferred.y; | 722 | v[2] = transferred.y; |
| 721 | ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); | 723 | endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0])); |
| 722 | ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); | 724 | endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); |
| 723 | break; | 725 | break; |
| 724 | } | 726 | } |
| 725 | case 6: { | 727 | case 6: { |
| 726 | READ_UINT_VALUES(4) | 728 | READ_UINT_VALUES(4) |
| 727 | ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | 729 | endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 728 | ep2 = uvec4(0xFF, v[0], v[1], v[2]); | 730 | endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]); |
| 729 | break; | 731 | break; |
| 730 | } | 732 | } |
| 731 | case 8: { | 733 | case 8: { |
| 732 | READ_UINT_VALUES(6) | 734 | READ_UINT_VALUES(6) |
| 733 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | 735 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 734 | ep1 = uvec4(0xFF, v[0], v[2], v[4]); | 736 | endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]); |
| 735 | ep2 = uvec4(0xFF, v[1], v[3], v[5]); | 737 | endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]); |
| 736 | } else { | 738 | } else { |
| 737 | ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); | 739 | endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); |
| 738 | ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); | 740 | endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); |
| 739 | } | 741 | } |
| 740 | break; | 742 | break; |
| 741 | } | 743 | } |
| @@ -751,28 +753,28 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||
| 751 | v[5] = transferred.x; | 753 | v[5] = transferred.x; |
| 752 | v[4] = transferred.y; | 754 | v[4] = transferred.y; |
| 753 | if ((v[1] + v[3] + v[5]) >= 0) { | 755 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 754 | ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); | 756 | endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); |
| 755 | ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 757 | endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 756 | } else { | 758 | } else { |
| 757 | ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 759 | endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 758 | ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); | 760 | endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); |
| 759 | } | 761 | } |
| 760 | break; | 762 | break; |
| 761 | } | 763 | } |
| 762 | case 10: { | 764 | case 10: { |
| 763 | READ_UINT_VALUES(6) | 765 | READ_UINT_VALUES(6) |
| 764 | ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | 766 | endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 765 | ep2 = uvec4(v[5], v[0], v[1], v[2]); | 767 | endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]); |
| 766 | break; | 768 | break; |
| 767 | } | 769 | } |
| 768 | case 12: { | 770 | case 12: { |
| 769 | READ_UINT_VALUES(8) | 771 | READ_UINT_VALUES(8) |
| 770 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | 772 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 771 | ep1 = uvec4(v[6], v[0], v[2], v[4]); | 773 | endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]); |
| 772 | ep2 = uvec4(v[7], v[1], v[3], v[5]); | 774 | endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]); |
| 773 | } else { | 775 | } else { |
| 774 | ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); | 776 | endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); |
| 775 | ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); | 777 | endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); |
| 776 | } | 778 | } |
| 777 | break; | 779 | break; |
| 778 | } | 780 | } |
| @@ -794,18 +796,18 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, | |||
| 794 | v[6] = transferred.y; | 796 | v[6] = transferred.y; |
| 795 | 797 | ||
| 796 | if ((v[1] + v[3] + v[5]) >= 0) { | 798 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 797 | ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); | 799 | endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4])); |
| 798 | ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 800 | endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 799 | } else { | 801 | } else { |
| 800 | ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 802 | endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 801 | ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); | 803 | endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); |
| 802 | } | 804 | } |
| 803 | break; | 805 | break; |
| 804 | } | 806 | } |
| 805 | default: { | 807 | default: { |
| 806 | // HDR mode, or more likely a bug computing the color_endpoint_mode | 808 | // HDR mode, or more likely a bug computing the color_endpoint_mode |
| 807 | ep1 = uvec4(0xFF, 0xFF, 0, 0); | 809 | endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0); |
| 808 | ep2 = uvec4(0xFF, 0xFF, 0, 0); | 810 | endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0); |
| 809 | break; | 811 | break; |
| 810 | } | 812 | } |
| 811 | } | 813 | } |
| @@ -1198,10 +1200,6 @@ void DecompressBlock(ivec3 coord) { | |||
| 1198 | color_endpoint_mode[i] = cem; | 1200 | color_endpoint_mode[i] = cem; |
| 1199 | } | 1201 | } |
| 1200 | } | 1202 | } |
| 1201 | |||
| 1202 | uvec4 endpoints0[4]; | ||
| 1203 | uvec4 endpoints1[4]; | ||
| 1204 | { | ||
| 1205 | // This decode phase should at most push 32 elements into the vector | 1203 | // This decode phase should at most push 32 elements into the vector |
| 1206 | result_vector_max_index = 32; | 1204 | result_vector_max_index = 32; |
| 1207 | 1205 | ||
| @@ -1209,10 +1207,8 @@ void DecompressBlock(ivec3 coord) { | |||
| 1209 | uint colvals_index = 0; | 1207 | uint colvals_index = 0; |
| 1210 | DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); | 1208 | DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); |
| 1211 | for (uint i = 0; i < num_partitions; i++) { | 1209 | for (uint i = 0; i < num_partitions; i++) { |
| 1212 | ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], | 1210 | ComputeEndpoints(i, color_endpoint_mode[i], colvals_index); |
| 1213 | colvals_index); | ||
| 1214 | } | 1211 | } |
| 1215 | } | ||
| 1216 | color_endpoint_data = local_buff; | 1212 | color_endpoint_data = local_buff; |
| 1217 | color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; | 1213 | color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; |
| 1218 | const uint clear_byte_start = (weight_bits >> 3) + 1; | 1214 | const uint clear_byte_start = (weight_bits >> 3) + 1; |
| @@ -1247,8 +1243,8 @@ void DecompressBlock(ivec3 coord) { | |||
| 1247 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, | 1243 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, |
| 1248 | (block_dims.y * block_dims.x) < 32); | 1244 | (block_dims.y * block_dims.x) < 32); |
| 1249 | } | 1245 | } |
| 1250 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); | 1246 | const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]); |
| 1251 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); | 1247 | const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]); |
| 1252 | const uint weight_offset = (j * block_dims.x + i); | 1248 | const uint weight_offset = (j * block_dims.x + i); |
| 1253 | const uint array_index = weight_offset / 4; | 1249 | const uint array_index = weight_offset / 4; |
| 1254 | const uint vector_index = bfe(weight_offset, 0, 2); | 1250 | const uint vector_index = bfe(weight_offset, 0, 2); |