diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/host_shaders/astc_decoder.comp | 76 |
1 file changed, 40 insertions, 36 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 077bec576..5346cba0c 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp | |||
| @@ -94,8 +94,6 @@ uint result_index = 0; | |||
| 94 | uint result_vector_max_index; | 94 | uint result_vector_max_index; |
| 95 | bool result_limit_reached = false; | 95 | bool result_limit_reached = false; |
| 96 | 96 | ||
| 97 | uvec4 endpoints[2][4]; | ||
| 98 | |||
| 99 | // EncodingData helpers | 97 | // EncodingData helpers |
| 100 | uint Encoding(EncodingData val) { | 98 | uint Encoding(EncodingData val) { |
| 101 | return bitfieldExtract(val.data, 0, 8); | 99 | return bitfieldExtract(val.data, 0, 8); |
| @@ -675,7 +673,7 @@ ivec4 BlueContract(int a, int r, int g, int b) { | |||
| 675 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); | 673 | return ivec4(a, (r + b) >> 1, (g + b) >> 1, b); |
| 676 | } | 674 | } |
| 677 | 675 | ||
| 678 | void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | 676 | void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, |
| 679 | inout uint colvals_index) { | 677 | inout uint colvals_index) { |
| 680 | #define READ_UINT_VALUES(N) \ | 678 | #define READ_UINT_VALUES(N) \ |
| 681 | uint v[N]; \ | 679 | uint v[N]; \ |
| @@ -694,22 +692,22 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | |||
| 694 | switch (color_endpoint_mode) { | 692 | switch (color_endpoint_mode) { |
| 695 | case 0: { | 693 | case 0: { |
| 696 | READ_UINT_VALUES(2) | 694 | READ_UINT_VALUES(2) |
| 697 | endpoints[0][ep_index] = uvec4(0xFF, v[0], v[0], v[0]); | 695 | ep1 = uvec4(0xFF, v[0], v[0], v[0]); |
| 698 | endpoints[1][ep_index] = uvec4(0xFF, v[1], v[1], v[1]); | 696 | ep2 = uvec4(0xFF, v[1], v[1], v[1]); |
| 699 | break; | 697 | break; |
| 700 | } | 698 | } |
| 701 | case 1: { | 699 | case 1: { |
| 702 | READ_UINT_VALUES(2) | 700 | READ_UINT_VALUES(2) |
| 703 | const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); | 701 | const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 704 | const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); | 702 | const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); |
| 705 | endpoints[0][ep_index] = uvec4(0xFF, L0, L0, L0); | 703 | ep1 = uvec4(0xFF, L0, L0, L0); |
| 706 | endpoints[1][ep_index] = uvec4(0xFF, L1, L1, L1); | 704 | ep2 = uvec4(0xFF, L1, L1, L1); |
| 707 | break; | 705 | break; |
| 708 | } | 706 | } |
| 709 | case 4: { | 707 | case 4: { |
| 710 | READ_UINT_VALUES(4) | 708 | READ_UINT_VALUES(4) |
| 711 | endpoints[0][ep_index] = uvec4(v[2], v[0], v[0], v[0]); | 709 | ep1 = uvec4(v[2], v[0], v[0], v[0]); |
| 712 | endpoints[1][ep_index] = uvec4(v[3], v[1], v[1], v[1]); | 710 | ep2 = uvec4(v[3], v[1], v[1], v[1]); |
| 713 | break; | 711 | break; |
| 714 | } | 712 | } |
| 715 | case 5: { | 713 | case 5: { |
| @@ -720,24 +718,24 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | |||
| 720 | transferred = BitTransferSigned(v[3], v[2]); | 718 | transferred = BitTransferSigned(v[3], v[2]); |
| 721 | v[3] = transferred.x; | 719 | v[3] = transferred.x; |
| 722 | v[2] = transferred.y; | 720 | v[2] = transferred.y; |
| 723 | endpoints[0][ep_index] = ClampByte(ivec4(v[2], v[0], v[0], v[0])); | 721 | ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); |
| 724 | endpoints[1][ep_index] = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); | 722 | ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); |
| 725 | break; | 723 | break; |
| 726 | } | 724 | } |
| 727 | case 6: { | 725 | case 6: { |
| 728 | READ_UINT_VALUES(4) | 726 | READ_UINT_VALUES(4) |
| 729 | endpoints[0][ep_index] = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | 727 | ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 730 | endpoints[1][ep_index] = uvec4(0xFF, v[0], v[1], v[2]); | 728 | ep2 = uvec4(0xFF, v[0], v[1], v[2]); |
| 731 | break; | 729 | break; |
| 732 | } | 730 | } |
| 733 | case 8: { | 731 | case 8: { |
| 734 | READ_UINT_VALUES(6) | 732 | READ_UINT_VALUES(6) |
| 735 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | 733 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 736 | endpoints[0][ep_index] = uvec4(0xFF, v[0], v[2], v[4]); | 734 | ep1 = uvec4(0xFF, v[0], v[2], v[4]); |
| 737 | endpoints[1][ep_index] = uvec4(0xFF, v[1], v[3], v[5]); | 735 | ep2 = uvec4(0xFF, v[1], v[3], v[5]); |
| 738 | } else { | 736 | } else { |
| 739 | endpoints[0][ep_index] = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); | 737 | ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); |
| 740 | endpoints[1][ep_index] = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); | 738 | ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); |
| 741 | } | 739 | } |
| 742 | break; | 740 | break; |
| 743 | } | 741 | } |
| @@ -753,28 +751,28 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | |||
| 753 | v[5] = transferred.x; | 751 | v[5] = transferred.x; |
| 754 | v[4] = transferred.y; | 752 | v[4] = transferred.y; |
| 755 | if ((v[1] + v[3] + v[5]) >= 0) { | 753 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 756 | endpoints[0][ep_index] = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); | 754 | ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); |
| 757 | endpoints[1][ep_index] = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 755 | ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 758 | } else { | 756 | } else { |
| 759 | endpoints[0][ep_index] = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 757 | ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 760 | endpoints[1][ep_index] = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); | 758 | ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); |
| 761 | } | 759 | } |
| 762 | break; | 760 | break; |
| 763 | } | 761 | } |
| 764 | case 10: { | 762 | case 10: { |
| 765 | READ_UINT_VALUES(6) | 763 | READ_UINT_VALUES(6) |
| 766 | endpoints[0][ep_index] = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); | 764 | ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); |
| 767 | endpoints[1][ep_index] = uvec4(v[5], v[0], v[1], v[2]); | 765 | ep2 = uvec4(v[5], v[0], v[1], v[2]); |
| 768 | break; | 766 | break; |
| 769 | } | 767 | } |
| 770 | case 12: { | 768 | case 12: { |
| 771 | READ_UINT_VALUES(8) | 769 | READ_UINT_VALUES(8) |
| 772 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { | 770 | if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { |
| 773 | endpoints[0][ep_index] = uvec4(v[6], v[0], v[2], v[4]); | 771 | ep1 = uvec4(v[6], v[0], v[2], v[4]); |
| 774 | endpoints[1][ep_index] = uvec4(v[7], v[1], v[3], v[5]); | 772 | ep2 = uvec4(v[7], v[1], v[3], v[5]); |
| 775 | } else { | 773 | } else { |
| 776 | endpoints[0][ep_index] = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); | 774 | ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); |
| 777 | endpoints[1][ep_index] = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); | 775 | ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); |
| 778 | } | 776 | } |
| 779 | break; | 777 | break; |
| 780 | } | 778 | } |
| @@ -796,18 +794,18 @@ void ComputeEndpoints(uint ep_index, uint color_endpoint_mode, | |||
| 796 | v[6] = transferred.y; | 794 | v[6] = transferred.y; |
| 797 | 795 | ||
| 798 | if ((v[1] + v[3] + v[5]) >= 0) { | 796 | if ((v[1] + v[3] + v[5]) >= 0) { |
| 799 | endpoints[0][ep_index] = ClampByte(ivec4(v[6], v[0], v[2], v[4])); | 797 | ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); |
| 800 | endpoints[1][ep_index] = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 798 | ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 801 | } else { | 799 | } else { |
| 802 | endpoints[0][ep_index] = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); | 800 | ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); |
| 803 | endpoints[1][ep_index] = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); | 801 | ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); |
| 804 | } | 802 | } |
| 805 | break; | 803 | break; |
| 806 | } | 804 | } |
| 807 | default: { | 805 | default: { |
| 808 | // HDR mode, or more likely a bug computing the color_endpoint_mode | 806 | // HDR mode, or more likely a bug computing the color_endpoint_mode |
| 809 | endpoints[0][ep_index] = uvec4(0xFF, 0xFF, 0, 0); | 807 | ep1 = uvec4(0xFF, 0xFF, 0, 0); |
| 810 | endpoints[1][ep_index] = uvec4(0xFF, 0xFF, 0, 0); | 808 | ep2 = uvec4(0xFF, 0xFF, 0, 0); |
| 811 | break; | 809 | break; |
| 812 | } | 810 | } |
| 813 | } | 811 | } |
| @@ -1200,6 +1198,10 @@ void DecompressBlock(ivec3 coord) { | |||
| 1200 | color_endpoint_mode[i] = cem; | 1198 | color_endpoint_mode[i] = cem; |
| 1201 | } | 1199 | } |
| 1202 | } | 1200 | } |
| 1201 | |||
| 1202 | uvec4 endpoints0[4]; | ||
| 1203 | uvec4 endpoints1[4]; | ||
| 1204 | { | ||
| 1203 | // This decode phase should at most push 32 elements into the vector | 1205 | // This decode phase should at most push 32 elements into the vector |
| 1204 | result_vector_max_index = 32; | 1206 | result_vector_max_index = 32; |
| 1205 | 1207 | ||
| @@ -1207,8 +1209,10 @@ void DecompressBlock(ivec3 coord) { | |||
| 1207 | uint colvals_index = 0; | 1209 | uint colvals_index = 0; |
| 1208 | DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); | 1210 | DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits); |
| 1209 | for (uint i = 0; i < num_partitions; i++) { | 1211 | for (uint i = 0; i < num_partitions; i++) { |
| 1210 | ComputeEndpoints(i, color_endpoint_mode[i], colvals_index); | 1212 | ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], |
| 1213 | colvals_index); | ||
| 1211 | } | 1214 | } |
| 1215 | } | ||
| 1212 | color_endpoint_data = local_buff; | 1216 | color_endpoint_data = local_buff; |
| 1213 | color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; | 1217 | color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; |
| 1214 | const uint clear_byte_start = (weight_bits >> 3) + 1; | 1218 | const uint clear_byte_start = (weight_bits >> 3) + 1; |
| @@ -1243,8 +1247,8 @@ void DecompressBlock(ivec3 coord) { | |||
| 1243 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, | 1247 | local_partition = Select2DPartition(partition_index, i, j, num_partitions, |
| 1244 | (block_dims.y * block_dims.x) < 32); | 1248 | (block_dims.y * block_dims.x) < 32); |
| 1245 | } | 1249 | } |
| 1246 | const uvec4 C0 = ReplicateByteTo16(endpoints[0][local_partition]); | 1250 | const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); |
| 1247 | const uvec4 C1 = ReplicateByteTo16(endpoints[1][local_partition]); | 1251 | const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); |
| 1248 | const uint weight_offset = (j * block_dims.x + i); | 1252 | const uint weight_offset = (j * block_dims.x + i); |
| 1249 | const uint array_index = weight_offset / 4; | 1253 | const uint array_index = weight_offset / 4; |
| 1250 | const uint vector_index = bfe(weight_offset, 0, 2); | 1254 | const uint vector_index = bfe(weight_offset, 0, 2); |