summaryrefslogtreecommitdiff
path: root/src/video_core/host_shaders
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/host_shaders')
-rw-r--r--src/video_core/host_shaders/astc_decoder.comp218
1 files changed, 109 insertions, 109 deletions
diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp
index ca93dc2a2..0e611ede5 100644
--- a/src/video_core/host_shaders/astc_decoder.comp
+++ b/src/video_core/host_shaders/astc_decoder.comp
@@ -88,7 +88,7 @@ int color_bitsread = 0;
88#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4) 88#define VECTOR_ARRAY_SIZE DIVCEIL(ARRAY_NUM_ELEMENTS * 2, 4)
89uvec4 result_vector[VECTOR_ARRAY_SIZE]; 89uvec4 result_vector[VECTOR_ARRAY_SIZE];
90 90
91int result_index = 0; 91uint result_index = 0;
92uint result_vector_max_index; 92uint result_vector_max_index;
93bool result_limit_reached = false; 93bool result_limit_reached = false;
94 94
@@ -131,8 +131,8 @@ void ResultEmplaceBack(EncodingData val) {
131 result_limit_reached = true; 131 result_limit_reached = true;
132 return; 132 return;
133 } 133 }
134 const uint array_index = result_index / 4; 134 const uint array_index = result_index / 4u;
135 const uint vector_index = result_index % 4; 135 const uint vector_index = result_index % 4u;
136 result_vector[array_index][vector_index] = val.data; 136 result_vector[array_index][vector_index] = val.data;
137 ++result_index; 137 ++result_index;
138} 138}
@@ -428,69 +428,68 @@ uint BitsOp(uint bits, uint start, uint end) {
428 428
429void DecodeQuintBlock(uint num_bits) { 429void DecodeQuintBlock(uint num_bits) {
430 uvec3 m; 430 uvec3 m;
431 uvec3 q; 431 uvec4 qQ;
432 uint Q;
433 m[0] = StreamColorBits(num_bits); 432 m[0] = StreamColorBits(num_bits);
434 Q = StreamColorBits(3); 433 qQ.w = StreamColorBits(3);
435 m[1] = StreamColorBits(num_bits); 434 m[1] = StreamColorBits(num_bits);
436 Q |= StreamColorBits(2) << 3; 435 qQ.w |= StreamColorBits(2) << 3;
437 m[2] = StreamColorBits(num_bits); 436 m[2] = StreamColorBits(num_bits);
438 Q |= StreamColorBits(2) << 5; 437 qQ.w |= StreamColorBits(2) << 5;
439 if (BitsOp(Q, 1, 2) == 3 && BitsOp(Q, 5, 6) == 0) { 438 if (BitsOp(qQ.w, 1, 2) == 3 && BitsOp(qQ.w, 5, 6) == 0) {
440 q.x = 4; 439 qQ.x = 4;
441 q.y = 4; 440 qQ.y = 4;
442 q.z = (BitsBracket(Q, 0) << 2) | ((BitsBracket(Q, 4) & ~BitsBracket(Q, 0)) << 1) | 441 qQ.z = (BitsBracket(qQ.w, 0) << 2) | ((BitsBracket(qQ.w, 4) & ~BitsBracket(qQ.w, 0)) << 1) |
443 (BitsBracket(Q, 3) & ~BitsBracket(Q, 0)); 442 (BitsBracket(qQ.w, 3) & ~BitsBracket(qQ.w, 0));
444 } else { 443 } else {
445 uint C = 0; 444 uint C = 0;
446 if (BitsOp(Q, 1, 2) == 3) { 445 if (BitsOp(qQ.w, 1, 2) == 3) {
447 q.z = 4; 446 qQ.z = 4;
448 C = (BitsOp(Q, 3, 4) << 3) | ((~BitsOp(Q, 5, 6) & 3) << 1) | BitsBracket(Q, 0); 447 C = (BitsOp(qQ.w, 3, 4) << 3) | ((~BitsOp(qQ.w, 5, 6) & 3) << 1) | BitsBracket(qQ.w, 0);
449 } else { 448 } else {
450 q.z = BitsOp(Q, 5, 6); 449 qQ.z = BitsOp(qQ.w, 5, 6);
451 C = BitsOp(Q, 0, 4); 450 C = BitsOp(qQ.w, 0, 4);
452 } 451 }
453 if (BitsOp(C, 0, 2) == 5) { 452 if (BitsOp(C, 0, 2) == 5) {
454 q.y = 4; 453 qQ.y = 4;
455 q.x = BitsOp(C, 3, 4); 454 qQ.x = BitsOp(C, 3, 4);
456 } else { 455 } else {
457 q.y = BitsOp(C, 3, 4); 456 qQ.y = BitsOp(C, 3, 4);
458 q.x = BitsOp(C, 0, 2); 457 qQ.x = BitsOp(C, 0, 2);
459 } 458 }
460 } 459 }
461 for (uint i = 0; i < 3; i++) { 460 for (uint i = 0; i < 3; i++) {
462 const EncodingData val = CreateEncodingData(QUINT, num_bits, m[i], q[i]); 461 const EncodingData val = CreateEncodingData(QUINT, num_bits, m[i], qQ[i]);
463 ResultEmplaceBack(val); 462 ResultEmplaceBack(val);
464 } 463 }
465} 464}
466 465
467void DecodeTritBlock(uint num_bits) { 466void DecodeTritBlock(uint num_bits) {
468 uint m[5]; 467 uvec4 m;
469 uint t[5]; 468 uvec4 t;
470 uint T; 469 uvec3 Tm5t5;
471 m[0] = StreamColorBits(num_bits); 470 m[0] = StreamColorBits(num_bits);
472 T = StreamColorBits(2); 471 Tm5t5.x = StreamColorBits(2);
473 m[1] = StreamColorBits(num_bits); 472 m[1] = StreamColorBits(num_bits);
474 T |= StreamColorBits(2) << 2; 473 Tm5t5.x |= StreamColorBits(2) << 2;
475 m[2] = StreamColorBits(num_bits); 474 m[2] = StreamColorBits(num_bits);
476 T |= StreamColorBits(1) << 4; 475 Tm5t5.x |= StreamColorBits(1) << 4;
477 m[3] = StreamColorBits(num_bits); 476 m[3] = StreamColorBits(num_bits);
478 T |= StreamColorBits(2) << 5; 477 Tm5t5.x |= StreamColorBits(2) << 5;
479 m[4] = StreamColorBits(num_bits); 478 Tm5t5.y = StreamColorBits(num_bits);
480 T |= StreamColorBits(1) << 7; 479 Tm5t5.x |= StreamColorBits(1) << 7;
481 uint C = 0; 480 uint C = 0;
482 if (BitsOp(T, 2, 4) == 7) { 481 if (BitsOp(Tm5t5.x, 2, 4) == 7) {
483 C = (BitsOp(T, 5, 7) << 2) | BitsOp(T, 0, 1); 482 C = (BitsOp(Tm5t5.x, 5, 7) << 2) | BitsOp(Tm5t5.x, 0, 1);
484 t[4] = 2; 483 Tm5t5.z = 2;
485 t[3] = 2; 484 t[3] = 2;
486 } else { 485 } else {
487 C = BitsOp(T, 0, 4); 486 C = BitsOp(Tm5t5.x, 0, 4);
488 if (BitsOp(T, 5, 6) == 3) { 487 if (BitsOp(Tm5t5.x, 5, 6) == 3) {
489 t[4] = 2; 488 Tm5t5.z = 2;
490 t[3] = BitsBracket(T, 7); 489 t[3] = BitsBracket(Tm5t5.x, 7);
491 } else { 490 } else {
492 t[4] = BitsBracket(T, 7); 491 Tm5t5.z = BitsBracket(Tm5t5.x, 7);
493 t[3] = BitsOp(T, 5, 6); 492 t[3] = BitsOp(Tm5t5.x, 5, 6);
494 } 493 }
495 } 494 }
496 if (BitsOp(C, 0, 1) == 3) { 495 if (BitsOp(C, 0, 1) == 3) {
@@ -506,10 +505,12 @@ void DecodeTritBlock(uint num_bits) {
506 t[1] = BitsOp(C, 2, 3); 505 t[1] = BitsOp(C, 2, 3);
507 t[0] = (BitsBracket(C, 1) << 1) | (BitsBracket(C, 0) & ~BitsBracket(C, 1)); 506 t[0] = (BitsBracket(C, 1) << 1) | (BitsBracket(C, 0) & ~BitsBracket(C, 1));
508 } 507 }
509 for (uint i = 0; i < 5; i++) { 508 for (uint i = 0; i < 4; i++) {
510 const EncodingData val = CreateEncodingData(TRIT, num_bits, m[i], t[i]); 509 const EncodingData val = CreateEncodingData(TRIT, num_bits, m[i], t[i]);
511 ResultEmplaceBack(val); 510 ResultEmplaceBack(val);
512 } 511 }
512 const EncodingData val = CreateEncodingData(TRIT, num_bits, Tm5t5.y, Tm5t5.z);
513 ResultEmplaceBack(val);
513} 514}
514 515
515void DecodeIntegerSequence(uint max_range, uint num_values) { 516void DecodeIntegerSequence(uint max_range, uint num_values) {
@@ -674,129 +675,128 @@ ivec4 BlueContract(int a, int r, int g, int b) {
674void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, 675void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode,
675 inout uint colvals_index) { 676 inout uint colvals_index) {
676#define READ_UINT_VALUES(N) \ 677#define READ_UINT_VALUES(N) \
677 uint v[N]; \ 678 uvec4 V[2]; \
678 for (uint i = 0; i < N; i++) { \ 679 for (uint i = 0; i < N; i++) { \
679 v[i] = color_values[colvals_index / 4][colvals_index % 4]; \ 680 V[i / 4][i % 4] = color_values[colvals_index / 4][colvals_index % 4]; \
680 ++colvals_index; \ 681 ++colvals_index; \
681 } 682 }
682
683#define READ_INT_VALUES(N) \ 683#define READ_INT_VALUES(N) \
684 int v[N]; \ 684 ivec4 V[2]; \
685 for (uint i = 0; i < N; i++) { \ 685 for (uint i = 0; i < N; i++) { \
686 v[i] = int(color_values[colvals_index / 4][colvals_index % 4]); \ 686 V[i / 4][i % 4] = int(color_values[colvals_index / 4][colvals_index % 4]); \
687 ++colvals_index; \ 687 ++colvals_index; \
688 } 688 }
689 689
690 switch (color_endpoint_mode) { 690 switch (color_endpoint_mode) {
691 case 0: { 691 case 0: {
692 READ_UINT_VALUES(2) 692 READ_UINT_VALUES(2)
693 ep1 = uvec4(0xFF, v[0], v[0], v[0]); 693 ep1 = uvec4(0xFF, V[0].x, V[0].x, V[0].x);
694 ep2 = uvec4(0xFF, v[1], v[1], v[1]); 694 ep2 = uvec4(0xFF, V[0].y, V[0].y, V[0].y);
695 break; 695 break;
696 } 696 }
697 case 1: { 697 case 1: {
698 READ_UINT_VALUES(2) 698 READ_UINT_VALUES(2)
699 const uint L0 = (v[0] >> 2) | (v[1] & 0xC0); 699 const uint L0 = (V[0].x >> 2) | (V[0].y & 0xC0);
700 const uint L1 = min(L0 + (v[1] & 0x3F), 0xFFU); 700 const uint L1 = min(L0 + (V[0].y & 0x3F), 0xFFU);
701 ep1 = uvec4(0xFF, L0, L0, L0); 701 ep1 = uvec4(0xFF, L0, L0, L0);
702 ep2 = uvec4(0xFF, L1, L1, L1); 702 ep2 = uvec4(0xFF, L1, L1, L1);
703 break; 703 break;
704 } 704 }
705 case 4: { 705 case 4: {
706 READ_UINT_VALUES(4) 706 READ_UINT_VALUES(4)
707 ep1 = uvec4(v[2], v[0], v[0], v[0]); 707 ep1 = uvec4(V[0].z, V[0].x, V[0].x, V[0].x);
708 ep2 = uvec4(v[3], v[1], v[1], v[1]); 708 ep2 = uvec4(V[0].w, V[0].y, V[0].y, V[0].y);
709 break; 709 break;
710 } 710 }
711 case 5: { 711 case 5: {
712 READ_INT_VALUES(4) 712 READ_INT_VALUES(4)
713 ivec2 transferred = BitTransferSigned(v[1], v[0]); 713 ivec2 transferred = BitTransferSigned(V[0].y, V[0].x);
714 v[1] = transferred.x; 714 V[0].y = transferred.x;
715 v[0] = transferred.y; 715 V[0].x = transferred.y;
716 transferred = BitTransferSigned(v[3], v[2]); 716 transferred = BitTransferSigned(V[0].w, V[0].z);
717 v[3] = transferred.x; 717 V[0].w = transferred.x;
718 v[2] = transferred.y; 718 V[0].z = transferred.y;
719 ep1 = ClampByte(ivec4(v[2], v[0], v[0], v[0])); 719 ep1 = ClampByte(ivec4(V[0].z, V[0].x, V[0].x, V[0].x));
720 ep2 = ClampByte(ivec4(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1])); 720 ep2 = ClampByte(ivec4(V[0].z + V[0].w, V[0].x + V[0].y, V[0].x + V[0].y, V[0].x + V[0].y));
721 break; 721 break;
722 } 722 }
723 case 6: { 723 case 6: {
724 READ_UINT_VALUES(4) 724 READ_UINT_VALUES(4)
725 ep1 = uvec4(0xFF, (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 725 ep1 = uvec4(0xFF, (V[0].x * V[0].w) >> 8, (V[0].y * V[0].w) >> 8, (V[0].z * V[0].w) >> 8);
726 ep2 = uvec4(0xFF, v[0], v[1], v[2]); 726 ep2 = uvec4(0xFF, V[0].x, V[0].y, V[0].z);
727 break; 727 break;
728 } 728 }
729 case 8: { 729 case 8: {
730 READ_UINT_VALUES(6) 730 READ_UINT_VALUES(6)
731 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 731 if ((V[0].y + V[0].w + V[1].y) >= (V[0].x + V[0].z + V[1].x)) {
732 ep1 = uvec4(0xFF, v[0], v[2], v[4]); 732 ep1 = uvec4(0xFF, V[0].x, V[0].z, V[1].x);
733 ep2 = uvec4(0xFF, v[1], v[3], v[5]); 733 ep2 = uvec4(0xFF, V[0].y, V[0].w, V[1].y);
734 } else { 734 } else {
735 ep1 = uvec4(BlueContract(0xFF, int(v[1]), int(v[3]), int(v[5]))); 735 ep1 = uvec4(BlueContract(0xFF, int(V[0].y), int(V[0].w), int(V[1].y)));
736 ep2 = uvec4(BlueContract(0xFF, int(v[0]), int(v[2]), int(v[4]))); 736 ep2 = uvec4(BlueContract(0xFF, int(V[0].x), int(V[0].z), int(V[1].x)));
737 } 737 }
738 break; 738 break;
739 } 739 }
740 case 9: { 740 case 9: {
741 READ_INT_VALUES(6) 741 READ_INT_VALUES(6)
742 ivec2 transferred = BitTransferSigned(v[1], v[0]); 742 ivec2 transferred = BitTransferSigned(V[0].y, V[0].x);
743 v[1] = transferred.x; 743 V[0].y = transferred.x;
744 v[0] = transferred.y; 744 V[0].x = transferred.y;
745 transferred = BitTransferSigned(v[3], v[2]); 745 transferred = BitTransferSigned(V[0].w, V[0].z);
746 v[3] = transferred.x; 746 V[0].w = transferred.x;
747 v[2] = transferred.y; 747 V[0].z = transferred.y;
748 transferred = BitTransferSigned(v[5], v[4]); 748 transferred = BitTransferSigned(V[1].y, V[1].x);
749 v[5] = transferred.x; 749 V[1].y = transferred.x;
750 v[4] = transferred.y; 750 V[1].x = transferred.y;
751 if ((v[1] + v[3] + v[5]) >= 0) { 751 if ((V[0].y + V[0].w + V[1].y) >= 0) {
752 ep1 = ClampByte(ivec4(0xFF, v[0], v[2], v[4])); 752 ep1 = ClampByte(ivec4(0xFF, V[0].x, V[0].z, V[1].x));
753 ep2 = ClampByte(ivec4(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 753 ep2 = ClampByte(ivec4(0xFF, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y));
754 } else { 754 } else {
755 ep1 = ClampByte(BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5])); 755 ep1 = ClampByte(BlueContract(0xFF, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y));
756 ep2 = ClampByte(BlueContract(0xFF, v[0], v[2], v[4])); 756 ep2 = ClampByte(BlueContract(0xFF, V[0].x, V[0].z, V[1].x));
757 } 757 }
758 break; 758 break;
759 } 759 }
760 case 10: { 760 case 10: {
761 READ_UINT_VALUES(6) 761 READ_UINT_VALUES(6)
762 ep1 = uvec4(v[4], (v[0] * v[3]) >> 8, (v[1] * v[3]) >> 8, (v[2] * v[3]) >> 8); 762 ep1 = uvec4(V[1].x, (V[0].x * V[0].w) >> 8, (V[0].y * V[0].w) >> 8, (V[0].z * V[0].w) >> 8);
763 ep2 = uvec4(v[5], v[0], v[1], v[2]); 763 ep2 = uvec4(V[1].y, V[0].x, V[0].y, V[0].z);
764 break; 764 break;
765 } 765 }
766 case 12: { 766 case 12: {
767 READ_UINT_VALUES(8) 767 READ_UINT_VALUES(8)
768 if ((v[1] + v[3] + v[5]) >= (v[0] + v[2] + v[4])) { 768 if ((V[0].y + V[0].w + V[1].y) >= (V[0].x + V[0].z + V[1].x)) {
769 ep1 = uvec4(v[6], v[0], v[2], v[4]); 769 ep1 = uvec4(V[1].z, V[0].x, V[0].z, V[1].x);
770 ep2 = uvec4(v[7], v[1], v[3], v[5]); 770 ep2 = uvec4(V[1].w, V[0].y, V[0].w, V[1].y);
771 } else { 771 } else {
772 ep1 = uvec4(BlueContract(int(v[7]), int(v[1]), int(v[3]), int(v[5]))); 772 ep1 = uvec4(BlueContract(int(V[1].w), int(V[0].y), int(V[0].w), int(V[1].y)));
773 ep2 = uvec4(BlueContract(int(v[6]), int(v[0]), int(v[2]), int(v[4]))); 773 ep2 = uvec4(BlueContract(int(V[1].z), int(V[0].x), int(V[0].z), int(V[1].x)));
774 } 774 }
775 break; 775 break;
776 } 776 }
777 case 13: { 777 case 13: {
778 READ_INT_VALUES(8) 778 READ_INT_VALUES(8)
779 ivec2 transferred = BitTransferSigned(v[1], v[0]); 779 ivec2 transferred = BitTransferSigned(V[0].y, V[0].x);
780 v[1] = transferred.x; 780 V[0].y = transferred.x;
781 v[0] = transferred.y; 781 V[0].x = transferred.y;
782 transferred = BitTransferSigned(v[3], v[2]); 782 transferred = BitTransferSigned(V[0].w, V[0].z);
783 v[3] = transferred.x; 783 V[0].w = transferred.x;
784 v[2] = transferred.y; 784 V[0].z = transferred.y;
785 785
786 transferred = BitTransferSigned(v[5], v[4]); 786 transferred = BitTransferSigned(V[1].y, V[1].x);
787 v[5] = transferred.x; 787 V[1].y = transferred.x;
788 v[4] = transferred.y; 788 V[1].x = transferred.y;
789 789
790 transferred = BitTransferSigned(v[7], v[6]); 790 transferred = BitTransferSigned(V[1].w, V[1].z);
791 v[7] = transferred.x; 791 V[1].w = transferred.x;
792 v[6] = transferred.y; 792 V[1].z = transferred.y;
793 793
794 if ((v[1] + v[3] + v[5]) >= 0) { 794 if ((V[0].y + V[0].w + V[1].y) >= 0) {
795 ep1 = ClampByte(ivec4(v[6], v[0], v[2], v[4])); 795 ep1 = ClampByte(ivec4(V[1].z, V[0].x, V[0].z, V[1].x));
796 ep2 = ClampByte(ivec4(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 796 ep2 = ClampByte(ivec4(V[1].w + V[1].z, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y));
797 } else { 797 } else {
798 ep1 = ClampByte(BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5])); 798 ep1 = ClampByte(BlueContract(V[1].z + V[1].w, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y));
799 ep2 = ClampByte(BlueContract(v[6], v[0], v[2], v[4])); 799 ep2 = ClampByte(BlueContract(V[1].z, V[0].x, V[0].z, V[1].x));
800 } 800 }
801 break; 801 break;
802 } 802 }