summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2020-04-09 03:58:25 -0300
committer: ReinUsesLisp, 2020-04-09 03:58:25 -0300
commit: 5de130beea5abaafd4d6d80e8b06e63d23a98c97 (patch)
tree: eff1ada20bbb1b7a596e99047f3dffeedbed979b /src
parent: astc: Move Replicate to a constexpr LUT when possible (diff)
download: yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.tar.gz
yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.tar.xz
yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.zip
astc: Implement a fast precompiled alternative for Replicate
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/textures/astc.cpp 59
1 files changed, 57 insertions, 2 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 0985cb578..55f9aa0e4 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -680,6 +680,61 @@ static constexpr u32 ReplicateBitTo9(std::size_t value) {
680 return REPLICATE_BIT_TO_9_TABLE[value]; 680 return REPLICATE_BIT_TO_9_TABLE[value];
681} 681}
682 682
/// Constexpr lookup tables produced by MakeReplicateTable<u32, N, 8>(), one per source bit
/// width N = 1..8. FastReplicateTo8 selects one by num_bits and indexes it with the input value.
/// NOTE(review): MakeReplicateTable is defined outside this hunk; presumably entry v holds the
/// same result as Replicate(v, N, 8) - confirm against its definition.
683static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
684static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
685static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
686static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
687static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
688static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
689static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
690static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
691/// Uses a precompiled table for the most common bit widths; if num_bits is outside the expected
692/// range, falls back to the runtime implementation.
///
/// value is used directly as the table index with no bounds check, so the caller must keep it
/// within the selected table (presumably value < 2^num_bits - TODO confirm against
/// MakeReplicateTable). num_bits values 1 through 8 pick the matching REPLICATE_*_BIT_TO_8_TABLE;
/// any other width is forwarded to Replicate(value, num_bits, 8).
693static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
694 switch (num_bits) {
695 case 1:
696 return REPLICATE_1_BIT_TO_8_TABLE[value];
697 case 2:
698 return REPLICATE_2_BIT_TO_8_TABLE[value];
699 case 3:
700 return REPLICATE_3_BIT_TO_8_TABLE[value];
701 case 4:
702 return REPLICATE_4_BIT_TO_8_TABLE[value];
703 case 5:
704 return REPLICATE_5_BIT_TO_8_TABLE[value];
705 case 6:
706 return REPLICATE_6_BIT_TO_8_TABLE[value];
707 case 7:
708 return REPLICATE_7_BIT_TO_8_TABLE[value];
709 case 8:
710 return REPLICATE_8_BIT_TO_8_TABLE[value];
// No table exists for this width (0 or greater than 8): compute the result at runtime.
711 default:
712 return Replicate(value, num_bits, 8);
713 }
714}
715
/// Constexpr lookup tables produced by MakeReplicateTable<u32, N, 6>(), one per source bit
/// width N = 1..5. FastReplicateTo6 selects one by num_bits and indexes it with the input value.
/// NOTE(review): MakeReplicateTable is defined outside this hunk; presumably entry v holds the
/// same result as Replicate(v, N, 6) - confirm against its definition.
716static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
717static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
718static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
719static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
720static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
/// Table-driven counterpart of Replicate(value, num_bits, 6) for the bit widths that have a
/// precompiled table; other widths fall back to the runtime implementation.
///
/// value is used directly as the table index with no bounds check, so the caller must keep it
/// within the selected table (presumably value < 2^num_bits - TODO confirm against
/// MakeReplicateTable). num_bits values 1 through 5 pick the matching REPLICATE_*_BIT_TO_6_TABLE;
/// any other width is forwarded to Replicate(value, num_bits, 6).
721static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
722 switch (num_bits) {
723 case 1:
724 return REPLICATE_1_BIT_TO_6_TABLE[value];
725 case 2:
726 return REPLICATE_2_BIT_TO_6_TABLE[value];
727 case 3:
728 return REPLICATE_3_BIT_TO_6_TABLE[value];
729 case 4:
730 return REPLICATE_4_BIT_TO_6_TABLE[value];
731 case 5:
732 return REPLICATE_5_BIT_TO_6_TABLE[value];
// No table exists for this width (0 or greater than 5): compute the result at runtime.
733 default:
734 return Replicate(value, num_bits, 6);
735 }
736}
737
683class Pixel { 738class Pixel {
684protected: 739protected:
685 using ChannelType = s16; 740 using ChannelType = s16;
@@ -868,7 +923,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
868 switch (val.encoding) { 923 switch (val.encoding) {
869 // Replicate bits 924 // Replicate bits
870 case IntegerEncoding::JustBits: 925 case IntegerEncoding::JustBits:
871 out[outIdx++] = Replicate(bitval, bitlen, 8); 926 out[outIdx++] = FastReplicateTo8(bitval, bitlen);
872 break; 927 break;
873 928
874 // Use algorithm in C.2.13 929 // Use algorithm in C.2.13
@@ -992,7 +1047,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
992 u32 result = 0; 1047 u32 result = 0;
993 switch (val.encoding) { 1048 switch (val.encoding) {
994 case IntegerEncoding::JustBits: 1049 case IntegerEncoding::JustBits:
995 result = Replicate(bitval, bitlen, 6); 1050 result = FastReplicateTo6(bitval, bitlen);
996 break; 1051 break;
997 1052
998 case IntegerEncoding::Trit: { 1053 case IntegerEncoding::Trit: {