diff options
| author | 2020-04-09 03:58:25 -0300 | |
|---|---|---|
| committer | 2020-04-09 03:58:25 -0300 | |
| commit | 5de130beea5abaafd4d6d80e8b06e63d23a98c97 (patch) | |
| tree | eff1ada20bbb1b7a596e99047f3dffeedbed979b /src | |
| parent | astc: Move Replicate to a constexpr LUT when possible (diff) | |
| download | yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.tar.gz yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.tar.xz yuzu-5de130beea5abaafd4d6d80e8b06e63d23a98c97.zip | |
astc: Implement a fast precompiled alternative for Replicate
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/textures/astc.cpp | 59 |
1 files changed, 57 insertions, 2 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 0985cb578..55f9aa0e4 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -680,6 +680,61 @@ static constexpr u32 ReplicateBitTo9(std::size_t value) { | |||
| 680 | return REPLICATE_BIT_TO_9_TABLE[value]; | 680 | return REPLICATE_BIT_TO_9_TABLE[value]; |
| 681 | } | 681 | } |
| 682 | 682 | ||
| 683 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 684 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 685 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 686 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 687 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 688 | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||
| 689 | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||
| 690 | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||
| 691 | /// Use a precompiled table for the most common usages; if num_bits is not in the expected range, | ||
| 692 | /// fall back to the runtime implementation. | ||
| 693 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 694 | switch (num_bits) { | ||
| 695 | case 1: | ||
| 696 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 697 | case 2: | ||
| 698 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 699 | case 3: | ||
| 700 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 701 | case 4: | ||
| 702 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 703 | case 5: | ||
| 704 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 705 | case 6: | ||
| 706 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 707 | case 7: | ||
| 708 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 709 | case 8: | ||
| 710 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 711 | default: | ||
| 712 | return Replicate(value, num_bits, 8); | ||
| 713 | } | ||
| 714 | } | ||
| 715 | |||
| 716 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 717 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 718 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 719 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 720 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 721 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 722 | switch (num_bits) { | ||
| 723 | case 1: | ||
| 724 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 725 | case 2: | ||
| 726 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 727 | case 3: | ||
| 728 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 729 | case 4: | ||
| 730 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 731 | case 5: | ||
| 732 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 733 | default: | ||
| 734 | return Replicate(value, num_bits, 6); | ||
| 735 | } | ||
| 736 | } | ||
| 737 | |||
| 683 | class Pixel { | 738 | class Pixel { |
| 684 | protected: | 739 | protected: |
| 685 | using ChannelType = s16; | 740 | using ChannelType = s16; |
| @@ -868,7 +923,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | |||
| 868 | switch (val.encoding) { | 923 | switch (val.encoding) { |
| 869 | // Replicate bits | 924 | // Replicate bits |
| 870 | case IntegerEncoding::JustBits: | 925 | case IntegerEncoding::JustBits: |
| 871 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 926 | out[outIdx++] = FastReplicateTo8(bitval, bitlen); |
| 872 | break; | 927 | break; |
| 873 | 928 | ||
| 874 | // Use algorithm in C.2.13 | 929 | // Use algorithm in C.2.13 |
| @@ -992,7 +1047,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 992 | u32 result = 0; | 1047 | u32 result = 0; |
| 993 | switch (val.encoding) { | 1048 | switch (val.encoding) { |
| 994 | case IntegerEncoding::JustBits: | 1049 | case IntegerEncoding::JustBits: |
| 995 | result = Replicate(bitval, bitlen, 6); | 1050 | result = FastReplicateTo6(bitval, bitlen); |
| 996 | break; | 1051 | break; |
| 997 | 1052 | ||
| 998 | case IntegerEncoding::Trit: { | 1053 | case IntegerEncoding::Trit: { |