diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/textures/astc.cpp | 241 |
1 files changed, 159 insertions, 82 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 062b4f252..365bde2f1 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -20,6 +20,8 @@ | |||
| 20 | #include <cstring> | 20 | #include <cstring> |
| 21 | #include <vector> | 21 | #include <vector> |
| 22 | 22 | ||
| 23 | #include <boost/container/static_vector.hpp> | ||
| 24 | |||
| 23 | #include "common/common_types.h" | 25 | #include "common/common_types.h" |
| 24 | 26 | ||
| 25 | #include "video_core/textures/astc.h" | 27 | #include "video_core/textures/astc.h" |
| @@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) { | |||
| 39 | 41 | ||
| 40 | class InputBitStream { | 42 | class InputBitStream { |
| 41 | public: | 43 | public: |
| 42 | explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) | 44 | constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
| 43 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 45 | : cur_byte{ptr}, next_bit{start_offset % 8} {} |
| 44 | 46 | ||
| 45 | std::size_t GetBitsRead() const { | 47 | constexpr std::size_t GetBitsRead() const { |
| 46 | return m_BitsRead; | 48 | return bits_read; |
| 47 | } | 49 | } |
| 48 | 50 | ||
| 49 | u32 ReadBit() { | 51 | constexpr bool ReadBit() { |
| 50 | u32 bit = *m_CurByte >> m_NextBit++; | 52 | const bool bit = (*cur_byte >> next_bit++) & 1; |
| 51 | while (m_NextBit >= 8) { | 53 | while (next_bit >= 8) { |
| 52 | m_NextBit -= 8; | 54 | next_bit -= 8; |
| 53 | m_CurByte++; | 55 | cur_byte++; |
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | m_BitsRead++; | 58 | bits_read++; |
| 57 | return bit & 1; | 59 | return bit; |
| 58 | } | 60 | } |
| 59 | 61 | ||
| 60 | u32 ReadBits(std::size_t nBits) { | 62 | constexpr u32 ReadBits(std::size_t nBits) { |
| 61 | u32 ret = 0; | 63 | u32 ret = 0; |
| 62 | for (std::size_t i = 0; i < nBits; ++i) { | 64 | for (std::size_t i = 0; i < nBits; ++i) { |
| 63 | ret |= (ReadBit() & 1) << i; | 65 | ret |= (ReadBit() & 1) << i; |
| @@ -66,7 +68,7 @@ public: | |||
| 66 | } | 68 | } |
| 67 | 69 | ||
| 68 | template <std::size_t nBits> | 70 | template <std::size_t nBits> |
| 69 | u32 ReadBits() { | 71 | constexpr u32 ReadBits() { |
| 70 | u32 ret = 0; | 72 | u32 ret = 0; |
| 71 | for (std::size_t i = 0; i < nBits; ++i) { | 73 | for (std::size_t i = 0; i < nBits; ++i) { |
| 72 | ret |= (ReadBit() & 1) << i; | 74 | ret |= (ReadBit() & 1) << i; |
| @@ -75,64 +77,58 @@ public: | |||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | private: | 79 | private: |
| 78 | const u8* m_CurByte; | 80 | const u8* cur_byte; |
| 79 | std::size_t m_NextBit = 0; | 81 | std::size_t next_bit = 0; |
| 80 | std::size_t m_BitsRead = 0; | 82 | std::size_t bits_read = 0; |
| 81 | }; | 83 | }; |
| 82 | 84 | ||
| 83 | class OutputBitStream { | 85 | class OutputBitStream { |
| 84 | public: | 86 | public: |
| 85 | explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) | 87 | constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) |
| 86 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 88 | : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} |
| 87 | |||
| 88 | ~OutputBitStream() = default; | ||
| 89 | 89 | ||
| 90 | s32 GetBitsWritten() const { | 90 | constexpr std::size_t GetBitsWritten() const { |
| 91 | return m_BitsWritten; | 91 | return bits_written; |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | void WriteBitsR(u32 val, u32 nBits) { | 94 | constexpr void WriteBitsR(u32 val, u32 nBits) { |
| 95 | for (u32 i = 0; i < nBits; i++) { | 95 | for (u32 i = 0; i < nBits; i++) { |
| 96 | WriteBit((val >> (nBits - i - 1)) & 1); | 96 | WriteBit((val >> (nBits - i - 1)) & 1); |
| 97 | } | 97 | } |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | void WriteBits(u32 val, u32 nBits) { | 100 | constexpr void WriteBits(u32 val, u32 nBits) { |
| 101 | for (u32 i = 0; i < nBits; i++) { | 101 | for (u32 i = 0; i < nBits; i++) { |
| 102 | WriteBit((val >> i) & 1); | 102 | WriteBit((val >> i) & 1); |
| 103 | } | 103 | } |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | private: | 106 | private: |
| 107 | void WriteBit(s32 b) { | 107 | constexpr void WriteBit(bool b) { |
| 108 | 108 | if (bits_written >= num_bits) { | |
| 109 | if (done) | ||
| 110 | return; | 109 | return; |
| 110 | } | ||
| 111 | 111 | ||
| 112 | const u32 mask = 1 << m_NextBit++; | 112 | const u32 mask = 1 << next_bit++; |
| 113 | 113 | ||
| 114 | // clear the bit | 114 | // clear the bit |
| 115 | *m_CurByte &= static_cast<u8>(~mask); | 115 | *cur_byte &= static_cast<u8>(~mask); |
| 116 | 116 | ||
| 117 | // Write the bit, if necessary | 117 | // Write the bit, if necessary |
| 118 | if (b) | 118 | if (b) |
| 119 | *m_CurByte |= static_cast<u8>(mask); | 119 | *cur_byte |= static_cast<u8>(mask); |
| 120 | 120 | ||
| 121 | // Next byte? | 121 | // Next byte? |
| 122 | if (m_NextBit >= 8) { | 122 | if (next_bit >= 8) { |
| 123 | m_CurByte += 1; | 123 | cur_byte += 1; |
| 124 | m_NextBit = 0; | 124 | next_bit = 0; |
| 125 | } | 125 | } |
| 126 | |||
| 127 | done = done || ++m_BitsWritten >= m_NumBits; | ||
| 128 | } | 126 | } |
| 129 | 127 | ||
| 130 | s32 m_BitsWritten = 0; | 128 | u8* cur_byte; |
| 131 | const s32 m_NumBits; | 129 | std::size_t num_bits; |
| 132 | u8* m_CurByte; | 130 | std::size_t bits_written = 0; |
| 133 | s32 m_NextBit = 0; | 131 | std::size_t next_bit = 0; |
| 134 | |||
| 135 | bool done = false; | ||
| 136 | }; | 132 | }; |
| 137 | 133 | ||
| 138 | template <typename IntType> | 134 | template <typename IntType> |
| @@ -195,9 +191,13 @@ struct IntegerEncodedValue { | |||
| 195 | u32 trit_value; | 191 | u32 trit_value; |
| 196 | }; | 192 | }; |
| 197 | }; | 193 | }; |
| 194 | using IntegerEncodedVector = boost::container::static_vector< | ||
| 195 | IntegerEncodedValue, 64, | ||
| 196 | boost::container::static_vector_options< | ||
| 197 | boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, | ||
| 198 | boost::container::throw_on_overflow<false>>::type>; | ||
| 198 | 199 | ||
| 199 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 200 | static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { |
| 200 | u32 nBitsPerValue) { | ||
| 201 | // Implement the algorithm in section C.2.12 | 201 | // Implement the algorithm in section C.2.12 |
| 202 | u32 m[5]; | 202 | u32 m[5]; |
| 203 | u32 t[5]; | 203 | u32 t[5]; |
| @@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu | |||
| 255 | } | 255 | } |
| 256 | } | 256 | } |
| 257 | 257 | ||
| 258 | static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 258 | static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, |
| 259 | u32 nBitsPerValue) { | 259 | u32 nBitsPerValue) { |
| 260 | // Implement the algorithm in section C.2.12 | 260 | // Implement the algorithm in section C.2.12 |
| 261 | u32 m[3]; | 261 | u32 m[3]; |
| @@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues(); | |||
| 343 | // Fills result with the values that are encoded in the given | 343 | // Fills result with the values that are encoded in the given |
| 344 | // bitstream. We must know beforehand what the maximum possible | 344 | // bitstream. We must know beforehand what the maximum possible |
| 345 | // value is, and how many values we're decoding. | 345 | // value is, and how many values we're decoding. |
| 346 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, | 346 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, |
| 347 | u32 maxRange, u32 nValues) { | 347 | u32 nValues) { |
| 348 | // Determine encoding parameters | 348 | // Determine encoding parameters |
| 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; | 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; |
| 350 | 350 | ||
| @@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | |||
| 634 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] | 634 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] |
| 635 | // is the same as [(numBits - 1):0] and repeats all the way down. | 635 | // is the same as [(numBits - 1):0] and repeats all the way down. |
| 636 | template <typename IntType> | 636 | template <typename IntType> |
| 637 | static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | 637 | static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
| 638 | if (numBits == 0) | 638 | if (numBits == 0) { |
| 639 | return 0; | 639 | return 0; |
| 640 | if (toBit == 0) | 640 | } |
| 641 | if (toBit == 0) { | ||
| 641 | return 0; | 642 | return 0; |
| 642 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); | 643 | } |
| 644 | const IntType v = val & static_cast<IntType>((1 << numBits) - 1); | ||
| 643 | IntType res = v; | 645 | IntType res = v; |
| 644 | u32 reslen = numBits; | 646 | u32 reslen = numBits; |
| 645 | while (reslen < toBit) { | 647 | while (reslen < toBit) { |
| @@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | |||
| 656 | return res; | 658 | return res; |
| 657 | } | 659 | } |
| 658 | 660 | ||
| 661 | static constexpr std::size_t NumReplicateEntries(u32 num_bits) { | ||
| 662 | return std::size_t(1) << num_bits; | ||
| 663 | } | ||
| 664 | |||
| 665 | template <typename IntType, u32 num_bits, u32 to_bit> | ||
| 666 | static constexpr auto MakeReplicateTable() { | ||
| 667 | std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||
| 668 | for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { | ||
| 669 | table[value] = Replicate(value, num_bits, to_bit); | ||
| 670 | } | ||
| 671 | return table; | ||
| 672 | } | ||
| 673 | |||
| 674 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 675 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | ||
| 676 | return REPLICATE_BYTE_TO_16_TABLE[value]; | ||
| 677 | } | ||
| 678 | |||
| 679 | static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); | ||
| 680 | static constexpr u32 ReplicateBitTo7(std::size_t value) { | ||
| 681 | return REPLICATE_BIT_TO_7_TABLE[value]; | ||
| 682 | } | ||
| 683 | |||
| 684 | static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); | ||
| 685 | static constexpr u32 ReplicateBitTo9(std::size_t value) { | ||
| 686 | return REPLICATE_BIT_TO_9_TABLE[value]; | ||
| 687 | } | ||
| 688 | |||
| 689 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 690 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 691 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 692 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 693 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 694 | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||
| 695 | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||
| 696 | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||
| 697 | /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback | ||
| 698 | /// to the runtime implementation | ||
| 699 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 700 | switch (num_bits) { | ||
| 701 | case 1: | ||
| 702 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 703 | case 2: | ||
| 704 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 705 | case 3: | ||
| 706 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 707 | case 4: | ||
| 708 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 709 | case 5: | ||
| 710 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 711 | case 6: | ||
| 712 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 713 | case 7: | ||
| 714 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 715 | case 8: | ||
| 716 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 717 | default: | ||
| 718 | return Replicate(value, num_bits, 8); | ||
| 719 | } | ||
| 720 | } | ||
| 721 | |||
| 722 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 723 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 724 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 725 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 726 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 727 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 728 | switch (num_bits) { | ||
| 729 | case 1: | ||
| 730 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 731 | case 2: | ||
| 732 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 733 | case 3: | ||
| 734 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 735 | case 4: | ||
| 736 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 737 | case 5: | ||
| 738 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 739 | default: | ||
| 740 | return Replicate(value, num_bits, 6); | ||
| 741 | } | ||
| 742 | } | ||
| 743 | |||
| 659 | class Pixel { | 744 | class Pixel { |
| 660 | protected: | 745 | protected: |
| 661 | using ChannelType = s16; | 746 | using ChannelType = s16; |
| @@ -674,10 +759,10 @@ public: | |||
| 674 | // significant bits when going from larger to smaller bit depth | 759 | // significant bits when going from larger to smaller bit depth |
| 675 | // or by repeating the most significant bits when going from | 760 | // or by repeating the most significant bits when going from |
| 676 | // smaller to larger bit depths. | 761 | // smaller to larger bit depths. |
| 677 | void ChangeBitDepth(const u8 (&depth)[4]) { | 762 | void ChangeBitDepth() { |
| 678 | for (u32 i = 0; i < 4; i++) { | 763 | for (u32 i = 0; i < 4; i++) { |
| 679 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); | 764 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); |
| 680 | m_BitDepth[i] = depth[i]; | 765 | m_BitDepth[i] = 8; |
| 681 | } | 766 | } |
| 682 | } | 767 | } |
| 683 | 768 | ||
| @@ -689,28 +774,23 @@ public: | |||
| 689 | 774 | ||
| 690 | // Changes the bit depth of a single component. See the comment | 775 | // Changes the bit depth of a single component. See the comment |
| 691 | // above for how we do this. | 776 | // above for how we do this. |
| 692 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { | 777 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { |
| 693 | assert(newDepth <= 8); | ||
| 694 | assert(oldDepth <= 8); | 778 | assert(oldDepth <= 8); |
| 695 | 779 | ||
| 696 | if (oldDepth == newDepth) { | 780 | if (oldDepth == 8) { |
| 697 | // Do nothing | 781 | // Do nothing |
| 698 | return val; | 782 | return val; |
| 699 | } else if (oldDepth == 0 && newDepth != 0) { | 783 | } else if (oldDepth == 0) { |
| 700 | return static_cast<ChannelType>((1 << newDepth) - 1); | 784 | return static_cast<ChannelType>((1 << 8) - 1); |
| 701 | } else if (newDepth > oldDepth) { | 785 | } else if (8 > oldDepth) { |
| 702 | return Replicate(val, oldDepth, newDepth); | 786 | return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); |
| 703 | } else { | 787 | } else { |
| 704 | // oldDepth > newDepth | 788 | // oldDepth > newDepth |
| 705 | if (newDepth == 0) { | 789 | const u8 bitsWasted = static_cast<u8>(oldDepth - 8); |
| 706 | return 0xFF; | 790 | u16 v = static_cast<u16>(val); |
| 707 | } else { | 791 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 708 | u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); | 792 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); |
| 709 | u16 v = static_cast<u16>(val); | 793 | return static_cast<u8>(v); |
| 710 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | ||
| 711 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); | ||
| 712 | return static_cast<u8>(v); | ||
| 713 | } | ||
| 714 | } | 794 | } |
| 715 | 795 | ||
| 716 | assert(false && "We shouldn't get here."); | 796 | assert(false && "We shouldn't get here."); |
| @@ -760,8 +840,7 @@ public: | |||
| 760 | // up in the most-significant byte. | 840 | // up in the most-significant byte. |
| 761 | u32 Pack() const { | 841 | u32 Pack() const { |
| 762 | Pixel eightBit(*this); | 842 | Pixel eightBit(*this); |
| 763 | const u8 eightBitDepth[4] = {8, 8, 8, 8}; | 843 | eightBit.ChangeBitDepth(); |
| 764 | eightBit.ChangeBitDepth(eightBitDepth); | ||
| 765 | 844 | ||
| 766 | u32 r = 0; | 845 | u32 r = 0; |
| 767 | r |= eightBit.A(); | 846 | r |= eightBit.A(); |
| @@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | |||
| 816 | } | 895 | } |
| 817 | 896 | ||
| 818 | // We now have enough to decode our integer sequence. | 897 | // We now have enough to decode our integer sequence. |
| 819 | std::vector<IntegerEncodedValue> decodedColorValues; | 898 | IntegerEncodedVector decodedColorValues; |
| 820 | decodedColorValues.reserve(32); | ||
| 821 | 899 | ||
| 822 | InputBitStream colorStream(data); | 900 | InputBitStream colorStream(data); |
| 823 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 901 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| @@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | |||
| 839 | 917 | ||
| 840 | u32 A = 0, B = 0, C = 0, D = 0; | 918 | u32 A = 0, B = 0, C = 0, D = 0; |
| 841 | // A is just the lsb replicated 9 times. | 919 | // A is just the lsb replicated 9 times. |
| 842 | A = Replicate(bitval & 1, 1, 9); | 920 | A = ReplicateBitTo9(bitval & 1); |
| 843 | 921 | ||
| 844 | switch (val.encoding) { | 922 | switch (val.encoding) { |
| 845 | // Replicate bits | 923 | // Replicate bits |
| 846 | case IntegerEncoding::JustBits: | 924 | case IntegerEncoding::JustBits: |
| 847 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 925 | out[outIdx++] = FastReplicateTo8(bitval, bitlen); |
| 848 | break; | 926 | break; |
| 849 | 927 | ||
| 850 | // Use algorithm in C.2.13 | 928 | // Use algorithm in C.2.13 |
| @@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 962 | u32 bitval = val.bit_value; | 1040 | u32 bitval = val.bit_value; |
| 963 | u32 bitlen = val.num_bits; | 1041 | u32 bitlen = val.num_bits; |
| 964 | 1042 | ||
| 965 | u32 A = Replicate(bitval & 1, 1, 7); | 1043 | u32 A = ReplicateBitTo7(bitval & 1); |
| 966 | u32 B = 0, C = 0, D = 0; | 1044 | u32 B = 0, C = 0, D = 0; |
| 967 | 1045 | ||
| 968 | u32 result = 0; | 1046 | u32 result = 0; |
| 969 | switch (val.encoding) { | 1047 | switch (val.encoding) { |
| 970 | case IntegerEncoding::JustBits: | 1048 | case IntegerEncoding::JustBits: |
| 971 | result = Replicate(bitval, bitlen, 6); | 1049 | result = FastReplicateTo6(bitval, bitlen); |
| 972 | break; | 1050 | break; |
| 973 | 1051 | ||
| 974 | case IntegerEncoding::Trit: { | 1052 | case IntegerEncoding::Trit: { |
| @@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1047 | return result; | 1125 | return result; |
| 1048 | } | 1126 | } |
| 1049 | 1127 | ||
| 1050 | static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, | 1128 | static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, |
| 1051 | const TexelWeightParams& params, const u32 blockWidth, | 1129 | const TexelWeightParams& params, const u32 blockWidth, |
| 1052 | const u32 blockHeight) { | 1130 | const u32 blockHeight) { |
| 1053 | u32 weightIdx = 0; | 1131 | u32 weightIdx = 0; |
| @@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1545 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1623 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1546 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1624 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1547 | 1625 | ||
| 1548 | std::vector<IntegerEncodedValue> texelWeightValues; | 1626 | IntegerEncodedVector texelWeightValues; |
| 1549 | texelWeightValues.reserve(64); | ||
| 1550 | 1627 | ||
| 1551 | InputBitStream weightStream(texelWeightData); | 1628 | InputBitStream weightStream(texelWeightData); |
| 1552 | 1629 | ||
| @@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1568 | Pixel p; | 1645 | Pixel p; |
| 1569 | for (u32 c = 0; c < 4; c++) { | 1646 | for (u32 c = 0; c < 4; c++) { |
| 1570 | u32 C0 = endpos32s[partition][0].Component(c); | 1647 | u32 C0 = endpos32s[partition][0].Component(c); |
| 1571 | C0 = Replicate(C0, 8, 16); | 1648 | C0 = ReplicateByteTo16(C0); |
| 1572 | u32 C1 = endpos32s[partition][1].Component(c); | 1649 | u32 C1 = endpos32s[partition][1].Component(c); |
| 1573 | C1 = Replicate(C1, 8, 16); | 1650 | C1 = ReplicateByteTo16(C1); |
| 1574 | 1651 | ||
| 1575 | u32 plane = 0; | 1652 | u32 plane = 0; |
| 1576 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | 1653 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |