diff options
| -rw-r--r-- | src/video_core/textures/astc.cpp | 1056 |
1 files changed, 519 insertions, 537 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 33bd31865..404708d92 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -17,26 +17,37 @@ | |||
| 17 | 17 | ||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstdint> | ||
| 21 | #include <cstring> | 20 | #include <cstring> |
| 22 | #include <vector> | 21 | #include <vector> |
| 23 | 22 | ||
| 23 | #include "common/common_types.h" | ||
| 24 | |||
| 24 | #include "video_core/textures/astc.h" | 25 | #include "video_core/textures/astc.h" |
| 25 | 26 | ||
| 27 | namespace { | ||
| 28 | |||
| 29 | /// Count the number of bits set in a number. | ||
| 30 | constexpr u32 Popcnt(u32 n) { | ||
| 31 | u32 c = 0; | ||
| 32 | for (; n; c++) { | ||
| 33 | n &= n - 1; | ||
| 34 | } | ||
| 35 | return c; | ||
| 36 | } | ||
| 37 | |||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 26 | class InputBitStream { | 40 | class InputBitStream { |
| 27 | public: | 41 | public: |
| 28 | explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) | 42 | explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
| 29 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 43 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 44 | ||
| 31 | ~InputBitStream() = default; | 45 | std::size_t GetBitsRead() const { |
| 32 | |||
| 33 | int GetBitsRead() const { | ||
| 34 | return m_BitsRead; | 46 | return m_BitsRead; |
| 35 | } | 47 | } |
| 36 | 48 | ||
| 37 | int ReadBit() { | 49 | u32 ReadBit() { |
| 38 | 50 | u32 bit = *m_CurByte >> m_NextBit++; | |
| 39 | int bit = *m_CurByte >> m_NextBit++; | ||
| 40 | while (m_NextBit >= 8) { | 51 | while (m_NextBit >= 8) { |
| 41 | m_NextBit -= 8; | 52 | m_NextBit -= 8; |
| 42 | m_CurByte++; | 53 | m_CurByte++; |
| @@ -46,57 +57,66 @@ public: | |||
| 46 | return bit & 1; | 57 | return bit & 1; |
| 47 | } | 58 | } |
| 48 | 59 | ||
| 49 | unsigned int ReadBits(unsigned int nBits) { | 60 | u32 ReadBits(std::size_t nBits) { |
| 50 | unsigned int ret = 0; | 61 | u32 ret = 0; |
| 51 | for (unsigned int i = 0; i < nBits; i++) { | 62 | for (std::size_t i = 0; i < nBits; ++i) { |
| 63 | ret |= (ReadBit() & 1) << i; | ||
| 64 | } | ||
| 65 | return ret; | ||
| 66 | } | ||
| 67 | |||
| 68 | template <std::size_t nBits> | ||
| 69 | u32 ReadBits() { | ||
| 70 | u32 ret = 0; | ||
| 71 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 52 | ret |= (ReadBit() & 1) << i; | 72 | ret |= (ReadBit() & 1) << i; |
| 53 | } | 73 | } |
| 54 | return ret; | 74 | return ret; |
| 55 | } | 75 | } |
| 56 | 76 | ||
| 57 | private: | 77 | private: |
| 58 | const unsigned char* m_CurByte; | 78 | const u8* m_CurByte; |
| 59 | int m_NextBit = 0; | 79 | std::size_t m_NextBit = 0; |
| 60 | int m_BitsRead = 0; | 80 | std::size_t m_BitsRead = 0; |
| 61 | }; | 81 | }; |
| 62 | 82 | ||
| 63 | class OutputBitStream { | 83 | class OutputBitStream { |
| 64 | public: | 84 | public: |
| 65 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 85 | explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) |
| 66 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 86 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 67 | 87 | ||
| 68 | ~OutputBitStream() = default; | 88 | ~OutputBitStream() = default; |
| 69 | 89 | ||
| 70 | int GetBitsWritten() const { | 90 | s32 GetBitsWritten() const { |
| 71 | return m_BitsWritten; | 91 | return m_BitsWritten; |
| 72 | } | 92 | } |
| 73 | 93 | ||
| 74 | void WriteBitsR(unsigned int val, unsigned int nBits) { | 94 | void WriteBitsR(u32 val, u32 nBits) { |
| 75 | for (unsigned int i = 0; i < nBits; i++) { | 95 | for (u32 i = 0; i < nBits; i++) { |
| 76 | WriteBit((val >> (nBits - i - 1)) & 1); | 96 | WriteBit((val >> (nBits - i - 1)) & 1); |
| 77 | } | 97 | } |
| 78 | } | 98 | } |
| 79 | 99 | ||
| 80 | void WriteBits(unsigned int val, unsigned int nBits) { | 100 | void WriteBits(u32 val, u32 nBits) { |
| 81 | for (unsigned int i = 0; i < nBits; i++) { | 101 | for (u32 i = 0; i < nBits; i++) { |
| 82 | WriteBit((val >> i) & 1); | 102 | WriteBit((val >> i) & 1); |
| 83 | } | 103 | } |
| 84 | } | 104 | } |
| 85 | 105 | ||
| 86 | private: | 106 | private: |
| 87 | void WriteBit(int b) { | 107 | void WriteBit(s32 b) { |
| 88 | 108 | ||
| 89 | if (done) | 109 | if (done) |
| 90 | return; | 110 | return; |
| 91 | 111 | ||
| 92 | const unsigned int mask = 1 << m_NextBit++; | 112 | const u32 mask = 1 << m_NextBit++; |
| 93 | 113 | ||
| 94 | // clear the bit | 114 | // clear the bit |
| 95 | *m_CurByte &= static_cast<unsigned char>(~mask); | 115 | *m_CurByte &= static_cast<u8>(~mask); |
| 96 | 116 | ||
| 97 | // Write the bit, if necessary | 117 | // Write the bit, if necessary |
| 98 | if (b) | 118 | if (b) |
| 99 | *m_CurByte |= static_cast<unsigned char>(mask); | 119 | *m_CurByte |= static_cast<u8>(mask); |
| 100 | 120 | ||
| 101 | // Next byte? | 121 | // Next byte? |
| 102 | if (m_NextBit >= 8) { | 122 | if (m_NextBit >= 8) { |
| @@ -107,10 +127,10 @@ private: | |||
| 107 | done = done || ++m_BitsWritten >= m_NumBits; | 127 | done = done || ++m_BitsWritten >= m_NumBits; |
| 108 | } | 128 | } |
| 109 | 129 | ||
| 110 | int m_BitsWritten = 0; | 130 | s32 m_BitsWritten = 0; |
| 111 | const int m_NumBits; | 131 | const s32 m_NumBits; |
| 112 | unsigned char* m_CurByte; | 132 | u8* m_CurByte; |
| 113 | int m_NextBit = 0; | 133 | s32 m_NextBit = 0; |
| 114 | 134 | ||
| 115 | bool done = false; | 135 | bool done = false; |
| 116 | }; | 136 | }; |
| @@ -123,20 +143,20 @@ public: | |||
| 123 | Bits(const Bits&) = delete; | 143 | Bits(const Bits&) = delete; |
| 124 | Bits& operator=(const Bits&) = delete; | 144 | Bits& operator=(const Bits&) = delete; |
| 125 | 145 | ||
| 126 | uint8_t operator[](uint32_t bitPos) const { | 146 | u8 operator[](u32 bitPos) const { |
| 127 | return static_cast<uint8_t>((m_Bits >> bitPos) & 1); | 147 | return static_cast<u8>((m_Bits >> bitPos) & 1); |
| 128 | } | 148 | } |
| 129 | 149 | ||
| 130 | IntType operator()(uint32_t start, uint32_t end) const { | 150 | IntType operator()(u32 start, u32 end) const { |
| 131 | if (start == end) { | 151 | if (start == end) { |
| 132 | return (*this)[start]; | 152 | return (*this)[start]; |
| 133 | } else if (start > end) { | 153 | } else if (start > end) { |
| 134 | uint32_t t = start; | 154 | u32 t = start; |
| 135 | start = end; | 155 | start = end; |
| 136 | end = t; | 156 | end = t; |
| 137 | } | 157 | } |
| 138 | 158 | ||
| 139 | uint64_t mask = (1 << (end - start + 1)) - 1; | 159 | u64 mask = (1 << (end - start + 1)) - 1; |
| 140 | return (m_Bits >> start) & static_cast<IntType>(mask); | 160 | return (m_Bits >> start) & static_cast<IntType>(mask); |
| 141 | } | 161 | } |
| 142 | 162 | ||
| @@ -144,273 +164,236 @@ private: | |||
| 144 | const IntType& m_Bits; | 164 | const IntType& m_Bits; |
| 145 | }; | 165 | }; |
| 146 | 166 | ||
| 147 | enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; | 167 | enum class IntegerEncoding { JustBits, Qus32, Trit }; |
| 148 | |||
| 149 | class IntegerEncodedValue { | ||
| 150 | private: | ||
| 151 | const EIntegerEncoding m_Encoding; | ||
| 152 | const uint32_t m_NumBits; | ||
| 153 | uint32_t m_BitValue; | ||
| 154 | union { | ||
| 155 | uint32_t m_QuintValue; | ||
| 156 | uint32_t m_TritValue; | ||
| 157 | }; | ||
| 158 | 168 | ||
| 159 | public: | 169 | struct IntegerEncodedValue { |
| 160 | // Jank, but we're not doing any heavy lifting in this class, so it's | 170 | constexpr IntegerEncodedValue() = default; |
| 161 | // probably OK. It allows us to use these in std::vectors... | ||
| 162 | IntegerEncodedValue& operator=(const IntegerEncodedValue& other) { | ||
| 163 | new (this) IntegerEncodedValue(other); | ||
| 164 | return *this; | ||
| 165 | } | ||
| 166 | 171 | ||
| 167 | IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) | 172 | constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) |
| 168 | : m_Encoding(encoding), m_NumBits(numBits) {} | 173 | : encoding{encoding_}, num_bits{num_bits_} {} |
| 169 | 174 | ||
| 170 | EIntegerEncoding GetEncoding() const { | 175 | constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { |
| 171 | return m_Encoding; | 176 | return encoding == other.encoding && num_bits == other.num_bits; |
| 172 | } | ||
| 173 | uint32_t BaseBitLength() const { | ||
| 174 | return m_NumBits; | ||
| 175 | } | ||
| 176 | |||
| 177 | uint32_t GetBitValue() const { | ||
| 178 | return m_BitValue; | ||
| 179 | } | ||
| 180 | void SetBitValue(uint32_t val) { | ||
| 181 | m_BitValue = val; | ||
| 182 | } | ||
| 183 | |||
| 184 | uint32_t GetTritValue() const { | ||
| 185 | return m_TritValue; | ||
| 186 | } | ||
| 187 | void SetTritValue(uint32_t val) { | ||
| 188 | m_TritValue = val; | ||
| 189 | } | ||
| 190 | |||
| 191 | uint32_t GetQuintValue() const { | ||
| 192 | return m_QuintValue; | ||
| 193 | } | ||
| 194 | void SetQuintValue(uint32_t val) { | ||
| 195 | m_QuintValue = val; | ||
| 196 | } | ||
| 197 | |||
| 198 | bool MatchesEncoding(const IntegerEncodedValue& other) const { | ||
| 199 | return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits; | ||
| 200 | } | 177 | } |
| 201 | 178 | ||
| 202 | // Returns the number of bits required to encode nVals values. | 179 | // Returns the number of bits required to encode nVals values. |
| 203 | uint32_t GetBitLength(uint32_t nVals) const { | 180 | u32 GetBitLength(u32 nVals) const { |
| 204 | uint32_t totalBits = m_NumBits * nVals; | 181 | u32 totalBits = num_bits * nVals; |
| 205 | if (m_Encoding == eIntegerEncoding_Trit) { | 182 | if (encoding == IntegerEncoding::Trit) { |
| 206 | totalBits += (nVals * 8 + 4) / 5; | 183 | totalBits += (nVals * 8 + 4) / 5; |
| 207 | } else if (m_Encoding == eIntegerEncoding_Quint) { | 184 | } else if (encoding == IntegerEncoding::Qus32) { |
| 208 | totalBits += (nVals * 7 + 2) / 3; | 185 | totalBits += (nVals * 7 + 2) / 3; |
| 209 | } | 186 | } |
| 210 | return totalBits; | 187 | return totalBits; |
| 211 | } | 188 | } |
| 212 | 189 | ||
| 213 | // Count the number of bits set in a number. | 190 | IntegerEncoding encoding{}; |
| 214 | static inline uint32_t Popcnt(uint32_t n) { | 191 | u32 num_bits = 0; |
| 215 | uint32_t c; | 192 | u32 bit_value = 0; |
| 216 | for (c = 0; n; c++) { | 193 | union { |
| 217 | n &= n - 1; | 194 | u32 qus32_value = 0; |
| 195 | u32 trit_value; | ||
| 196 | }; | ||
| 197 | }; | ||
| 198 | |||
| 199 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | ||
| 200 | u32 nBitsPerValue) { | ||
| 201 | // Implement the algorithm in section C.2.12 | ||
| 202 | u32 m[5]; | ||
| 203 | u32 t[5]; | ||
| 204 | u32 T; | ||
| 205 | |||
| 206 | // Read the trit encoded block according to | ||
| 207 | // table C.2.14 | ||
| 208 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 209 | T = bits.ReadBits<2>(); | ||
| 210 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 211 | T |= bits.ReadBits<2>() << 2; | ||
| 212 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 213 | T |= bits.ReadBit() << 4; | ||
| 214 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 215 | T |= bits.ReadBits<2>() << 5; | ||
| 216 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 217 | T |= bits.ReadBit() << 7; | ||
| 218 | |||
| 219 | u32 C = 0; | ||
| 220 | |||
| 221 | Bits<u32> Tb(T); | ||
| 222 | if (Tb(2, 4) == 7) { | ||
| 223 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 224 | t[4] = t[3] = 2; | ||
| 225 | } else { | ||
| 226 | C = Tb(0, 4); | ||
| 227 | if (Tb(5, 6) == 3) { | ||
| 228 | t[4] = 2; | ||
| 229 | t[3] = Tb[7]; | ||
| 230 | } else { | ||
| 231 | t[4] = Tb[7]; | ||
| 232 | t[3] = Tb(5, 6); | ||
| 218 | } | 233 | } |
| 219 | return c; | ||
| 220 | } | 234 | } |
| 221 | 235 | ||
| 222 | // Returns a new instance of this struct that corresponds to the | 236 | Bits<u32> Cb(C); |
| 223 | // can take no more than maxval values | 237 | if (Cb(0, 1) == 3) { |
| 224 | static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { | 238 | t[2] = 2; |
| 225 | while (maxVal > 0) { | 239 | t[1] = Cb[4]; |
| 226 | uint32_t check = maxVal + 1; | 240 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); |
| 227 | 241 | } else if (Cb(2, 3) == 3) { | |
| 228 | // Is maxVal a power of two? | 242 | t[2] = 2; |
| 229 | if (!(check & (check - 1))) { | 243 | t[1] = 2; |
| 230 | return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); | 244 | t[0] = Cb(0, 1); |
| 231 | } | 245 | } else { |
| 232 | 246 | t[2] = Cb[4]; | |
| 233 | // Is maxVal of the type 3*2^n - 1? | 247 | t[1] = Cb(2, 3); |
| 234 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | 248 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); |
| 235 | return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); | 249 | } |
| 236 | } | ||
| 237 | 250 | ||
| 238 | // Is maxVal of the type 5*2^n - 1? | 251 | for (std::size_t i = 0; i < 5; ++i) { |
| 239 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | 252 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); |
| 240 | return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); | 253 | val.bit_value = m[i]; |
| 241 | } | 254 | val.trit_value = t[i]; |
| 255 | } | ||
| 256 | } | ||
| 242 | 257 | ||
| 243 | // Apparently it can't be represented with a bounded integer sequence... | 258 | static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 244 | // just iterate. | 259 | u32 nBitsPerValue) { |
| 245 | maxVal--; | 260 | // Implement the algorithm in section C.2.12 |
| 261 | u32 m[3]; | ||
| 262 | u32 q[3]; | ||
| 263 | u32 Q; | ||
| 264 | |||
| 265 | // Read the trit encoded block according to | ||
| 266 | // table C.2.15 | ||
| 267 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 268 | Q = bits.ReadBits<3>(); | ||
| 269 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 270 | Q |= bits.ReadBits<2>() << 3; | ||
| 271 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 272 | Q |= bits.ReadBits<2>() << 5; | ||
| 273 | |||
| 274 | Bits<u32> Qb(Q); | ||
| 275 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 276 | q[0] = q[1] = 4; | ||
| 277 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 278 | } else { | ||
| 279 | u32 C = 0; | ||
| 280 | if (Qb(1, 2) == 3) { | ||
| 281 | q[2] = 4; | ||
| 282 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 283 | } else { | ||
| 284 | q[2] = Qb(5, 6); | ||
| 285 | C = Qb(0, 4); | ||
| 246 | } | 286 | } |
| 247 | return IntegerEncodedValue(eIntegerEncoding_JustBits, 0); | ||
| 248 | } | ||
| 249 | |||
| 250 | // Fills result with the values that are encoded in the given | ||
| 251 | // bitstream. We must know beforehand what the maximum possible | ||
| 252 | // value is, and how many values we're decoding. | ||
| 253 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, | ||
| 254 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { | ||
| 255 | // Determine encoding parameters | ||
| 256 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | ||
| 257 | |||
| 258 | // Start decoding | ||
| 259 | uint32_t nValsDecoded = 0; | ||
| 260 | while (nValsDecoded < nValues) { | ||
| 261 | switch (val.GetEncoding()) { | ||
| 262 | case eIntegerEncoding_Quint: | ||
| 263 | DecodeQuintBlock(bits, result, val.BaseBitLength()); | ||
| 264 | nValsDecoded += 3; | ||
| 265 | break; | ||
| 266 | 287 | ||
| 267 | case eIntegerEncoding_Trit: | 288 | Bits<u32> Cb(C); |
| 268 | DecodeTritBlock(bits, result, val.BaseBitLength()); | 289 | if (Cb(0, 2) == 5) { |
| 269 | nValsDecoded += 5; | 290 | q[1] = 4; |
| 270 | break; | 291 | q[0] = Cb(3, 4); |
| 271 | 292 | } else { | |
| 272 | case eIntegerEncoding_JustBits: | 293 | q[1] = Cb(3, 4); |
| 273 | val.SetBitValue(bits.ReadBits(val.BaseBitLength())); | 294 | q[0] = Cb(0, 2); |
| 274 | result.push_back(val); | ||
| 275 | nValsDecoded++; | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | } | 295 | } |
| 279 | } | 296 | } |
| 280 | 297 | ||
| 281 | private: | 298 | for (std::size_t i = 0; i < 3; ++i) { |
| 282 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 299 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); |
| 283 | uint32_t nBitsPerValue) { | 300 | val.bit_value = m[i]; |
| 284 | // Implement the algorithm in section C.2.12 | 301 | val.qus32_value = q[i]; |
| 285 | uint32_t m[5]; | 302 | } |
| 286 | uint32_t t[5]; | 303 | } |
| 287 | uint32_t T; | 304 | |
| 288 | 305 | // Returns a new instance of this struct that corresponds to the | |
| 289 | // Read the trit encoded block according to | 306 | // can take no more than maxval values |
| 290 | // table C.2.14 | 307 | static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { |
| 291 | m[0] = bits.ReadBits(nBitsPerValue); | 308 | while (maxVal > 0) { |
| 292 | T = bits.ReadBits(2); | 309 | u32 check = maxVal + 1; |
| 293 | m[1] = bits.ReadBits(nBitsPerValue); | 310 | |
| 294 | T |= bits.ReadBits(2) << 2; | 311 | // Is maxVal a power of two? |
| 295 | m[2] = bits.ReadBits(nBitsPerValue); | 312 | if (!(check & (check - 1))) { |
| 296 | T |= bits.ReadBit() << 4; | 313 | return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); |
| 297 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 298 | T |= bits.ReadBits(2) << 5; | ||
| 299 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 300 | T |= bits.ReadBit() << 7; | ||
| 301 | |||
| 302 | uint32_t C = 0; | ||
| 303 | |||
| 304 | Bits<uint32_t> Tb(T); | ||
| 305 | if (Tb(2, 4) == 7) { | ||
| 306 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 307 | t[4] = t[3] = 2; | ||
| 308 | } else { | ||
| 309 | C = Tb(0, 4); | ||
| 310 | if (Tb(5, 6) == 3) { | ||
| 311 | t[4] = 2; | ||
| 312 | t[3] = Tb[7]; | ||
| 313 | } else { | ||
| 314 | t[4] = Tb[7]; | ||
| 315 | t[3] = Tb(5, 6); | ||
| 316 | } | ||
| 317 | } | 314 | } |
| 318 | 315 | ||
| 319 | Bits<uint32_t> Cb(C); | 316 | // Is maxVal of the type 3*2^n - 1? |
| 320 | if (Cb(0, 1) == 3) { | 317 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { |
| 321 | t[2] = 2; | 318 | return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); |
| 322 | t[1] = Cb[4]; | ||
| 323 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); | ||
| 324 | } else if (Cb(2, 3) == 3) { | ||
| 325 | t[2] = 2; | ||
| 326 | t[1] = 2; | ||
| 327 | t[0] = Cb(0, 1); | ||
| 328 | } else { | ||
| 329 | t[2] = Cb[4]; | ||
| 330 | t[1] = Cb(2, 3); | ||
| 331 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); | ||
| 332 | } | 319 | } |
| 333 | 320 | ||
| 334 | for (uint32_t i = 0; i < 5; i++) { | 321 | // Is maxVal of the type 5*2^n - 1? |
| 335 | IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); | 322 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { |
| 336 | val.SetBitValue(m[i]); | 323 | return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); |
| 337 | val.SetTritValue(t[i]); | ||
| 338 | result.push_back(val); | ||
| 339 | } | 324 | } |
| 325 | |||
| 326 | // Apparently it can't be represented with a bounded integer sequence... | ||
| 327 | // just iterate. | ||
| 328 | maxVal--; | ||
| 340 | } | 329 | } |
| 330 | return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||
| 331 | } | ||
| 341 | 332 | ||
| 342 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 333 | static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { |
| 343 | uint32_t nBitsPerValue) { | 334 | std::array<IntegerEncodedValue, 256> encodings{}; |
| 344 | // Implement the algorithm in section C.2.12 | 335 | for (std::size_t i = 0; i < encodings.size(); ++i) { |
| 345 | uint32_t m[3]; | 336 | encodings[i] = CreateEncoding(static_cast<u32>(i)); |
| 346 | uint32_t q[3]; | 337 | } |
| 347 | uint32_t Q; | 338 | return encodings; |
| 348 | 339 | } | |
| 349 | // Read the trit encoded block according to | ||
| 350 | // table C.2.15 | ||
| 351 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 352 | Q = bits.ReadBits(3); | ||
| 353 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 354 | Q |= bits.ReadBits(2) << 3; | ||
| 355 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 356 | Q |= bits.ReadBits(2) << 5; | ||
| 357 | |||
| 358 | Bits<uint32_t> Qb(Q); | ||
| 359 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 360 | q[0] = q[1] = 4; | ||
| 361 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 362 | } else { | ||
| 363 | uint32_t C = 0; | ||
| 364 | if (Qb(1, 2) == 3) { | ||
| 365 | q[2] = 4; | ||
| 366 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 367 | } else { | ||
| 368 | q[2] = Qb(5, 6); | ||
| 369 | C = Qb(0, 4); | ||
| 370 | } | ||
| 371 | 340 | ||
| 372 | Bits<uint32_t> Cb(C); | 341 | static constexpr std::array EncodingsValues = MakeEncodedValues(); |
| 373 | if (Cb(0, 2) == 5) { | 342 | |
| 374 | q[1] = 4; | 343 | // Fills result with the values that are encoded in the given |
| 375 | q[0] = Cb(3, 4); | 344 | // bitstream. We must know beforehand what the maximum possible |
| 376 | } else { | 345 | // value is, and how many values we're decoding. |
| 377 | q[1] = Cb(3, 4); | 346 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, |
| 378 | q[0] = Cb(0, 2); | 347 | u32 maxRange, u32 nValues) { |
| 379 | } | 348 | // Determine encoding parameters |
| 380 | } | 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; |
| 350 | |||
| 351 | // Start decoding | ||
| 352 | u32 nValsDecoded = 0; | ||
| 353 | while (nValsDecoded < nValues) { | ||
| 354 | switch (val.encoding) { | ||
| 355 | case IntegerEncoding::Qus32: | ||
| 356 | DecodeQus32Block(bits, result, val.num_bits); | ||
| 357 | nValsDecoded += 3; | ||
| 358 | break; | ||
| 359 | |||
| 360 | case IntegerEncoding::Trit: | ||
| 361 | DecodeTritBlock(bits, result, val.num_bits); | ||
| 362 | nValsDecoded += 5; | ||
| 363 | break; | ||
| 381 | 364 | ||
| 382 | for (uint32_t i = 0; i < 3; i++) { | 365 | case IntegerEncoding::JustBits: |
| 383 | IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); | 366 | val.bit_value = bits.ReadBits(val.num_bits); |
| 384 | val.m_BitValue = m[i]; | ||
| 385 | val.m_QuintValue = q[i]; | ||
| 386 | result.push_back(val); | 367 | result.push_back(val); |
| 368 | nValsDecoded++; | ||
| 369 | break; | ||
| 387 | } | 370 | } |
| 388 | } | 371 | } |
| 389 | }; | 372 | } |
| 390 | 373 | ||
| 391 | namespace ASTCC { | 374 | namespace ASTCC { |
| 392 | 375 | ||
| 393 | struct TexelWeightParams { | 376 | struct TexelWeightParams { |
| 394 | uint32_t m_Width = 0; | 377 | u32 m_Width = 0; |
| 395 | uint32_t m_Height = 0; | 378 | u32 m_Height = 0; |
| 396 | bool m_bDualPlane = false; | 379 | bool m_bDualPlane = false; |
| 397 | uint32_t m_MaxWeight = 0; | 380 | u32 m_MaxWeight = 0; |
| 398 | bool m_bError = false; | 381 | bool m_bError = false; |
| 399 | bool m_bVoidExtentLDR = false; | 382 | bool m_bVoidExtentLDR = false; |
| 400 | bool m_bVoidExtentHDR = false; | 383 | bool m_bVoidExtentHDR = false; |
| 401 | 384 | ||
| 402 | uint32_t GetPackedBitSize() const { | 385 | u32 GetPackedBitSize() const { |
| 403 | // How many indices do we have? | 386 | // How many indices do we have? |
| 404 | uint32_t nIdxs = m_Height * m_Width; | 387 | u32 nIdxs = m_Height * m_Width; |
| 405 | if (m_bDualPlane) { | 388 | if (m_bDualPlane) { |
| 406 | nIdxs *= 2; | 389 | nIdxs *= 2; |
| 407 | } | 390 | } |
| 408 | 391 | ||
| 409 | return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); | 392 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); |
| 410 | } | 393 | } |
| 411 | 394 | ||
| 412 | uint32_t GetNumWeightValues() const { | 395 | u32 GetNumWeightValues() const { |
| 413 | uint32_t ret = m_Width * m_Height; | 396 | u32 ret = m_Width * m_Height; |
| 414 | if (m_bDualPlane) { | 397 | if (m_bDualPlane) { |
| 415 | ret *= 2; | 398 | ret *= 2; |
| 416 | } | 399 | } |
| @@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 422 | TexelWeightParams params; | 405 | TexelWeightParams params; |
| 423 | 406 | ||
| 424 | // Read the entire block mode all at once | 407 | // Read the entire block mode all at once |
| 425 | uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); | 408 | u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); |
| 426 | 409 | ||
| 427 | // Does this match the void extent block mode? | 410 | // Does this match the void extent block mode? |
| 428 | if ((modeBits & 0x01FF) == 0x1FC) { | 411 | if ((modeBits & 0x01FF) == 0x1FC) { |
| @@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 457 | // of the block mode. Layout is determined by a number | 440 | // of the block mode. Layout is determined by a number |
| 458 | // between 0 and 9 corresponding to table C.2.8 of the | 441 | // between 0 and 9 corresponding to table C.2.8 of the |
| 459 | // ASTC spec. | 442 | // ASTC spec. |
| 460 | uint32_t layout = 0; | 443 | u32 layout = 0; |
| 461 | 444 | ||
| 462 | if ((modeBits & 0x1) || (modeBits & 0x2)) { | 445 | if ((modeBits & 0x1) || (modeBits & 0x2)) { |
| 463 | // layout is in [0-4] | 446 | // layout is in [0-4] |
| @@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 509 | assert(layout < 10); | 492 | assert(layout < 10); |
| 510 | 493 | ||
| 511 | // Determine R | 494 | // Determine R |
| 512 | uint32_t R = !!(modeBits & 0x10); | 495 | u32 R = !!(modeBits & 0x10); |
| 513 | if (layout < 5) { | 496 | if (layout < 5) { |
| 514 | R |= (modeBits & 0x3) << 1; | 497 | R |= (modeBits & 0x3) << 1; |
| 515 | } else { | 498 | } else { |
| @@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 520 | // Determine width & height | 503 | // Determine width & height |
| 521 | switch (layout) { | 504 | switch (layout) { |
| 522 | case 0: { | 505 | case 0: { |
| 523 | uint32_t A = (modeBits >> 5) & 0x3; | 506 | u32 A = (modeBits >> 5) & 0x3; |
| 524 | uint32_t B = (modeBits >> 7) & 0x3; | 507 | u32 B = (modeBits >> 7) & 0x3; |
| 525 | params.m_Width = B + 4; | 508 | params.m_Width = B + 4; |
| 526 | params.m_Height = A + 2; | 509 | params.m_Height = A + 2; |
| 527 | break; | 510 | break; |
| 528 | } | 511 | } |
| 529 | 512 | ||
| 530 | case 1: { | 513 | case 1: { |
| 531 | uint32_t A = (modeBits >> 5) & 0x3; | 514 | u32 A = (modeBits >> 5) & 0x3; |
| 532 | uint32_t B = (modeBits >> 7) & 0x3; | 515 | u32 B = (modeBits >> 7) & 0x3; |
| 533 | params.m_Width = B + 8; | 516 | params.m_Width = B + 8; |
| 534 | params.m_Height = A + 2; | 517 | params.m_Height = A + 2; |
| 535 | break; | 518 | break; |
| 536 | } | 519 | } |
| 537 | 520 | ||
| 538 | case 2: { | 521 | case 2: { |
| 539 | uint32_t A = (modeBits >> 5) & 0x3; | 522 | u32 A = (modeBits >> 5) & 0x3; |
| 540 | uint32_t B = (modeBits >> 7) & 0x3; | 523 | u32 B = (modeBits >> 7) & 0x3; |
| 541 | params.m_Width = A + 2; | 524 | params.m_Width = A + 2; |
| 542 | params.m_Height = B + 8; | 525 | params.m_Height = B + 8; |
| 543 | break; | 526 | break; |
| 544 | } | 527 | } |
| 545 | 528 | ||
| 546 | case 3: { | 529 | case 3: { |
| 547 | uint32_t A = (modeBits >> 5) & 0x3; | 530 | u32 A = (modeBits >> 5) & 0x3; |
| 548 | uint32_t B = (modeBits >> 7) & 0x1; | 531 | u32 B = (modeBits >> 7) & 0x1; |
| 549 | params.m_Width = A + 2; | 532 | params.m_Width = A + 2; |
| 550 | params.m_Height = B + 6; | 533 | params.m_Height = B + 6; |
| 551 | break; | 534 | break; |
| 552 | } | 535 | } |
| 553 | 536 | ||
| 554 | case 4: { | 537 | case 4: { |
| 555 | uint32_t A = (modeBits >> 5) & 0x3; | 538 | u32 A = (modeBits >> 5) & 0x3; |
| 556 | uint32_t B = (modeBits >> 7) & 0x1; | 539 | u32 B = (modeBits >> 7) & 0x1; |
| 557 | params.m_Width = B + 2; | 540 | params.m_Width = B + 2; |
| 558 | params.m_Height = A + 2; | 541 | params.m_Height = A + 2; |
| 559 | break; | 542 | break; |
| 560 | } | 543 | } |
| 561 | 544 | ||
| 562 | case 5: { | 545 | case 5: { |
| 563 | uint32_t A = (modeBits >> 5) & 0x3; | 546 | u32 A = (modeBits >> 5) & 0x3; |
| 564 | params.m_Width = 12; | 547 | params.m_Width = 12; |
| 565 | params.m_Height = A + 2; | 548 | params.m_Height = A + 2; |
| 566 | break; | 549 | break; |
| 567 | } | 550 | } |
| 568 | 551 | ||
| 569 | case 6: { | 552 | case 6: { |
| 570 | uint32_t A = (modeBits >> 5) & 0x3; | 553 | u32 A = (modeBits >> 5) & 0x3; |
| 571 | params.m_Width = A + 2; | 554 | params.m_Width = A + 2; |
| 572 | params.m_Height = 12; | 555 | params.m_Height = 12; |
| 573 | break; | 556 | break; |
| @@ -586,8 +569,8 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 586 | } | 569 | } |
| 587 | 570 | ||
| 588 | case 9: { | 571 | case 9: { |
| 589 | uint32_t A = (modeBits >> 5) & 0x3; | 572 | u32 A = (modeBits >> 5) & 0x3; |
| 590 | uint32_t B = (modeBits >> 9) & 0x3; | 573 | u32 B = (modeBits >> 9) & 0x3; |
| 591 | params.m_Width = A + 6; | 574 | params.m_Width = A + 6; |
| 592 | params.m_Height = B + 6; | 575 | params.m_Height = B + 6; |
| 593 | break; | 576 | break; |
| @@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 605 | bool H = (layout != 9) && (modeBits & 0x200); | 588 | bool H = (layout != 9) && (modeBits & 0x200); |
| 606 | 589 | ||
| 607 | if (H) { | 590 | if (H) { |
| 608 | const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; | 591 | const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; |
| 609 | params.m_MaxWeight = maxWeights[R - 2]; | 592 | params.m_MaxWeight = maxWeights[R - 2]; |
| 610 | } else { | 593 | } else { |
| 611 | const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; | 594 | const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; |
| 612 | params.m_MaxWeight = maxWeights[R - 2]; | 595 | params.m_MaxWeight = maxWeights[R - 2]; |
| 613 | } | 596 | } |
| 614 | 597 | ||
| @@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 617 | return params; | 600 | return params; |
| 618 | } | 601 | } |
| 619 | 602 | ||
| 620 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, |
| 621 | uint32_t blockHeight) { | 604 | u32 blockHeight) { |
| 622 | // Don't actually care about the void extent, just read the bits... | 605 | // Don't actually care about the void extent, just read the bits... |
| 623 | for (int i = 0; i < 4; ++i) { | 606 | for (s32 i = 0; i < 4; ++i) { |
| 624 | strm.ReadBits(13); | 607 | strm.ReadBits<13>(); |
| 625 | } | 608 | } |
| 626 | 609 | ||
| 627 | // Decode the RGBA components and renormalize them to the range [0, 255] | 610 | // Decode the RGBA components and renormalize them to the range [0, 255] |
| 628 | uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); | 611 | u16 r = static_cast<u16>(strm.ReadBits<16>()); |
| 629 | uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); | 612 | u16 g = static_cast<u16>(strm.ReadBits<16>()); |
| 630 | uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); | 613 | u16 b = static_cast<u16>(strm.ReadBits<16>()); |
| 631 | uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); | 614 | u16 a = static_cast<u16>(strm.ReadBits<16>()); |
| 632 | 615 | ||
| 633 | uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | | 616 | u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | |
| 634 | (static_cast<uint32_t>(a) & 0xFF00) << 16; | 617 | (static_cast<u32>(a) & 0xFF00) << 16; |
| 635 | 618 | ||
| 636 | for (uint32_t j = 0; j < blockHeight; j++) { | 619 | for (u32 j = 0; j < blockHeight; j++) { |
| 637 | for (uint32_t i = 0; i < blockWidth; i++) { | 620 | for (u32 i = 0; i < blockWidth; i++) { |
| 638 | outBuf[j * blockWidth + i] = rgba; | 621 | outBuf[j * blockWidth + i] = rgba; |
| 639 | } | 622 | } |
| 640 | } | 623 | } |
| 641 | } | 624 | } |
| 642 | 625 | ||
| 643 | static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { | 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { |
| 644 | for (uint32_t j = 0; j < blockHeight; j++) { | 627 | for (u32 j = 0; j < blockHeight; j++) { |
| 645 | for (uint32_t i = 0; i < blockWidth; i++) { | 628 | for (u32 i = 0; i < blockWidth; i++) { |
| 646 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| 647 | } | 630 | } |
| 648 | } | 631 | } |
| @@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh | |||
| 651 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] | 634 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] |
| 652 | // is the same as [(numBits - 1):0] and repeats all the way down. | 635 | // is the same as [(numBits - 1):0] and repeats all the way down. |
| 653 | template <typename IntType> | 636 | template <typename IntType> |
| 654 | static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | 637 | static IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
| 655 | if (numBits == 0) | 638 | if (numBits == 0) |
| 656 | return 0; | 639 | return 0; |
| 657 | if (toBit == 0) | 640 | if (toBit == 0) |
| 658 | return 0; | 641 | return 0; |
| 659 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); | 642 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
| 660 | IntType res = v; | 643 | IntType res = v; |
| 661 | uint32_t reslen = numBits; | 644 | u32 reslen = numBits; |
| 662 | while (reslen < toBit) { | 645 | while (reslen < toBit) { |
| 663 | uint32_t comp = 0; | 646 | u32 comp = 0; |
| 664 | if (numBits > toBit - reslen) { | 647 | if (numBits > toBit - reslen) { |
| 665 | uint32_t newshift = toBit - reslen; | 648 | u32 newshift = toBit - reslen; |
| 666 | comp = numBits - newshift; | 649 | comp = numBits - newshift; |
| 667 | numBits = newshift; | 650 | numBits = newshift; |
| 668 | } | 651 | } |
| @@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | |||
| 675 | 658 | ||
| 676 | class Pixel { | 659 | class Pixel { |
| 677 | protected: | 660 | protected: |
| 678 | using ChannelType = int16_t; | 661 | using ChannelType = s16; |
| 679 | uint8_t m_BitDepth[4] = {8, 8, 8, 8}; | 662 | u8 m_BitDepth[4] = {8, 8, 8, 8}; |
| 680 | int16_t color[4] = {}; | 663 | s16 color[4] = {}; |
| 681 | 664 | ||
| 682 | public: | 665 | public: |
| 683 | Pixel() = default; | 666 | Pixel() = default; |
| 684 | Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) | 667 | Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) |
| 685 | : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, | 668 | : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, |
| 686 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), | 669 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), |
| 687 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | 670 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} |
| 688 | 671 | ||
| @@ -691,22 +674,22 @@ public: | |||
| 691 | // significant bits when going from larger to smaller bit depth | 674 | // significant bits when going from larger to smaller bit depth |
| 692 | // or by repeating the most significant bits when going from | 675 | // or by repeating the most significant bits when going from |
| 693 | // smaller to larger bit depths. | 676 | // smaller to larger bit depths. |
| 694 | void ChangeBitDepth(const uint8_t (&depth)[4]) { | 677 | void ChangeBitDepth(const u8 (&depth)[4]) { |
| 695 | for (uint32_t i = 0; i < 4; i++) { | 678 | for (u32 i = 0; i < 4; i++) { |
| 696 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); | 679 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); |
| 697 | m_BitDepth[i] = depth[i]; | 680 | m_BitDepth[i] = depth[i]; |
| 698 | } | 681 | } |
| 699 | } | 682 | } |
| 700 | 683 | ||
| 701 | template <typename IntType> | 684 | template <typename IntType> |
| 702 | static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { | 685 | static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { |
| 703 | float denominator = static_cast<float>((1 << bitDepth) - 1); | 686 | float denominator = static_cast<float>((1 << bitDepth) - 1); |
| 704 | return static_cast<float>(channel) / denominator; | 687 | return static_cast<float>(channel) / denominator; |
| 705 | } | 688 | } |
| 706 | 689 | ||
| 707 | // Changes the bit depth of a single component. See the comment | 690 | // Changes the bit depth of a single component. See the comment |
| 708 | // above for how we do this. | 691 | // above for how we do this. |
| 709 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { | 692 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { |
| 710 | assert(newDepth <= 8); | 693 | assert(newDepth <= 8); |
| 711 | assert(oldDepth <= 8); | 694 | assert(oldDepth <= 8); |
| 712 | 695 | ||
| @@ -722,12 +705,11 @@ public: | |||
| 722 | if (newDepth == 0) { | 705 | if (newDepth == 0) { |
| 723 | return 0xFF; | 706 | return 0xFF; |
| 724 | } else { | 707 | } else { |
| 725 | uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); | 708 | u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); |
| 726 | uint16_t v = static_cast<uint16_t>(val); | 709 | u16 v = static_cast<u16>(val); |
| 727 | v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | 710 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 728 | v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), | 711 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); |
| 729 | static_cast<uint16_t>((1 << newDepth) - 1)); | 712 | return static_cast<u8>(v); |
| 730 | return static_cast<uint8_t>(v); | ||
| 731 | } | 713 | } |
| 732 | } | 714 | } |
| 733 | 715 | ||
| @@ -759,15 +741,15 @@ public: | |||
| 759 | ChannelType& B() { | 741 | ChannelType& B() { |
| 760 | return color[3]; | 742 | return color[3]; |
| 761 | } | 743 | } |
| 762 | const ChannelType& Component(uint32_t idx) const { | 744 | const ChannelType& Component(u32 idx) const { |
| 763 | return color[idx]; | 745 | return color[idx]; |
| 764 | } | 746 | } |
| 765 | ChannelType& Component(uint32_t idx) { | 747 | ChannelType& Component(u32 idx) { |
| 766 | return color[idx]; | 748 | return color[idx]; |
| 767 | } | 749 | } |
| 768 | 750 | ||
| 769 | void GetBitDepth(uint8_t (&outDepth)[4]) const { | 751 | void GetBitDepth(u8 (&outDepth)[4]) const { |
| 770 | for (int i = 0; i < 4; i++) { | 752 | for (s32 i = 0; i < 4; i++) { |
| 771 | outDepth[i] = m_BitDepth[i]; | 753 | outDepth[i] = m_BitDepth[i]; |
| 772 | } | 754 | } |
| 773 | } | 755 | } |
| @@ -776,12 +758,12 @@ public: | |||
| 776 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume | 758 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume |
| 777 | // that the architecture is little-endian, so the alpha channel will end | 759 | // that the architecture is little-endian, so the alpha channel will end |
| 778 | // up in the most-significant byte. | 760 | // up in the most-significant byte. |
| 779 | uint32_t Pack() const { | 761 | u32 Pack() const { |
| 780 | Pixel eightBit(*this); | 762 | Pixel eightBit(*this); |
| 781 | const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; | 763 | const u8 eightBitDepth[4] = {8, 8, 8, 8}; |
| 782 | eightBit.ChangeBitDepth(eightBitDepth); | 764 | eightBit.ChangeBitDepth(eightBitDepth); |
| 783 | 765 | ||
| 784 | uint32_t r = 0; | 766 | u32 r = 0; |
| 785 | r |= eightBit.A(); | 767 | r |= eightBit.A(); |
| 786 | r <<= 8; | 768 | r <<= 8; |
| 787 | r |= eightBit.B(); | 769 | r |= eightBit.B(); |
| @@ -794,7 +776,7 @@ public: | |||
| 794 | 776 | ||
| 795 | // Clamps the pixel to the range [0,255] | 777 | // Clamps the pixel to the range [0,255] |
| 796 | void ClampByte() { | 778 | void ClampByte() { |
| 797 | for (uint32_t i = 0; i < 4; i++) { | 779 | for (u32 i = 0; i < 4; i++) { |
| 798 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | 780 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); |
| 799 | } | 781 | } |
| 800 | } | 782 | } |
| @@ -804,24 +786,24 @@ public: | |||
| 804 | } | 786 | } |
| 805 | }; | 787 | }; |
| 806 | 788 | ||
| 807 | static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, | 789 | static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions, |
| 808 | const uint32_t nPartitions, const uint32_t nBitsForColorData) { | 790 | const u32 nBitsForColorData) { |
| 809 | // First figure out how many color values we have | 791 | // First figure out how many color values we have |
| 810 | uint32_t nValues = 0; | 792 | u32 nValues = 0; |
| 811 | for (uint32_t i = 0; i < nPartitions; i++) { | 793 | for (u32 i = 0; i < nPartitions; i++) { |
| 812 | nValues += ((modes[i] >> 2) + 1) << 1; | 794 | nValues += ((modes[i] >> 2) + 1) << 1; |
| 813 | } | 795 | } |
| 814 | 796 | ||
| 815 | // Then based on the number of values and the remaining number of bits, | 797 | // Then based on the number of values and the remaining number of bits, |
| 816 | // figure out the max value for each of them... | 798 | // figure out the max value for each of them... |
| 817 | uint32_t range = 256; | 799 | u32 range = 256; |
| 818 | while (--range > 0) { | 800 | while (--range > 0) { |
| 819 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); | 801 | IntegerEncodedValue val = EncodingsValues[range]; |
| 820 | uint32_t bitLength = val.GetBitLength(nValues); | 802 | u32 bitLength = val.GetBitLength(nValues); |
| 821 | if (bitLength <= nBitsForColorData) { | 803 | if (bitLength <= nBitsForColorData) { |
| 822 | // Find the smallest possible range that matches the given encoding | 804 | // Find the smallest possible range that matches the given encoding |
| 823 | while (--range > 0) { | 805 | while (--range > 0) { |
| 824 | IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); | 806 | IntegerEncodedValue newval = EncodingsValues[range]; |
| 825 | if (!newval.MatchesEncoding(val)) { | 807 | if (!newval.MatchesEncoding(val)) { |
| 826 | break; | 808 | break; |
| 827 | } | 809 | } |
| @@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 835 | 817 | ||
| 836 | // We now have enough to decode our integer sequence. | 818 | // We now have enough to decode our integer sequence. |
| 837 | std::vector<IntegerEncodedValue> decodedColorValues; | 819 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 820 | decodedColorValues.reserve(32); | ||
| 821 | |||
| 838 | InputBitStream colorStream(data); | 822 | InputBitStream colorStream(data); |
| 839 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 823 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 840 | 824 | ||
| 841 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 825 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| 842 | // This procedure is outlined in ASTC spec C.2.13 | 826 | // This procedure is outlined in ASTC spec C.2.13 |
| 843 | uint32_t outIdx = 0; | 827 | u32 outIdx = 0; |
| 844 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { | 828 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { |
| 845 | // Have we already decoded all that we need? | 829 | // Have we already decoded all that we need? |
| 846 | if (outIdx >= nValues) { | 830 | if (outIdx >= nValues) { |
| @@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 848 | } | 832 | } |
| 849 | 833 | ||
| 850 | const IntegerEncodedValue& val = *itr; | 834 | const IntegerEncodedValue& val = *itr; |
| 851 | uint32_t bitlen = val.BaseBitLength(); | 835 | u32 bitlen = val.num_bits; |
| 852 | uint32_t bitval = val.GetBitValue(); | 836 | u32 bitval = val.bit_value; |
| 853 | 837 | ||
| 854 | assert(bitlen >= 1); | 838 | assert(bitlen >= 1); |
| 855 | 839 | ||
| 856 | uint32_t A = 0, B = 0, C = 0, D = 0; | 840 | u32 A = 0, B = 0, C = 0, D = 0; |
| 857 | // A is just the lsb replicated 9 times. | 841 | // A is just the lsb replicated 9 times. |
| 858 | A = Replicate(bitval & 1, 1, 9); | 842 | A = Replicate(bitval & 1, 1, 9); |
| 859 | 843 | ||
| 860 | switch (val.GetEncoding()) { | 844 | switch (val.encoding) { |
| 861 | // Replicate bits | 845 | // Replicate bits |
| 862 | case eIntegerEncoding_JustBits: | 846 | case IntegerEncoding::JustBits: |
| 863 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 847 | out[outIdx++] = Replicate(bitval, bitlen, 8); |
| 864 | break; | 848 | break; |
| 865 | 849 | ||
| 866 | // Use algorithm in C.2.13 | 850 | // Use algorithm in C.2.13 |
| 867 | case eIntegerEncoding_Trit: { | 851 | case IntegerEncoding::Trit: { |
| 868 | 852 | ||
| 869 | D = val.GetTritValue(); | 853 | D = val.trit_value; |
| 870 | 854 | ||
| 871 | switch (bitlen) { | 855 | switch (bitlen) { |
| 872 | case 1: { | 856 | case 1: { |
| @@ -876,35 +860,35 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 876 | case 2: { | 860 | case 2: { |
| 877 | C = 93; | 861 | C = 93; |
| 878 | // B = b000b0bb0 | 862 | // B = b000b0bb0 |
| 879 | uint32_t b = (bitval >> 1) & 1; | 863 | u32 b = (bitval >> 1) & 1; |
| 880 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | 864 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); |
| 881 | } break; | 865 | } break; |
| 882 | 866 | ||
| 883 | case 3: { | 867 | case 3: { |
| 884 | C = 44; | 868 | C = 44; |
| 885 | // B = cb000cbcb | 869 | // B = cb000cbcb |
| 886 | uint32_t cb = (bitval >> 1) & 3; | 870 | u32 cb = (bitval >> 1) & 3; |
| 887 | B = (cb << 7) | (cb << 2) | cb; | 871 | B = (cb << 7) | (cb << 2) | cb; |
| 888 | } break; | 872 | } break; |
| 889 | 873 | ||
| 890 | case 4: { | 874 | case 4: { |
| 891 | C = 22; | 875 | C = 22; |
| 892 | // B = dcb000dcb | 876 | // B = dcb000dcb |
| 893 | uint32_t dcb = (bitval >> 1) & 7; | 877 | u32 dcb = (bitval >> 1) & 7; |
| 894 | B = (dcb << 6) | dcb; | 878 | B = (dcb << 6) | dcb; |
| 895 | } break; | 879 | } break; |
| 896 | 880 | ||
| 897 | case 5: { | 881 | case 5: { |
| 898 | C = 11; | 882 | C = 11; |
| 899 | // B = edcb000ed | 883 | // B = edcb000ed |
| 900 | uint32_t edcb = (bitval >> 1) & 0xF; | 884 | u32 edcb = (bitval >> 1) & 0xF; |
| 901 | B = (edcb << 5) | (edcb >> 2); | 885 | B = (edcb << 5) | (edcb >> 2); |
| 902 | } break; | 886 | } break; |
| 903 | 887 | ||
| 904 | case 6: { | 888 | case 6: { |
| 905 | C = 5; | 889 | C = 5; |
| 906 | // B = fedcb000f | 890 | // B = fedcb000f |
| 907 | uint32_t fedcb = (bitval >> 1) & 0x1F; | 891 | u32 fedcb = (bitval >> 1) & 0x1F; |
| 908 | B = (fedcb << 4) | (fedcb >> 4); | 892 | B = (fedcb << 4) | (fedcb >> 4); |
| 909 | } break; | 893 | } break; |
| 910 | 894 | ||
| @@ -912,12 +896,12 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 912 | assert(!"Unsupported trit encoding for color values!"); | 896 | assert(!"Unsupported trit encoding for color values!"); |
| 913 | break; | 897 | break; |
| 914 | } // switch(bitlen) | 898 | } // switch(bitlen) |
| 915 | } // case eIntegerEncoding_Trit | 899 | } // case IntegerEncoding::Trit |
| 916 | break; | 900 | break; |
| 917 | 901 | ||
| 918 | case eIntegerEncoding_Quint: { | 902 | case IntegerEncoding::Qus32: { |
| 919 | 903 | ||
| 920 | D = val.GetQuintValue(); | 904 | D = val.qus32_value; |
| 921 | 905 | ||
| 922 | switch (bitlen) { | 906 | switch (bitlen) { |
| 923 | case 1: { | 907 | case 1: { |
| @@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 927 | case 2: { | 911 | case 2: { |
| 928 | C = 54; | 912 | C = 54; |
| 929 | // B = b0000bb00 | 913 | // B = b0000bb00 |
| 930 | uint32_t b = (bitval >> 1) & 1; | 914 | u32 b = (bitval >> 1) & 1; |
| 931 | B = (b << 8) | (b << 3) | (b << 2); | 915 | B = (b << 8) | (b << 3) | (b << 2); |
| 932 | } break; | 916 | } break; |
| 933 | 917 | ||
| 934 | case 3: { | 918 | case 3: { |
| 935 | C = 26; | 919 | C = 26; |
| 936 | // B = cb0000cbc | 920 | // B = cb0000cbc |
| 937 | uint32_t cb = (bitval >> 1) & 3; | 921 | u32 cb = (bitval >> 1) & 3; |
| 938 | B = (cb << 7) | (cb << 1) | (cb >> 1); | 922 | B = (cb << 7) | (cb << 1) | (cb >> 1); |
| 939 | } break; | 923 | } break; |
| 940 | 924 | ||
| 941 | case 4: { | 925 | case 4: { |
| 942 | C = 13; | 926 | C = 13; |
| 943 | // B = dcb0000dc | 927 | // B = dcb0000dc |
| 944 | uint32_t dcb = (bitval >> 1) & 7; | 928 | u32 dcb = (bitval >> 1) & 7; |
| 945 | B = (dcb << 6) | (dcb >> 1); | 929 | B = (dcb << 6) | (dcb >> 1); |
| 946 | } break; | 930 | } break; |
| 947 | 931 | ||
| 948 | case 5: { | 932 | case 5: { |
| 949 | C = 6; | 933 | C = 6; |
| 950 | // B = edcb0000e | 934 | // B = edcb0000e |
| 951 | uint32_t edcb = (bitval >> 1) & 0xF; | 935 | u32 edcb = (bitval >> 1) & 0xF; |
| 952 | B = (edcb << 5) | (edcb >> 3); | 936 | B = (edcb << 5) | (edcb >> 3); |
| 953 | } break; | 937 | } break; |
| 954 | 938 | ||
| 955 | default: | 939 | default: |
| 956 | assert(!"Unsupported quint encoding for color values!"); | 940 | assert(!"Unsupported qus32 encoding for color values!"); |
| 957 | break; | 941 | break; |
| 958 | } // switch(bitlen) | 942 | } // switch(bitlen) |
| 959 | } // case eIntegerEncoding_Quint | 943 | } // case IntegerEncoding::Qus32 |
| 960 | break; | 944 | break; |
| 961 | } // switch(val.GetEncoding()) | 945 | } // switch(val.encoding) |
| 962 | 946 | ||
| 963 | if (val.GetEncoding() != eIntegerEncoding_JustBits) { | 947 | if (val.encoding != IntegerEncoding::JustBits) { |
| 964 | uint32_t T = D * C + B; | 948 | u32 T = D * C + B; |
| 965 | T ^= A; | 949 | T ^= A; |
| 966 | T = (A & 0x80) | (T >> 2); | 950 | T = (A & 0x80) | (T >> 2); |
| 967 | out[outIdx++] = T; | 951 | out[outIdx++] = T; |
| @@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 969 | } | 953 | } |
| 970 | 954 | ||
| 971 | // Make sure that each of our values is in the proper range... | 955 | // Make sure that each of our values is in the proper range... |
| 972 | for (uint32_t i = 0; i < nValues; i++) { | 956 | for (u32 i = 0; i < nValues; i++) { |
| 973 | assert(out[i] <= 255); | 957 | assert(out[i] <= 255); |
| 974 | } | 958 | } |
| 975 | } | 959 | } |
| 976 | 960 | ||
| 977 | static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | 961 | static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { |
| 978 | uint32_t bitval = val.GetBitValue(); | 962 | u32 bitval = val.bit_value; |
| 979 | uint32_t bitlen = val.BaseBitLength(); | 963 | u32 bitlen = val.num_bits; |
| 980 | 964 | ||
| 981 | uint32_t A = Replicate(bitval & 1, 1, 7); | 965 | u32 A = Replicate(bitval & 1, 1, 7); |
| 982 | uint32_t B = 0, C = 0, D = 0; | 966 | u32 B = 0, C = 0, D = 0; |
| 983 | 967 | ||
| 984 | uint32_t result = 0; | 968 | u32 result = 0; |
| 985 | switch (val.GetEncoding()) { | 969 | switch (val.encoding) { |
| 986 | case eIntegerEncoding_JustBits: | 970 | case IntegerEncoding::JustBits: |
| 987 | result = Replicate(bitval, bitlen, 6); | 971 | result = Replicate(bitval, bitlen, 6); |
| 988 | break; | 972 | break; |
| 989 | 973 | ||
| 990 | case eIntegerEncoding_Trit: { | 974 | case IntegerEncoding::Trit: { |
| 991 | D = val.GetTritValue(); | 975 | D = val.trit_value; |
| 992 | assert(D < 3); | 976 | assert(D < 3); |
| 993 | 977 | ||
| 994 | switch (bitlen) { | 978 | switch (bitlen) { |
| 995 | case 0: { | 979 | case 0: { |
| 996 | uint32_t results[3] = {0, 32, 63}; | 980 | u32 results[3] = {0, 32, 63}; |
| 997 | result = results[D]; | 981 | result = results[D]; |
| 998 | } break; | 982 | } break; |
| 999 | 983 | ||
| @@ -1003,13 +987,13 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1003 | 987 | ||
| 1004 | case 2: { | 988 | case 2: { |
| 1005 | C = 23; | 989 | C = 23; |
| 1006 | uint32_t b = (bitval >> 1) & 1; | 990 | u32 b = (bitval >> 1) & 1; |
| 1007 | B = (b << 6) | (b << 2) | b; | 991 | B = (b << 6) | (b << 2) | b; |
| 1008 | } break; | 992 | } break; |
| 1009 | 993 | ||
| 1010 | case 3: { | 994 | case 3: { |
| 1011 | C = 11; | 995 | C = 11; |
| 1012 | uint32_t cb = (bitval >> 1) & 3; | 996 | u32 cb = (bitval >> 1) & 3; |
| 1013 | B = (cb << 5) | cb; | 997 | B = (cb << 5) | cb; |
| 1014 | } break; | 998 | } break; |
| 1015 | 999 | ||
| @@ -1019,13 +1003,13 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1019 | } | 1003 | } |
| 1020 | } break; | 1004 | } break; |
| 1021 | 1005 | ||
| 1022 | case eIntegerEncoding_Quint: { | 1006 | case IntegerEncoding::Qus32: { |
| 1023 | D = val.GetQuintValue(); | 1007 | D = val.qus32_value; |
| 1024 | assert(D < 5); | 1008 | assert(D < 5); |
| 1025 | 1009 | ||
| 1026 | switch (bitlen) { | 1010 | switch (bitlen) { |
| 1027 | case 0: { | 1011 | case 0: { |
| 1028 | uint32_t results[5] = {0, 16, 32, 47, 63}; | 1012 | u32 results[5] = {0, 16, 32, 47, 63}; |
| 1029 | result = results[D]; | 1013 | result = results[D]; |
| 1030 | } break; | 1014 | } break; |
| 1031 | 1015 | ||
| @@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1035 | 1019 | ||
| 1036 | case 2: { | 1020 | case 2: { |
| 1037 | C = 13; | 1021 | C = 13; |
| 1038 | uint32_t b = (bitval >> 1) & 1; | 1022 | u32 b = (bitval >> 1) & 1; |
| 1039 | B = (b << 6) | (b << 1); | 1023 | B = (b << 6) | (b << 1); |
| 1040 | } break; | 1024 | } break; |
| 1041 | 1025 | ||
| 1042 | default: | 1026 | default: |
| 1043 | assert(!"Invalid quint encoding for texel weight"); | 1027 | assert(!"Invalid qus32 encoding for texel weight"); |
| 1044 | break; | 1028 | break; |
| 1045 | } | 1029 | } |
| 1046 | } break; | 1030 | } break; |
| 1047 | } | 1031 | } |
| 1048 | 1032 | ||
| 1049 | if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { | 1033 | if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { |
| 1050 | // Decode the value... | 1034 | // Decode the value... |
| 1051 | result = D * C + B; | 1035 | result = D * C + B; |
| 1052 | result ^= A; | 1036 | result ^= A; |
| @@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1063 | return result; | 1047 | return result; |
| 1064 | } | 1048 | } |
| 1065 | 1049 | ||
| 1066 | static void UnquantizeTexelWeights(uint32_t out[2][144], | 1050 | static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, |
| 1067 | const std::vector<IntegerEncodedValue>& weights, | 1051 | const TexelWeightParams& params, const u32 blockWidth, |
| 1068 | const TexelWeightParams& params, const uint32_t blockWidth, | 1052 | const u32 blockHeight) { |
| 1069 | const uint32_t blockHeight) { | 1053 | u32 weightIdx = 0; |
| 1070 | uint32_t weightIdx = 0; | 1054 | u32 unquantized[2][144]; |
| 1071 | uint32_t unquantized[2][144]; | ||
| 1072 | 1055 | ||
| 1073 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { | 1056 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { |
| 1074 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); | 1057 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); |
| @@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1086 | } | 1069 | } |
| 1087 | 1070 | ||
| 1088 | // Do infill if necessary (Section C.2.18) ... | 1071 | // Do infill if necessary (Section C.2.18) ... |
| 1089 | uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); | 1072 | u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); |
| 1090 | uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); | 1073 | u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); |
| 1091 | 1074 | ||
| 1092 | const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; | 1075 | const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; |
| 1093 | for (uint32_t plane = 0; plane < kPlaneScale; plane++) | 1076 | for (u32 plane = 0; plane < kPlaneScale; plane++) |
| 1094 | for (uint32_t t = 0; t < blockHeight; t++) | 1077 | for (u32 t = 0; t < blockHeight; t++) |
| 1095 | for (uint32_t s = 0; s < blockWidth; s++) { | 1078 | for (u32 s = 0; s < blockWidth; s++) { |
| 1096 | uint32_t cs = Ds * s; | 1079 | u32 cs = Ds * s; |
| 1097 | uint32_t ct = Dt * t; | 1080 | u32 ct = Dt * t; |
| 1098 | 1081 | ||
| 1099 | uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; | 1082 | u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; |
| 1100 | uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; | 1083 | u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; |
| 1101 | 1084 | ||
| 1102 | uint32_t js = gs >> 4; | 1085 | u32 js = gs >> 4; |
| 1103 | uint32_t fs = gs & 0xF; | 1086 | u32 fs = gs & 0xF; |
| 1104 | 1087 | ||
| 1105 | uint32_t jt = gt >> 4; | 1088 | u32 jt = gt >> 4; |
| 1106 | uint32_t ft = gt & 0x0F; | 1089 | u32 ft = gt & 0x0F; |
| 1107 | 1090 | ||
| 1108 | uint32_t w11 = (fs * ft + 8) >> 4; | 1091 | u32 w11 = (fs * ft + 8) >> 4; |
| 1109 | uint32_t w10 = ft - w11; | 1092 | u32 w10 = ft - w11; |
| 1110 | uint32_t w01 = fs - w11; | 1093 | u32 w01 = fs - w11; |
| 1111 | uint32_t w00 = 16 - fs - ft + w11; | 1094 | u32 w00 = 16 - fs - ft + w11; |
| 1112 | 1095 | ||
| 1113 | uint32_t v0 = js + jt * params.m_Width; | 1096 | u32 v0 = js + jt * params.m_Width; |
| 1114 | 1097 | ||
| 1115 | #define FIND_TEXEL(tidx, bidx) \ | 1098 | #define FIND_TEXEL(tidx, bidx) \ |
| 1116 | uint32_t p##bidx = 0; \ | 1099 | u32 p##bidx = 0; \ |
| 1117 | do { \ | 1100 | do { \ |
| 1118 | if ((tidx) < (params.m_Width * params.m_Height)) { \ | 1101 | if ((tidx) < (params.m_Width * params.m_Height)) { \ |
| 1119 | p##bidx = unquantized[plane][(tidx)]; \ | 1102 | p##bidx = unquantized[plane][(tidx)]; \ |
| @@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1133 | } | 1116 | } |
| 1134 | 1117 | ||
| 1135 | // Transfers a bit as described in C.2.14 | 1118 | // Transfers a bit as described in C.2.14 |
| 1136 | static inline void BitTransferSigned(int32_t& a, int32_t& b) { | 1119 | static inline void BitTransferSigned(s32& a, s32& b) { |
| 1137 | b >>= 1; | 1120 | b >>= 1; |
| 1138 | b |= a & 0x80; | 1121 | b |= a & 0x80; |
| 1139 | a >>= 1; | 1122 | a >>= 1; |
| @@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) { | |||
| 1144 | 1127 | ||
| 1145 | // Adds more precision to the blue channel as described | 1128 | // Adds more precision to the blue channel as described |
| 1146 | // in C.2.14 | 1129 | // in C.2.14 |
| 1147 | static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { | 1130 | static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { |
| 1148 | return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), | 1131 | return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), |
| 1149 | static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); | 1132 | static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); |
| 1150 | } | 1133 | } |
| 1151 | 1134 | ||
| 1152 | // Partition selection functions as specified in | 1135 | // Partition selection functions as specified in |
| 1153 | // C.2.21 | 1136 | // C.2.21 |
| 1154 | static inline uint32_t hash52(uint32_t p) { | 1137 | static inline u32 hash52(u32 p) { |
| 1155 | p ^= p >> 15; | 1138 | p ^= p >> 15; |
| 1156 | p -= p << 17; | 1139 | p -= p << 17; |
| 1157 | p += p << 7; | 1140 | p += p << 7; |
| @@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) { | |||
| 1165 | return p; | 1148 | return p; |
| 1166 | } | 1149 | } |
| 1167 | 1150 | ||
| 1168 | static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | 1151 | static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { |
| 1169 | int32_t partitionCount, int32_t smallBlock) { | ||
| 1170 | if (1 == partitionCount) | 1152 | if (1 == partitionCount) |
| 1171 | return 0; | 1153 | return 0; |
| 1172 | 1154 | ||
| @@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1178 | 1160 | ||
| 1179 | seed += (partitionCount - 1) * 1024; | 1161 | seed += (partitionCount - 1) * 1024; |
| 1180 | 1162 | ||
| 1181 | uint32_t rnum = hash52(static_cast<uint32_t>(seed)); | 1163 | u32 rnum = hash52(static_cast<u32>(seed)); |
| 1182 | uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); | 1164 | u8 seed1 = static_cast<u8>(rnum & 0xF); |
| 1183 | uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); | 1165 | u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); |
| 1184 | uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); | 1166 | u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); |
| 1185 | uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); | 1167 | u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); |
| 1186 | uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); | 1168 | u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); |
| 1187 | uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); | 1169 | u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); |
| 1188 | uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); | 1170 | u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); |
| 1189 | uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); | 1171 | u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); |
| 1190 | uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); | 1172 | u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); |
| 1191 | uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); | 1173 | u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); |
| 1192 | uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); | 1174 | u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); |
| 1193 | uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); | 1175 | u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); |
| 1194 | 1176 | ||
| 1195 | seed1 = static_cast<uint8_t>(seed1 * seed1); | 1177 | seed1 = static_cast<u8>(seed1 * seed1); |
| 1196 | seed2 = static_cast<uint8_t>(seed2 * seed2); | 1178 | seed2 = static_cast<u8>(seed2 * seed2); |
| 1197 | seed3 = static_cast<uint8_t>(seed3 * seed3); | 1179 | seed3 = static_cast<u8>(seed3 * seed3); |
| 1198 | seed4 = static_cast<uint8_t>(seed4 * seed4); | 1180 | seed4 = static_cast<u8>(seed4 * seed4); |
| 1199 | seed5 = static_cast<uint8_t>(seed5 * seed5); | 1181 | seed5 = static_cast<u8>(seed5 * seed5); |
| 1200 | seed6 = static_cast<uint8_t>(seed6 * seed6); | 1182 | seed6 = static_cast<u8>(seed6 * seed6); |
| 1201 | seed7 = static_cast<uint8_t>(seed7 * seed7); | 1183 | seed7 = static_cast<u8>(seed7 * seed7); |
| 1202 | seed8 = static_cast<uint8_t>(seed8 * seed8); | 1184 | seed8 = static_cast<u8>(seed8 * seed8); |
| 1203 | seed9 = static_cast<uint8_t>(seed9 * seed9); | 1185 | seed9 = static_cast<u8>(seed9 * seed9); |
| 1204 | seed10 = static_cast<uint8_t>(seed10 * seed10); | 1186 | seed10 = static_cast<u8>(seed10 * seed10); |
| 1205 | seed11 = static_cast<uint8_t>(seed11 * seed11); | 1187 | seed11 = static_cast<u8>(seed11 * seed11); |
| 1206 | seed12 = static_cast<uint8_t>(seed12 * seed12); | 1188 | seed12 = static_cast<u8>(seed12 * seed12); |
| 1207 | 1189 | ||
| 1208 | int32_t sh1, sh2, sh3; | 1190 | s32 sh1, sh2, sh3; |
| 1209 | if (seed & 1) { | 1191 | if (seed & 1) { |
| 1210 | sh1 = (seed & 2) ? 4 : 5; | 1192 | sh1 = (seed & 2) ? 4 : 5; |
| 1211 | sh2 = (partitionCount == 3) ? 6 : 5; | 1193 | sh2 = (partitionCount == 3) ? 6 : 5; |
| @@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1215 | } | 1197 | } |
| 1216 | sh3 = (seed & 0x10) ? sh1 : sh2; | 1198 | sh3 = (seed & 0x10) ? sh1 : sh2; |
| 1217 | 1199 | ||
| 1218 | seed1 = static_cast<uint8_t>(seed1 >> sh1); | 1200 | seed1 = static_cast<u8>(seed1 >> sh1); |
| 1219 | seed2 = static_cast<uint8_t>(seed2 >> sh2); | 1201 | seed2 = static_cast<u8>(seed2 >> sh2); |
| 1220 | seed3 = static_cast<uint8_t>(seed3 >> sh1); | 1202 | seed3 = static_cast<u8>(seed3 >> sh1); |
| 1221 | seed4 = static_cast<uint8_t>(seed4 >> sh2); | 1203 | seed4 = static_cast<u8>(seed4 >> sh2); |
| 1222 | seed5 = static_cast<uint8_t>(seed5 >> sh1); | 1204 | seed5 = static_cast<u8>(seed5 >> sh1); |
| 1223 | seed6 = static_cast<uint8_t>(seed6 >> sh2); | 1205 | seed6 = static_cast<u8>(seed6 >> sh2); |
| 1224 | seed7 = static_cast<uint8_t>(seed7 >> sh1); | 1206 | seed7 = static_cast<u8>(seed7 >> sh1); |
| 1225 | seed8 = static_cast<uint8_t>(seed8 >> sh2); | 1207 | seed8 = static_cast<u8>(seed8 >> sh2); |
| 1226 | seed9 = static_cast<uint8_t>(seed9 >> sh3); | 1208 | seed9 = static_cast<u8>(seed9 >> sh3); |
| 1227 | seed10 = static_cast<uint8_t>(seed10 >> sh3); | 1209 | seed10 = static_cast<u8>(seed10 >> sh3); |
| 1228 | seed11 = static_cast<uint8_t>(seed11 >> sh3); | 1210 | seed11 = static_cast<u8>(seed11 >> sh3); |
| 1229 | seed12 = static_cast<uint8_t>(seed12 >> sh3); | 1211 | seed12 = static_cast<u8>(seed12 >> sh3); |
| 1230 | 1212 | ||
| 1231 | int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | 1213 | s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| 1232 | int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | 1214 | s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| 1233 | int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | 1215 | s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
| 1234 | int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | 1216 | s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
| 1235 | 1217 | ||
| 1236 | a &= 0x3F; | 1218 | a &= 0x3F; |
| 1237 | b &= 0x3F; | 1219 | b &= 0x3F; |
| @@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1252 | return 3; | 1234 | return 3; |
| 1253 | } | 1235 | } |
| 1254 | 1236 | ||
| 1255 | static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, | 1237 | static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { |
| 1256 | int32_t smallBlock) { | ||
| 1257 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); | 1238 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); |
| 1258 | } | 1239 | } |
| 1259 | 1240 | ||
| 1260 | // Section C.2.14 | 1241 | // Section C.2.14 |
| 1261 | static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, | 1242 | static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, |
| 1262 | uint32_t colorEndpointMode) { | 1243 | u32 colorEndpos32Mode) { |
| 1263 | #define READ_UINT_VALUES(N) \ | 1244 | #define READ_UINT_VALUES(N) \ |
| 1264 | uint32_t v[N]; \ | 1245 | u32 v[N]; \ |
| 1265 | for (uint32_t i = 0; i < N; i++) { \ | 1246 | for (u32 i = 0; i < N; i++) { \ |
| 1266 | v[i] = *(colorValues++); \ | 1247 | v[i] = *(colorValues++); \ |
| 1267 | } | 1248 | } |
| 1268 | 1249 | ||
| 1269 | #define READ_INT_VALUES(N) \ | 1250 | #define READ_INT_VALUES(N) \ |
| 1270 | int32_t v[N]; \ | 1251 | s32 v[N]; \ |
| 1271 | for (uint32_t i = 0; i < N; i++) { \ | 1252 | for (u32 i = 0; i < N; i++) { \ |
| 1272 | v[i] = static_cast<int32_t>(*(colorValues++)); \ | 1253 | v[i] = static_cast<s32>(*(colorValues++)); \ |
| 1273 | } | 1254 | } |
| 1274 | 1255 | ||
| 1275 | switch (colorEndpointMode) { | 1256 | switch (colorEndpos32Mode) { |
| 1276 | case 0: { | 1257 | case 0: { |
| 1277 | READ_UINT_VALUES(2) | 1258 | READ_UINT_VALUES(2) |
| 1278 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); | 1259 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); |
| @@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1281 | 1262 | ||
| 1282 | case 1: { | 1263 | case 1: { |
| 1283 | READ_UINT_VALUES(2) | 1264 | READ_UINT_VALUES(2) |
| 1284 | uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); | 1265 | u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 1285 | uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); | 1266 | u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); |
| 1286 | ep1 = Pixel(0xFF, L0, L0, L0); | 1267 | ep1 = Pixel(0xFF, L0, L0, L0); |
| 1287 | ep2 = Pixel(0xFF, L1, L1, L1); | 1268 | ep2 = Pixel(0xFF, L1, L1, L1); |
| 1288 | } break; | 1269 | } break; |
| @@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1371 | } break; | 1352 | } break; |
| 1372 | 1353 | ||
| 1373 | default: | 1354 | default: |
| 1374 | assert(!"Unsupported color endpoint mode (is it HDR?)"); | 1355 | assert(!"Unsupported color endpos32 mode (is it HDR?)"); |
| 1375 | break; | 1356 | break; |
| 1376 | } | 1357 | } |
| 1377 | 1358 | ||
| @@ -1379,8 +1360,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1379 | #undef READ_INT_VALUES | 1360 | #undef READ_INT_VALUES |
| 1380 | } | 1361 | } |
| 1381 | 1362 | ||
| 1382 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | 1363 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, |
| 1383 | const uint32_t blockHeight, uint32_t* outBuf) { | 1364 | u32* outBuf) { |
| 1384 | InputBitStream strm(inBuf); | 1365 | InputBitStream strm(inBuf); |
| 1385 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1366 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1386 | 1367 | ||
| @@ -1415,7 +1396,7 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1415 | } | 1396 | } |
| 1416 | 1397 | ||
| 1417 | // Read num partitions | 1398 | // Read num partitions |
| 1418 | uint32_t nPartitions = strm.ReadBits(2) + 1; | 1399 | u32 nPartitions = strm.ReadBits<2>() + 1; |
| 1419 | assert(nPartitions <= 4); | 1400 | assert(nPartitions <= 4); |
| 1420 | 1401 | ||
| 1421 | if (nPartitions == 4 && weightParams.m_bDualPlane) { | 1402 | if (nPartitions == 4 && weightParams.m_bDualPlane) { |
| @@ -1424,36 +1405,36 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1424 | return; | 1405 | return; |
| 1425 | } | 1406 | } |
| 1426 | 1407 | ||
| 1427 | // Based on the number of partitions, read the color endpoint mode for | 1408 | // Based on the number of partitions, read the color endpos32 mode for |
| 1428 | // each partition. | 1409 | // each partition. |
| 1429 | 1410 | ||
| 1430 | // Determine partitions, partition index, and color endpoint modes | 1411 | // Determine partitions, partition index, and color endpos32 modes |
| 1431 | int32_t planeIdx = -1; | 1412 | s32 planeIdx = -1; |
| 1432 | uint32_t partitionIndex; | 1413 | u32 partitionIndex; |
| 1433 | uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; | 1414 | u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; |
| 1434 | 1415 | ||
| 1435 | // Define color data. | 1416 | // Define color data. |
| 1436 | uint8_t colorEndpointData[16]; | 1417 | u8 colorEndpos32Data[16]; |
| 1437 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1418 | memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); |
| 1438 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1419 | OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); |
| 1439 | 1420 | ||
| 1440 | // Read extra config data... | 1421 | // Read extra config data... |
| 1441 | uint32_t baseCEM = 0; | 1422 | u32 baseCEM = 0; |
| 1442 | if (nPartitions == 1) { | 1423 | if (nPartitions == 1) { |
| 1443 | colorEndpointMode[0] = strm.ReadBits(4); | 1424 | colorEndpos32Mode[0] = strm.ReadBits<4>(); |
| 1444 | partitionIndex = 0; | 1425 | partitionIndex = 0; |
| 1445 | } else { | 1426 | } else { |
| 1446 | partitionIndex = strm.ReadBits(10); | 1427 | partitionIndex = strm.ReadBits<10>(); |
| 1447 | baseCEM = strm.ReadBits(6); | 1428 | baseCEM = strm.ReadBits<6>(); |
| 1448 | } | 1429 | } |
| 1449 | uint32_t baseMode = (baseCEM & 3); | 1430 | u32 baseMode = (baseCEM & 3); |
| 1450 | 1431 | ||
| 1451 | // Remaining bits are color endpoint data... | 1432 | // Remaining bits are color endpos32 data... |
| 1452 | uint32_t nWeightBits = weightParams.GetPackedBitSize(); | 1433 | u32 nWeightBits = weightParams.GetPackedBitSize(); |
| 1453 | int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); | 1434 | s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); |
| 1454 | 1435 | ||
| 1455 | // Consider extra bits prior to texel data... | 1436 | // Consider extra bits prior to texel data... |
| 1456 | uint32_t extraCEMbits = 0; | 1437 | u32 extraCEMbits = 0; |
| 1457 | if (baseMode) { | 1438 | if (baseMode) { |
| 1458 | switch (nPartitions) { | 1439 | switch (nPartitions) { |
| 1459 | case 2: | 1440 | case 2: |
| @@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1473 | remainingBits -= extraCEMbits; | 1454 | remainingBits -= extraCEMbits; |
| 1474 | 1455 | ||
| 1475 | // Do we have a dual plane situation? | 1456 | // Do we have a dual plane situation? |
| 1476 | uint32_t planeSelectorBits = 0; | 1457 | u32 planeSelectorBits = 0; |
| 1477 | if (weightParams.m_bDualPlane) { | 1458 | if (weightParams.m_bDualPlane) { |
| 1478 | planeSelectorBits = 2; | 1459 | planeSelectorBits = 2; |
| 1479 | } | 1460 | } |
| 1480 | remainingBits -= planeSelectorBits; | 1461 | remainingBits -= planeSelectorBits; |
| 1481 | 1462 | ||
| 1482 | // Read color data... | 1463 | // Read color data... |
| 1483 | uint32_t colorDataBits = remainingBits; | 1464 | u32 colorDataBits = remainingBits; |
| 1484 | while (remainingBits > 0) { | 1465 | while (remainingBits > 0) { |
| 1485 | uint32_t nb = std::min(remainingBits, 8); | 1466 | u32 nb = std::min(remainingBits, 8); |
| 1486 | uint32_t b = strm.ReadBits(nb); | 1467 | u32 b = strm.ReadBits(nb); |
| 1487 | colorEndpointStream.WriteBits(b, nb); | 1468 | colorEndpos32Stream.WriteBits(b, nb); |
| 1488 | remainingBits -= 8; | 1469 | remainingBits -= 8; |
| 1489 | } | 1470 | } |
| 1490 | 1471 | ||
| @@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1493 | 1474 | ||
| 1494 | // Read the rest of the CEM | 1475 | // Read the rest of the CEM |
| 1495 | if (baseMode) { | 1476 | if (baseMode) { |
| 1496 | uint32_t extraCEM = strm.ReadBits(extraCEMbits); | 1477 | u32 extraCEM = strm.ReadBits(extraCEMbits); |
| 1497 | uint32_t CEM = (extraCEM << 6) | baseCEM; | 1478 | u32 CEM = (extraCEM << 6) | baseCEM; |
| 1498 | CEM >>= 2; | 1479 | CEM >>= 2; |
| 1499 | 1480 | ||
| 1500 | bool C[4] = {0}; | 1481 | bool C[4] = {0}; |
| 1501 | for (uint32_t i = 0; i < nPartitions; i++) { | 1482 | for (u32 i = 0; i < nPartitions; i++) { |
| 1502 | C[i] = CEM & 1; | 1483 | C[i] = CEM & 1; |
| 1503 | CEM >>= 1; | 1484 | CEM >>= 1; |
| 1504 | } | 1485 | } |
| 1505 | 1486 | ||
| 1506 | uint8_t M[4] = {0}; | 1487 | u8 M[4] = {0}; |
| 1507 | for (uint32_t i = 0; i < nPartitions; i++) { | 1488 | for (u32 i = 0; i < nPartitions; i++) { |
| 1508 | M[i] = CEM & 3; | 1489 | M[i] = CEM & 3; |
| 1509 | CEM >>= 2; | 1490 | CEM >>= 2; |
| 1510 | assert(M[i] <= 3); | 1491 | assert(M[i] <= 3); |
| 1511 | } | 1492 | } |
| 1512 | 1493 | ||
| 1513 | for (uint32_t i = 0; i < nPartitions; i++) { | 1494 | for (u32 i = 0; i < nPartitions; i++) { |
| 1514 | colorEndpointMode[i] = baseMode; | 1495 | colorEndpos32Mode[i] = baseMode; |
| 1515 | if (!(C[i])) | 1496 | if (!(C[i])) |
| 1516 | colorEndpointMode[i] -= 1; | 1497 | colorEndpos32Mode[i] -= 1; |
| 1517 | colorEndpointMode[i] <<= 2; | 1498 | colorEndpos32Mode[i] <<= 2; |
| 1518 | colorEndpointMode[i] |= M[i]; | 1499 | colorEndpos32Mode[i] |= M[i]; |
| 1519 | } | 1500 | } |
| 1520 | } else if (nPartitions > 1) { | 1501 | } else if (nPartitions > 1) { |
| 1521 | uint32_t CEM = baseCEM >> 2; | 1502 | u32 CEM = baseCEM >> 2; |
| 1522 | for (uint32_t i = 0; i < nPartitions; i++) { | 1503 | for (u32 i = 0; i < nPartitions; i++) { |
| 1523 | colorEndpointMode[i] = CEM; | 1504 | colorEndpos32Mode[i] = CEM; |
| 1524 | } | 1505 | } |
| 1525 | } | 1506 | } |
| 1526 | 1507 | ||
| 1527 | // Make sure everything up till here is sane. | 1508 | // Make sure everything up till here is sane. |
| 1528 | for (uint32_t i = 0; i < nPartitions; i++) { | 1509 | for (u32 i = 0; i < nPartitions; i++) { |
| 1529 | assert(colorEndpointMode[i] < 16); | 1510 | assert(colorEndpos32Mode[i] < 16); |
| 1530 | } | 1511 | } |
| 1531 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); | 1512 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); |
| 1532 | 1513 | ||
| 1533 | // Decode both color data and texel weight data | 1514 | // Decode both color data and texel weight data |
| 1534 | uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions | 1515 | u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions |
| 1535 | DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, | 1516 | DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, |
| 1536 | colorDataBits); | 1517 | colorDataBits); |
| 1537 | 1518 | ||
| 1538 | Pixel endpoints[4][2]; | 1519 | Pixel endpos32s[4][2]; |
| 1539 | const uint32_t* colorValuesPtr = colorValues; | 1520 | const u32* colorValuesPtr = colorValues; |
| 1540 | for (uint32_t i = 0; i < nPartitions; i++) { | 1521 | for (u32 i = 0; i < nPartitions; i++) { |
| 1541 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); | 1522 | ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); |
| 1542 | } | 1523 | } |
| 1543 | 1524 | ||
| 1544 | // Read the texel weight data.. | 1525 | // Read the texel weight data.. |
| 1545 | uint8_t texelWeightData[16]; | 1526 | u8 texelWeightData[16]; |
| 1546 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1527 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); |
| 1547 | 1528 | ||
| 1548 | // Reverse everything | 1529 | // Reverse everything |
| 1549 | for (uint32_t i = 0; i < 8; i++) { | 1530 | for (u32 i = 0; i < 8; i++) { |
| 1550 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits | 1531 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits |
| 1551 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 | 1532 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 |
| 1552 | unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); | 1533 | u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); |
| 1553 | unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); | 1534 | u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); |
| 1554 | #undef REVERSE_BYTE | 1535 | #undef REVERSE_BYTE |
| 1555 | 1536 | ||
| 1556 | texelWeightData[i] = b; | 1537 | texelWeightData[i] = b; |
| @@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1558 | } | 1539 | } |
| 1559 | 1540 | ||
| 1560 | // Make sure that higher non-texel bits are set to zero | 1541 | // Make sure that higher non-texel bits are set to zero |
| 1561 | const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1542 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1562 | texelWeightData[clearByteStart - 1] = | 1543 | texelWeightData[clearByteStart - 1] = |
| 1563 | texelWeightData[clearByteStart - 1] & | 1544 | texelWeightData[clearByteStart - 1] & |
| 1564 | static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1545 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1546 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1566 | 1547 | ||
| 1567 | std::vector<IntegerEncodedValue> texelWeightValues; | 1548 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1549 | texelWeightValues.reserve(64); | ||
| 1550 | |||
| 1568 | InputBitStream weightStream(texelWeightData); | 1551 | InputBitStream weightStream(texelWeightData); |
| 1569 | 1552 | ||
| 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1553 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1571 | weightParams.m_MaxWeight, | 1554 | weightParams.GetNumWeightValues()); |
| 1572 | weightParams.GetNumWeightValues()); | ||
| 1573 | 1555 | ||
| 1574 | // Blocks can be at most 12x12, so we can have as many as 144 weights | 1556 | // Blocks can be at most 12x12, so we can have as many as 144 weights |
| 1575 | uint32_t weights[2][144]; | 1557 | u32 weights[2][144]; |
| 1576 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); | 1558 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); |
| 1577 | 1559 | ||
| 1578 | // Now that we have endpoints and weights, we can interpolate and generate | 1560 | // Now that we have endpos32s and weights, we can s32erpolate and generate |
| 1579 | // the proper decoding... | 1561 | // the proper decoding... |
| 1580 | for (uint32_t j = 0; j < blockHeight; j++) | 1562 | for (u32 j = 0; j < blockHeight; j++) |
| 1581 | for (uint32_t i = 0; i < blockWidth; i++) { | 1563 | for (u32 i = 0; i < blockWidth; i++) { |
| 1582 | uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, | 1564 | u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, |
| 1583 | (blockHeight * blockWidth) < 32); | 1565 | (blockHeight * blockWidth) < 32); |
| 1584 | assert(partition < nPartitions); | 1566 | assert(partition < nPartitions); |
| 1585 | 1567 | ||
| 1586 | Pixel p; | 1568 | Pixel p; |
| 1587 | for (uint32_t c = 0; c < 4; c++) { | 1569 | for (u32 c = 0; c < 4; c++) { |
| 1588 | uint32_t C0 = endpoints[partition][0].Component(c); | 1570 | u32 C0 = endpos32s[partition][0].Component(c); |
| 1589 | C0 = Replicate(C0, 8, 16); | 1571 | C0 = Replicate(C0, 8, 16); |
| 1590 | uint32_t C1 = endpoints[partition][1].Component(c); | 1572 | u32 C1 = endpos32s[partition][1].Component(c); |
| 1591 | C1 = Replicate(C1, 8, 16); | 1573 | C1 = Replicate(C1, 8, 16); |
| 1592 | 1574 | ||
| 1593 | uint32_t plane = 0; | 1575 | u32 plane = 0; |
| 1594 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | 1576 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |
| 1595 | plane = 1; | 1577 | plane = 1; |
| 1596 | } | 1578 | } |
| 1597 | 1579 | ||
| 1598 | uint32_t weight = weights[plane][j * blockWidth + i]; | 1580 | u32 weight = weights[plane][j * blockWidth + i]; |
| 1599 | uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; | 1581 | u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; |
| 1600 | if (C == 65535) { | 1582 | if (C == 65535) { |
| 1601 | p.Component(c) = 255; | 1583 | p.Component(c) = 255; |
| 1602 | } else { | 1584 | } else { |
| 1603 | double Cf = static_cast<double>(C); | 1585 | double Cf = static_cast<double>(C); |
| 1604 | p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); | 1586 | p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); |
| 1605 | } | 1587 | } |
| 1606 | } | 1588 | } |
| 1607 | 1589 | ||
| @@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1613 | 1595 | ||
| 1614 | namespace Tegra::Texture::ASTC { | 1596 | namespace Tegra::Texture::ASTC { |
| 1615 | 1597 | ||
| 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 1598 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, |
| 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1599 | u32 block_height) { |
| 1618 | uint32_t blockIdx = 0; | 1600 | u32 blockIdx = 0; |
| 1619 | std::size_t depth_offset = 0; | 1601 | std::size_t depth_offset = 0; |
| 1620 | std::vector<uint8_t> outData(height * width * depth * 4); | 1602 | std::vector<u8> outData(height * width * depth * 4); |
| 1621 | for (uint32_t k = 0; k < depth; k++) { | 1603 | for (u32 k = 0; k < depth; k++) { |
| 1622 | for (uint32_t j = 0; j < height; j += block_height) { | 1604 | for (u32 j = 0; j < height; j += block_height) { |
| 1623 | for (uint32_t i = 0; i < width; i += block_width) { | 1605 | for (u32 i = 0; i < width; i += block_width) { |
| 1624 | 1606 | ||
| 1625 | const uint8_t* blockPtr = data + blockIdx * 16; | 1607 | const u8* blockPtr = data + blockIdx * 16; |
| 1626 | 1608 | ||
| 1627 | // Blocks can be at most 12x12 | 1609 | // Blocks can be at most 12x12 |
| 1628 | uint32_t uncompData[144]; | 1610 | u32 uncompData[144]; |
| 1629 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1611 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1630 | 1612 | ||
| 1631 | uint32_t decompWidth = std::min(block_width, width - i); | 1613 | u32 decompWidth = std::min(block_width, width - i); |
| 1632 | uint32_t decompHeight = std::min(block_height, height - j); | 1614 | u32 decompHeight = std::min(block_height, height - j); |
| 1633 | 1615 | ||
| 1634 | uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1616 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; |
| 1635 | for (uint32_t jj = 0; jj < decompHeight; jj++) { | 1617 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1636 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1618 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); |
| 1637 | } | 1619 | } |
| 1638 | 1620 | ||