diff options
| author | 2021-03-25 16:53:51 -0400 | |
|---|---|---|
| committer | 2021-03-25 16:53:51 -0400 | |
| commit | 2f83d9a61bca42d9ef24074beb2b11b19bd4cecd (patch) | |
| tree | 514e40eb750280c2e3025f9301befb6f8c9b46e9 /src/video_core/textures/astc.cpp | |
| parent | astc_decoder: Reimplement Layers (diff) | |
| download | yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.gz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.tar.xz yuzu-2f83d9a61bca42d9ef24074beb2b11b19bd4cecd.zip | |
astc_decoder: Refactor for style and more efficient memory use
Diffstat (limited to 'src/video_core/textures/astc.cpp')
| -rw-r--r-- | src/video_core/textures/astc.cpp | 1710 |
1 files changed, 0 insertions, 1710 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp deleted file mode 100644 index 3625b666c..000000000 --- a/src/video_core/textures/astc.cpp +++ /dev/null | |||
| @@ -1,1710 +0,0 @@ | |||
| 1 | // Copyright 2016 The University of North Carolina at Chapel Hill | ||
| 2 | // | ||
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 4 | // you may not use this file except in compliance with the License. | ||
| 5 | // You may obtain a copy of the License at | ||
| 6 | // | ||
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
| 8 | // | ||
| 9 | // Unless required by applicable law or agreed to in writing, software | ||
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 12 | // See the License for the specific language governing permissions and | ||
| 13 | // limitations under the License. | ||
| 14 | // | ||
| 15 | // Please send all BUG REPORTS to <pavel@cs.unc.edu>. | ||
| 16 | // <http://gamma.cs.unc.edu/FasTC/> | ||
| 17 | |||
| 18 | #include <algorithm> | ||
| 19 | #include <cassert> | ||
| 20 | #include <cstring> | ||
| 21 | #include <span> | ||
| 22 | #include <vector> | ||
| 23 | |||
| 24 | #include <boost/container/static_vector.hpp> | ||
| 25 | |||
| 26 | #include "common/common_types.h" | ||
| 27 | |||
| 28 | #include "video_core/textures/astc.h" | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
/// Returns how many bits of `n` are set to one.
constexpr u32 Popcnt(u32 n) {
    u32 count = 0;
    while (n != 0) {
        // Clearing the lowest set bit once per iteration means the loop runs exactly
        // once per set bit.
        n &= n - 1;
        ++count;
    }
    return count;
}
| 40 | |||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | class InputBitStream { | ||
| 44 | public: | ||
| 45 | constexpr explicit InputBitStream(std::span<const u8> data, size_t start_offset = 0) | ||
| 46 | : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {} | ||
| 47 | |||
| 48 | constexpr size_t GetBitsRead() const { | ||
| 49 | return bits_read; | ||
| 50 | } | ||
| 51 | |||
| 52 | constexpr bool ReadBit() { | ||
| 53 | if (bits_read >= total_bits * 8) { | ||
| 54 | return 0; | ||
| 55 | } | ||
| 56 | const bool bit = ((*cur_byte >> next_bit) & 1) != 0; | ||
| 57 | ++next_bit; | ||
| 58 | while (next_bit >= 8) { | ||
| 59 | next_bit -= 8; | ||
| 60 | ++cur_byte; | ||
| 61 | } | ||
| 62 | ++bits_read; | ||
| 63 | return bit; | ||
| 64 | } | ||
| 65 | |||
| 66 | constexpr u32 ReadBits(std::size_t nBits) { | ||
| 67 | u32 ret = 0; | ||
| 68 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 69 | ret |= (ReadBit() & 1) << i; | ||
| 70 | } | ||
| 71 | return ret; | ||
| 72 | } | ||
| 73 | |||
| 74 | template <std::size_t nBits> | ||
| 75 | constexpr u32 ReadBits() { | ||
| 76 | u32 ret = 0; | ||
| 77 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 78 | ret |= (ReadBit() & 1) << i; | ||
| 79 | } | ||
| 80 | return ret; | ||
| 81 | } | ||
| 82 | |||
| 83 | private: | ||
| 84 | const u8* cur_byte; | ||
| 85 | size_t total_bits = 0; | ||
| 86 | size_t next_bit = 0; | ||
| 87 | size_t bits_read = 0; | ||
| 88 | }; | ||
| 89 | |||
/// Sequential writer that stores bits into a byte buffer, filling the least-significant bit of
/// each byte first. At most `bits` bits are written; further writes are silently dropped.
class OutputBitStream {
public:
    /// @param ptr           First byte to write to.
    /// @param bits          Maximum number of bits this stream may write (0 writes nothing).
    /// @param start_offset  Only `start_offset % 8` is used: the bit inside `*ptr` that receives
    ///                      the first written bit.
    constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0)
        : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {}

    /// Number of bits actually stored so far.
    constexpr std::size_t GetBitsWritten() const {
        return bits_written;
    }

    /// Writes the low `nBits` bits of `val`, most-significant of those bits first.
    constexpr void WriteBitsR(u32 val, u32 nBits) {
        for (u32 i = 0; i < nBits; i++) {
            WriteBit((val >> (nBits - i - 1)) & 1);
        }
    }

    /// Writes the low `nBits` bits of `val`, least-significant bit first.
    constexpr void WriteBits(u32 val, u32 nBits) {
        for (u32 i = 0; i < nBits; i++) {
            WriteBit((val >> i) & 1);
        }
    }

private:
    /// Stores a single bit and advances the cursor; does nothing once `num_bits` bits
    /// have been written.
    constexpr void WriteBit(bool b) {
        if (bits_written >= num_bits) {
            return;
        }

        const u8 mask = static_cast<u8>(1U << next_bit);
        if (b) {
            *cur_byte |= mask;
        } else {
            *cur_byte &= static_cast<u8>(~mask);
        }

        // Count the bit: without this the capacity check above never triggers and
        // GetBitsWritten() always reports zero.
        ++bits_written;

        // Move on to the next byte once the current one is full.
        if (++next_bit >= 8) {
            ++cur_byte;
            next_bit = 0;
        }
    }

    u8* cur_byte;
    std::size_t num_bits;
    std::size_t bits_written = 0;
    std::size_t next_bit = 0;
};
| 138 | |||
/// Read-only view over the bits of an integer. The object keeps a reference to the integer,
/// so the integer must outlive the view.
template <typename IntType>
class Bits {
public:
    explicit Bits(const IntType& v) : m_Bits(v) {}

    Bits(const Bits&) = delete;
    Bits& operator=(const Bits&) = delete;

    /// Returns bit `bitPos` (0 = least significant) as 0 or 1.
    u8 operator[](u32 bitPos) const {
        return static_cast<u8>((m_Bits >> bitPos) & 1);
    }

    /// Returns the inclusive bit range between `start` and `end`, shifted down so that the
    /// lower position ends up in bit 0. The two positions may be given in either order.
    IntType operator()(u32 start, u32 end) const {
        if (start == end) {
            return (*this)[start];
        }

        const u32 low = std::min(start, end);
        const u32 high = std::max(start, end);
        const u32 width = high - low + 1;

        // Build the mask in 64 bits: shifting a plain int by 32 or more is undefined and
        // produced a wrong mask for full-width ranges such as (0, 31).
        const u64 mask = width >= 64 ? ~u64{0} : (u64{1} << width) - 1;
        return (m_Bits >> low) & static_cast<IntType>(mask);
    }

private:
    const IntType& m_Bits;
};
| 167 | |||
/// How a value of an integer sequence is stored.
enum class IntegerEncoding {
    JustBits, ///< Plain bits only
    Qus32,    ///< Plain bits plus a quint (GetBitLength charges 7 extra bits per 3 values)
    Trit,     ///< Plain bits plus a trit (GetBitLength charges 8 extra bits per 5 values)
};

/// One decoded element of an integer sequence together with the encoding it was stored in.
struct IntegerEncodedValue {
    constexpr IntegerEncodedValue() = default;

    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
        : encoding{encoding_}, num_bits{num_bits_} {}

    /// True when `other` uses the same encoding kind and the same number of plain bits.
    /// The decoded values themselves are not compared.
    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
        return num_bits == other.num_bits && encoding == other.encoding;
    }

    /// Returns the number of bits needed to store `nVals` values with this encoding.
    u32 GetBitLength(u32 nVals) const {
        const u32 plain_bits = num_bits * nVals;
        switch (encoding) {
        case IntegerEncoding::Trit:
            // Five trits share eight bits, rounded up.
            return plain_bits + (nVals * 8 + 4) / 5;
        case IntegerEncoding::Qus32:
            // Three quints share seven bits, rounded up.
            return plain_bits + (nVals * 7 + 2) / 3;
        default:
            return plain_bits;
        }
    }

    IntegerEncoding encoding{};
    u32 num_bits = 0;  ///< Number of plain bits per value
    u32 bit_value = 0; ///< Decoded plain bits
    union {
        // Both names alias the same storage; which one is meaningful depends on `encoding`.
        u32 qus32_value = 0;
        u32 trit_value;
    };
};
| 199 | using IntegerEncodedVector = boost::container::static_vector< | ||
| 200 | IntegerEncodedValue, 256, | ||
| 201 | boost::container::static_vector_options< | ||
| 202 | boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, | ||
| 203 | boost::container::throw_on_overflow<false>>::type>; | ||
| 204 | |||
| 205 | static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { | ||
| 206 | // Implement the algorithm in section C.2.12 | ||
| 207 | std::array<u32, 5> m; | ||
| 208 | std::array<u32, 5> t; | ||
| 209 | u32 T; | ||
| 210 | |||
| 211 | // Read the trit encoded block according to | ||
| 212 | // table C.2.14 | ||
| 213 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 214 | T = bits.ReadBits<2>(); | ||
| 215 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 216 | T |= bits.ReadBits<2>() << 2; | ||
| 217 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 218 | T |= bits.ReadBit() << 4; | ||
| 219 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 220 | T |= bits.ReadBits<2>() << 5; | ||
| 221 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 222 | T |= bits.ReadBit() << 7; | ||
| 223 | |||
| 224 | u32 C = 0; | ||
| 225 | |||
| 226 | Bits<u32> Tb(T); | ||
| 227 | if (Tb(2, 4) == 7) { | ||
| 228 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 229 | t[4] = t[3] = 2; | ||
| 230 | } else { | ||
| 231 | C = Tb(0, 4); | ||
| 232 | if (Tb(5, 6) == 3) { | ||
| 233 | t[4] = 2; | ||
| 234 | t[3] = Tb[7]; | ||
| 235 | } else { | ||
| 236 | t[4] = Tb[7]; | ||
| 237 | t[3] = Tb(5, 6); | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | Bits<u32> Cb(C); | ||
| 242 | if (Cb(0, 1) == 3) { | ||
| 243 | t[2] = 2; | ||
| 244 | t[1] = Cb[4]; | ||
| 245 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); | ||
| 246 | } else if (Cb(2, 3) == 3) { | ||
| 247 | t[2] = 2; | ||
| 248 | t[1] = 2; | ||
| 249 | t[0] = Cb(0, 1); | ||
| 250 | } else { | ||
| 251 | t[2] = Cb[4]; | ||
| 252 | t[1] = Cb(2, 3); | ||
| 253 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); | ||
| 254 | } | ||
| 255 | |||
| 256 | for (std::size_t i = 0; i < 5; ++i) { | ||
| 257 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); | ||
| 258 | val.bit_value = m[i]; | ||
| 259 | val.trit_value = t[i]; | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, | ||
| 264 | u32 nBitsPerValue) { | ||
| 265 | // Implement the algorithm in section C.2.12 | ||
| 266 | u32 m[3]; | ||
| 267 | u32 q[3]; | ||
| 268 | u32 Q; | ||
| 269 | |||
| 270 | // Read the trit encoded block according to | ||
| 271 | // table C.2.15 | ||
| 272 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 273 | Q = bits.ReadBits<3>(); | ||
| 274 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 275 | Q |= bits.ReadBits<2>() << 3; | ||
| 276 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 277 | Q |= bits.ReadBits<2>() << 5; | ||
| 278 | |||
| 279 | Bits<u32> Qb(Q); | ||
| 280 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 281 | q[0] = q[1] = 4; | ||
| 282 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 283 | } else { | ||
| 284 | u32 C = 0; | ||
| 285 | if (Qb(1, 2) == 3) { | ||
| 286 | q[2] = 4; | ||
| 287 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 288 | } else { | ||
| 289 | q[2] = Qb(5, 6); | ||
| 290 | C = Qb(0, 4); | ||
| 291 | } | ||
| 292 | |||
| 293 | Bits<u32> Cb(C); | ||
| 294 | if (Cb(0, 2) == 5) { | ||
| 295 | q[1] = 4; | ||
| 296 | q[0] = Cb(3, 4); | ||
| 297 | } else { | ||
| 298 | q[1] = Cb(3, 4); | ||
| 299 | q[0] = Cb(0, 2); | ||
| 300 | } | ||
| 301 | } | ||
| 302 | |||
| 303 | for (std::size_t i = 0; i < 3; ++i) { | ||
| 304 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); | ||
| 305 | val.bit_value = m[i]; | ||
| 306 | val.qus32_value = q[i]; | ||
| 307 | } | ||
| 308 | } | ||
| 309 | |||
| 310 | // Returns a new instance of this struct that corresponds to the | ||
| 311 | // can take no more than maxval values | ||
| 312 | static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { | ||
| 313 | while (maxVal > 0) { | ||
| 314 | u32 check = maxVal + 1; | ||
| 315 | |||
| 316 | // Is maxVal a power of two? | ||
| 317 | if (!(check & (check - 1))) { | ||
| 318 | return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); | ||
| 319 | } | ||
| 320 | |||
| 321 | // Is maxVal of the type 3*2^n - 1? | ||
| 322 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | ||
| 323 | return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); | ||
| 324 | } | ||
| 325 | |||
| 326 | // Is maxVal of the type 5*2^n - 1? | ||
| 327 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | ||
| 328 | return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); | ||
| 329 | } | ||
| 330 | |||
| 331 | // Apparently it can't be represented with a bounded integer sequence... | ||
| 332 | // just iterate. | ||
| 333 | maxVal--; | ||
| 334 | } | ||
| 335 | return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||
| 336 | } | ||
| 337 | |||
| 338 | static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { | ||
| 339 | std::array<IntegerEncodedValue, 256> encodings{}; | ||
| 340 | for (std::size_t i = 0; i < encodings.size(); ++i) { | ||
| 341 | encodings[i] = CreateEncoding(static_cast<u32>(i)); | ||
| 342 | } | ||
| 343 | return encodings; | ||
| 344 | } | ||
| 345 | |||
| 346 | static constexpr std::array EncodingsValues = MakeEncodedValues(); | ||
| 347 | |||
| 348 | // Fills result with the values that are encoded in the given | ||
| 349 | // bitstream. We must know beforehand what the maximum possible | ||
| 350 | // value is, and how many values we're decoding. | ||
| 351 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, | ||
| 352 | u32 nValues) { | ||
| 353 | // Determine encoding parameters | ||
| 354 | IntegerEncodedValue val = EncodingsValues[maxRange]; | ||
| 355 | |||
| 356 | // Start decoding | ||
| 357 | u32 nValsDecoded = 0; | ||
| 358 | while (nValsDecoded < nValues) { | ||
| 359 | switch (val.encoding) { | ||
| 360 | case IntegerEncoding::Qus32: | ||
| 361 | DecodeQus32Block(bits, result, val.num_bits); | ||
| 362 | nValsDecoded += 3; | ||
| 363 | break; | ||
| 364 | |||
| 365 | case IntegerEncoding::Trit: | ||
| 366 | DecodeTritBlock(bits, result, val.num_bits); | ||
| 367 | nValsDecoded += 5; | ||
| 368 | break; | ||
| 369 | |||
| 370 | case IntegerEncoding::JustBits: | ||
| 371 | val.bit_value = bits.ReadBits(val.num_bits); | ||
| 372 | result.push_back(val); | ||
| 373 | nValsDecoded++; | ||
| 374 | break; | ||
| 375 | } | ||
| 376 | } | ||
| 377 | } | ||
| 378 | |||
| 379 | namespace ASTCC { | ||
| 380 | |||
| 381 | struct TexelWeightParams { | ||
| 382 | u32 m_Width = 0; | ||
| 383 | u32 m_Height = 0; | ||
| 384 | bool m_bDualPlane = false; | ||
| 385 | u32 m_MaxWeight = 0; | ||
| 386 | bool m_bError = false; | ||
| 387 | bool m_bVoidExtentLDR = false; | ||
| 388 | bool m_bVoidExtentHDR = false; | ||
| 389 | |||
| 390 | u32 GetPackedBitSize() const { | ||
| 391 | // How many indices do we have? | ||
| 392 | u32 nIdxs = m_Height * m_Width; | ||
| 393 | if (m_bDualPlane) { | ||
| 394 | nIdxs *= 2; | ||
| 395 | } | ||
| 396 | |||
| 397 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); | ||
| 398 | } | ||
| 399 | |||
| 400 | u32 GetNumWeightValues() const { | ||
| 401 | u32 ret = m_Width * m_Height; | ||
| 402 | if (m_bDualPlane) { | ||
| 403 | ret *= 2; | ||
| 404 | } | ||
| 405 | return ret; | ||
| 406 | } | ||
| 407 | }; | ||
| 408 | |||
| 409 | static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | ||
| 410 | TexelWeightParams params; | ||
| 411 | |||
| 412 | // Read the entire block mode all at once | ||
| 413 | u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); | ||
| 414 | |||
| 415 | // Does this match the void extent block mode? | ||
| 416 | if ((modeBits & 0x01FF) == 0x1FC) { | ||
| 417 | if (modeBits & 0x200) { | ||
| 418 | params.m_bVoidExtentHDR = true; | ||
| 419 | } else { | ||
| 420 | params.m_bVoidExtentLDR = true; | ||
| 421 | } | ||
| 422 | |||
| 423 | // Next two bits must be one. | ||
| 424 | if (!(modeBits & 0x400) || !strm.ReadBit()) { | ||
| 425 | params.m_bError = true; | ||
| 426 | } | ||
| 427 | |||
| 428 | return params; | ||
| 429 | } | ||
| 430 | |||
| 431 | // First check if the last four bits are zero | ||
| 432 | if ((modeBits & 0xF) == 0) { | ||
| 433 | params.m_bError = true; | ||
| 434 | return params; | ||
| 435 | } | ||
| 436 | |||
| 437 | // If the last two bits are zero, then if bits | ||
| 438 | // [6-8] are all ones, this is also reserved. | ||
| 439 | if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) { | ||
| 440 | params.m_bError = true; | ||
| 441 | return params; | ||
| 442 | } | ||
| 443 | |||
| 444 | // Otherwise, there is no error... Figure out the layout | ||
| 445 | // of the block mode. Layout is determined by a number | ||
| 446 | // between 0 and 9 corresponding to table C.2.8 of the | ||
| 447 | // ASTC spec. | ||
| 448 | u32 layout = 0; | ||
| 449 | |||
| 450 | if ((modeBits & 0x1) || (modeBits & 0x2)) { | ||
| 451 | // layout is in [0-4] | ||
| 452 | if (modeBits & 0x8) { | ||
| 453 | // layout is in [2-4] | ||
| 454 | if (modeBits & 0x4) { | ||
| 455 | // layout is in [3-4] | ||
| 456 | if (modeBits & 0x100) { | ||
| 457 | layout = 4; | ||
| 458 | } else { | ||
| 459 | layout = 3; | ||
| 460 | } | ||
| 461 | } else { | ||
| 462 | layout = 2; | ||
| 463 | } | ||
| 464 | } else { | ||
| 465 | // layout is in [0-1] | ||
| 466 | if (modeBits & 0x4) { | ||
| 467 | layout = 1; | ||
| 468 | } else { | ||
| 469 | layout = 0; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | } else { | ||
| 473 | // layout is in [5-9] | ||
| 474 | if (modeBits & 0x100) { | ||
| 475 | // layout is in [7-9] | ||
| 476 | if (modeBits & 0x80) { | ||
| 477 | // layout is in [7-8] | ||
| 478 | assert((modeBits & 0x40) == 0U); | ||
| 479 | if (modeBits & 0x20) { | ||
| 480 | layout = 8; | ||
| 481 | } else { | ||
| 482 | layout = 7; | ||
| 483 | } | ||
| 484 | } else { | ||
| 485 | layout = 9; | ||
| 486 | } | ||
| 487 | } else { | ||
| 488 | // layout is in [5-6] | ||
| 489 | if (modeBits & 0x80) { | ||
| 490 | layout = 6; | ||
| 491 | } else { | ||
| 492 | layout = 5; | ||
| 493 | } | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | assert(layout < 10); | ||
| 498 | |||
| 499 | // Determine R | ||
| 500 | u32 R = !!(modeBits & 0x10); | ||
| 501 | if (layout < 5) { | ||
| 502 | R |= (modeBits & 0x3) << 1; | ||
| 503 | } else { | ||
| 504 | R |= (modeBits & 0xC) >> 1; | ||
| 505 | } | ||
| 506 | assert(2 <= R && R <= 7); | ||
| 507 | |||
| 508 | // Determine width & height | ||
| 509 | switch (layout) { | ||
| 510 | case 0: { | ||
| 511 | u32 A = (modeBits >> 5) & 0x3; | ||
| 512 | u32 B = (modeBits >> 7) & 0x3; | ||
| 513 | params.m_Width = B + 4; | ||
| 514 | params.m_Height = A + 2; | ||
| 515 | break; | ||
| 516 | } | ||
| 517 | |||
| 518 | case 1: { | ||
| 519 | u32 A = (modeBits >> 5) & 0x3; | ||
| 520 | u32 B = (modeBits >> 7) & 0x3; | ||
| 521 | params.m_Width = B + 8; | ||
| 522 | params.m_Height = A + 2; | ||
| 523 | break; | ||
| 524 | } | ||
| 525 | |||
| 526 | case 2: { | ||
| 527 | u32 A = (modeBits >> 5) & 0x3; | ||
| 528 | u32 B = (modeBits >> 7) & 0x3; | ||
| 529 | params.m_Width = A + 2; | ||
| 530 | params.m_Height = B + 8; | ||
| 531 | break; | ||
| 532 | } | ||
| 533 | |||
| 534 | case 3: { | ||
| 535 | u32 A = (modeBits >> 5) & 0x3; | ||
| 536 | u32 B = (modeBits >> 7) & 0x1; | ||
| 537 | params.m_Width = A + 2; | ||
| 538 | params.m_Height = B + 6; | ||
| 539 | break; | ||
| 540 | } | ||
| 541 | |||
| 542 | case 4: { | ||
| 543 | u32 A = (modeBits >> 5) & 0x3; | ||
| 544 | u32 B = (modeBits >> 7) & 0x1; | ||
| 545 | params.m_Width = B + 2; | ||
| 546 | params.m_Height = A + 2; | ||
| 547 | break; | ||
| 548 | } | ||
| 549 | |||
| 550 | case 5: { | ||
| 551 | u32 A = (modeBits >> 5) & 0x3; | ||
| 552 | params.m_Width = 12; | ||
| 553 | params.m_Height = A + 2; | ||
| 554 | break; | ||
| 555 | } | ||
| 556 | |||
| 557 | case 6: { | ||
| 558 | u32 A = (modeBits >> 5) & 0x3; | ||
| 559 | params.m_Width = A + 2; | ||
| 560 | params.m_Height = 12; | ||
| 561 | break; | ||
| 562 | } | ||
| 563 | |||
| 564 | case 7: { | ||
| 565 | params.m_Width = 6; | ||
| 566 | params.m_Height = 10; | ||
| 567 | break; | ||
| 568 | } | ||
| 569 | |||
| 570 | case 8: { | ||
| 571 | params.m_Width = 10; | ||
| 572 | params.m_Height = 6; | ||
| 573 | break; | ||
| 574 | } | ||
| 575 | |||
| 576 | case 9: { | ||
| 577 | u32 A = (modeBits >> 5) & 0x3; | ||
| 578 | u32 B = (modeBits >> 9) & 0x3; | ||
| 579 | params.m_Width = A + 6; | ||
| 580 | params.m_Height = B + 6; | ||
| 581 | break; | ||
| 582 | } | ||
| 583 | |||
| 584 | default: | ||
| 585 | assert(false && "Don't know this layout..."); | ||
| 586 | params.m_bError = true; | ||
| 587 | break; | ||
| 588 | } | ||
| 589 | |||
| 590 | // Determine whether or not we're using dual planes | ||
| 591 | // and/or high precision layouts. | ||
| 592 | bool D = (layout != 9) && (modeBits & 0x400); | ||
| 593 | bool H = (layout != 9) && (modeBits & 0x200); | ||
| 594 | |||
| 595 | if (H) { | ||
| 596 | const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; | ||
| 597 | params.m_MaxWeight = maxWeights[R - 2]; | ||
| 598 | } else { | ||
| 599 | const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; | ||
| 600 | params.m_MaxWeight = maxWeights[R - 2]; | ||
| 601 | } | ||
| 602 | |||
| 603 | params.m_bDualPlane = D; | ||
| 604 | |||
| 605 | return params; | ||
| 606 | } | ||
| 607 | |||
| 608 | static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, | ||
| 609 | u32 blockHeight) { | ||
| 610 | // Don't actually care about the void extent, just read the bits... | ||
| 611 | for (s32 i = 0; i < 4; ++i) { | ||
| 612 | strm.ReadBits<13>(); | ||
| 613 | } | ||
| 614 | |||
| 615 | // Decode the RGBA components and renormalize them to the range [0, 255] | ||
| 616 | u16 r = static_cast<u16>(strm.ReadBits<16>()); | ||
| 617 | u16 g = static_cast<u16>(strm.ReadBits<16>()); | ||
| 618 | u16 b = static_cast<u16>(strm.ReadBits<16>()); | ||
| 619 | u16 a = static_cast<u16>(strm.ReadBits<16>()); | ||
| 620 | |||
| 621 | u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | | ||
| 622 | (static_cast<u32>(a) & 0xFF00) << 16; | ||
| 623 | |||
| 624 | for (u32 j = 0; j < blockHeight; j++) { | ||
| 625 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 626 | outBuf[j * blockWidth + i] = rgba; | ||
| 627 | } | ||
| 628 | } | ||
| 629 | } | ||
| 630 | |||
| 631 | static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { | ||
| 632 | for (u32 j = 0; j < blockHeight; j++) { | ||
| 633 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 634 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | ||
| 635 | } | ||
| 636 | } | ||
| 637 | } | ||
| 638 | |||
// Expands the low `numBits` bits of `val` to `toBit` bits by repeating the pattern from the
// top down; the last repetition is cut short (keeping its high bits) when `toBit` is not a
// multiple of `numBits`. Returns 0 when either width is 0. When `toBit` is smaller than
// `numBits` the masked input is returned unchanged.
template <typename IntType>
static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
    if (numBits == 0 || toBit == 0) {
        return 0;
    }

    const IntType pattern = val & static_cast<IntType>((1 << numBits) - 1);
    IntType result = pattern;

    // `filled` counts how many bits of the result are populated so far.
    for (u32 filled = numBits; filled < toBit; filled += numBits) {
        const u32 remaining = toBit - filled;
        u32 dropped = 0;
        if (remaining < numBits) {
            // Final, partial repetition: append only the top `remaining` bits of the pattern.
            dropped = numBits - remaining;
            numBits = remaining;
        }
        result = static_cast<IntType>(result << numBits);
        result = static_cast<IntType>(result | (pattern >> dropped));
    }
    return result;
}
| 665 | |||
/// Number of distinct values representable with `num_bits` bits, i.e. 2^num_bits.
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    return static_cast<std::size_t>(1) << num_bits;
}
| 669 | |||
| 670 | template <typename IntType, u32 num_bits, u32 to_bit> | ||
| 671 | static constexpr auto MakeReplicateTable() { | ||
| 672 | std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||
| 673 | for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { | ||
| 674 | table[value] = Replicate(value, num_bits, to_bit); | ||
| 675 | } | ||
| 676 | return table; | ||
| 677 | } | ||
| 678 | |||
| 679 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 680 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | ||
| 681 | return REPLICATE_BYTE_TO_16_TABLE[value]; | ||
| 682 | } | ||
| 683 | |||
| 684 | static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); | ||
| 685 | static constexpr u32 ReplicateBitTo7(std::size_t value) { | ||
| 686 | return REPLICATE_BIT_TO_7_TABLE[value]; | ||
| 687 | } | ||
| 688 | |||
| 689 | static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); | ||
| 690 | static constexpr u32 ReplicateBitTo9(std::size_t value) { | ||
| 691 | return REPLICATE_BIT_TO_9_TABLE[value]; | ||
| 692 | } | ||
| 693 | |||
| 694 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 695 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 696 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 697 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 698 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 699 | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||
| 700 | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||
| 701 | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||
| 702 | /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback | ||
| 703 | /// to the runtime implementation | ||
| 704 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 705 | switch (num_bits) { | ||
| 706 | case 1: | ||
| 707 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 708 | case 2: | ||
| 709 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 710 | case 3: | ||
| 711 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 712 | case 4: | ||
| 713 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 714 | case 5: | ||
| 715 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 716 | case 6: | ||
| 717 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 718 | case 7: | ||
| 719 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 720 | case 8: | ||
| 721 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 722 | default: | ||
| 723 | return Replicate(value, num_bits, 8); | ||
| 724 | } | ||
| 725 | } | ||
| 726 | |||
| 727 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 728 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 729 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 730 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 731 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 732 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 733 | switch (num_bits) { | ||
| 734 | case 1: | ||
| 735 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 736 | case 2: | ||
| 737 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 738 | case 3: | ||
| 739 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 740 | case 4: | ||
| 741 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 742 | case 5: | ||
| 743 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 744 | default: | ||
| 745 | return Replicate(value, num_bits, 6); | ||
| 746 | } | ||
| 747 | } | ||
| 748 | |||
| 749 | class Pixel { | ||
| 750 | protected: | ||
| 751 | using ChannelType = s16; | ||
| 752 | u8 m_BitDepth[4] = {8, 8, 8, 8}; | ||
| 753 | s16 color[4] = {}; | ||
| 754 | |||
| 755 | public: | ||
| 756 | Pixel() = default; | ||
| 757 | Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) | ||
| 758 | : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, | ||
| 759 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), | ||
| 760 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | ||
| 761 | |||
| 762 | // Changes the depth of each pixel. This scales the values to | ||
| 763 | // the appropriate bit depth by either truncating the least | ||
| 764 | // significant bits when going from larger to smaller bit depth | ||
| 765 | // or by repeating the most significant bits when going from | ||
| 766 | // smaller to larger bit depths. | ||
| 767 | void ChangeBitDepth() { | ||
| 768 | for (u32 i = 0; i < 4; i++) { | ||
| 769 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); | ||
| 770 | m_BitDepth[i] = 8; | ||
| 771 | } | ||
| 772 | } | ||
| 773 | |||
| 774 | template <typename IntType> | ||
| 775 | static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { | ||
| 776 | float denominator = static_cast<float>((1 << bitDepth) - 1); | ||
| 777 | return static_cast<float>(channel) / denominator; | ||
| 778 | } | ||
| 779 | |||
| 780 | // Changes the bit depth of a single component. See the comment | ||
| 781 | // above for how we do this. | ||
| 782 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { | ||
| 783 | assert(oldDepth <= 8); | ||
| 784 | |||
| 785 | if (oldDepth == 8) { | ||
| 786 | // Do nothing | ||
| 787 | return val; | ||
| 788 | } else if (oldDepth == 0) { | ||
| 789 | return static_cast<ChannelType>((1 << 8) - 1); | ||
| 790 | } else if (8 > oldDepth) { | ||
| 791 | return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); | ||
| 792 | } else { | ||
| 793 | // oldDepth > newDepth | ||
| 794 | const u8 bitsWasted = static_cast<u8>(oldDepth - 8); | ||
| 795 | u16 v = static_cast<u16>(val); | ||
| 796 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | ||
| 797 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); | ||
| 798 | return static_cast<u8>(v); | ||
| 799 | } | ||
| 800 | |||
| 801 | assert(false && "We shouldn't get here."); | ||
| 802 | return 0; | ||
| 803 | } | ||
| 804 | |||
| 805 | const ChannelType& A() const { | ||
| 806 | return color[0]; | ||
| 807 | } | ||
| 808 | ChannelType& A() { | ||
| 809 | return color[0]; | ||
| 810 | } | ||
| 811 | const ChannelType& R() const { | ||
| 812 | return color[1]; | ||
| 813 | } | ||
| 814 | ChannelType& R() { | ||
| 815 | return color[1]; | ||
| 816 | } | ||
| 817 | const ChannelType& G() const { | ||
| 818 | return color[2]; | ||
| 819 | } | ||
| 820 | ChannelType& G() { | ||
| 821 | return color[2]; | ||
| 822 | } | ||
| 823 | const ChannelType& B() const { | ||
| 824 | return color[3]; | ||
| 825 | } | ||
| 826 | ChannelType& B() { | ||
| 827 | return color[3]; | ||
| 828 | } | ||
| 829 | const ChannelType& Component(u32 idx) const { | ||
| 830 | return color[idx]; | ||
| 831 | } | ||
| 832 | ChannelType& Component(u32 idx) { | ||
| 833 | return color[idx]; | ||
| 834 | } | ||
| 835 | |||
| 836 | void GetBitDepth(u8 (&outDepth)[4]) const { | ||
| 837 | for (s32 i = 0; i < 4; i++) { | ||
| 838 | outDepth[i] = m_BitDepth[i]; | ||
| 839 | } | ||
| 840 | } | ||
| 841 | |||
| 842 | // Take all of the components, transform them to their 8-bit variants, | ||
| 843 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume | ||
| 844 | // that the architecture is little-endian, so the alpha channel will end | ||
| 845 | // up in the most-significant byte. | ||
| 846 | u32 Pack() const { | ||
| 847 | Pixel eightBit(*this); | ||
| 848 | eightBit.ChangeBitDepth(); | ||
| 849 | |||
| 850 | u32 r = 0; | ||
| 851 | r |= eightBit.A(); | ||
| 852 | r <<= 8; | ||
| 853 | r |= eightBit.B(); | ||
| 854 | r <<= 8; | ||
| 855 | r |= eightBit.G(); | ||
| 856 | r <<= 8; | ||
| 857 | r |= eightBit.R(); | ||
| 858 | return r; | ||
| 859 | } | ||
| 860 | |||
| 861 | // Clamps the pixel to the range [0,255] | ||
| 862 | void ClampByte() { | ||
| 863 | for (u32 i = 0; i < 4; i++) { | ||
| 864 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | ||
| 865 | } | ||
| 866 | } | ||
| 867 | |||
| 868 | void MakeOpaque() { | ||
| 869 | A() = 255; | ||
| 870 | } | ||
| 871 | }; | ||
| 872 | |||
| 873 | static void DecodeColorValues(u32* out, std::span<u8> data, const u32* modes, const u32 nPartitions, | ||
| 874 | const u32 nBitsForColorData) { | ||
| 875 | // First figure out how many color values we have | ||
| 876 | u32 nValues = 0; | ||
| 877 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 878 | nValues += ((modes[i] >> 2) + 1) << 1; | ||
| 879 | } | ||
| 880 | |||
| 881 | // Then based on the number of values and the remaining number of bits, | ||
| 882 | // figure out the max value for each of them... | ||
| 883 | u32 range = 256; | ||
| 884 | while (--range > 0) { | ||
| 885 | IntegerEncodedValue val = EncodingsValues[range]; | ||
| 886 | u32 bitLength = val.GetBitLength(nValues); | ||
| 887 | if (bitLength <= nBitsForColorData) { | ||
| 888 | // Find the smallest possible range that matches the given encoding | ||
| 889 | while (--range > 0) { | ||
| 890 | IntegerEncodedValue newval = EncodingsValues[range]; | ||
| 891 | if (!newval.MatchesEncoding(val)) { | ||
| 892 | break; | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | // Return to last matching range. | ||
| 897 | range++; | ||
| 898 | break; | ||
| 899 | } | ||
| 900 | } | ||
| 901 | |||
| 902 | // We now have enough to decode our integer sequence. | ||
| 903 | IntegerEncodedVector decodedColorValues; | ||
| 904 | |||
| 905 | InputBitStream colorStream(data, 0); | ||
| 906 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | ||
| 907 | |||
| 908 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | ||
| 909 | // This procedure is outlined in ASTC spec C.2.13 | ||
| 910 | u32 outIdx = 0; | ||
| 911 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { | ||
| 912 | // Have we already decoded all that we need? | ||
| 913 | if (outIdx >= nValues) { | ||
| 914 | break; | ||
| 915 | } | ||
| 916 | |||
| 917 | const IntegerEncodedValue& val = *itr; | ||
| 918 | u32 bitlen = val.num_bits; | ||
| 919 | u32 bitval = val.bit_value; | ||
| 920 | |||
| 921 | assert(bitlen >= 1); | ||
| 922 | |||
| 923 | u32 A = 0, B = 0, C = 0, D = 0; | ||
| 924 | // A is just the lsb replicated 9 times. | ||
| 925 | A = ReplicateBitTo9(bitval & 1); | ||
| 926 | |||
| 927 | switch (val.encoding) { | ||
| 928 | // Replicate bits | ||
| 929 | case IntegerEncoding::JustBits: | ||
| 930 | out[outIdx++] = FastReplicateTo8(bitval, bitlen); | ||
| 931 | break; | ||
| 932 | |||
| 933 | // Use algorithm in C.2.13 | ||
| 934 | case IntegerEncoding::Trit: { | ||
| 935 | |||
| 936 | D = val.trit_value; | ||
| 937 | |||
| 938 | switch (bitlen) { | ||
| 939 | case 1: { | ||
| 940 | C = 204; | ||
| 941 | } break; | ||
| 942 | |||
| 943 | case 2: { | ||
| 944 | C = 93; | ||
| 945 | // B = b000b0bb0 | ||
| 946 | u32 b = (bitval >> 1) & 1; | ||
| 947 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | ||
| 948 | } break; | ||
| 949 | |||
| 950 | case 3: { | ||
| 951 | C = 44; | ||
| 952 | // B = cb000cbcb | ||
| 953 | u32 cb = (bitval >> 1) & 3; | ||
| 954 | B = (cb << 7) | (cb << 2) | cb; | ||
| 955 | } break; | ||
| 956 | |||
| 957 | case 4: { | ||
| 958 | C = 22; | ||
| 959 | // B = dcb000dcb | ||
| 960 | u32 dcb = (bitval >> 1) & 7; | ||
| 961 | B = (dcb << 6) | dcb; | ||
| 962 | } break; | ||
| 963 | |||
| 964 | case 5: { | ||
| 965 | C = 11; | ||
| 966 | // B = edcb000ed | ||
| 967 | u32 edcb = (bitval >> 1) & 0xF; | ||
| 968 | B = (edcb << 5) | (edcb >> 2); | ||
| 969 | } break; | ||
| 970 | |||
| 971 | case 6: { | ||
| 972 | C = 5; | ||
| 973 | // B = fedcb000f | ||
| 974 | u32 fedcb = (bitval >> 1) & 0x1F; | ||
| 975 | B = (fedcb << 4) | (fedcb >> 4); | ||
| 976 | } break; | ||
| 977 | |||
| 978 | default: | ||
| 979 | assert(false && "Unsupported trit encoding for color values!"); | ||
| 980 | break; | ||
| 981 | } // switch(bitlen) | ||
| 982 | } // case IntegerEncoding::Trit | ||
| 983 | break; | ||
| 984 | |||
| 985 | case IntegerEncoding::Qus32: { | ||
| 986 | |||
| 987 | D = val.qus32_value; | ||
| 988 | |||
| 989 | switch (bitlen) { | ||
| 990 | case 1: { | ||
| 991 | C = 113; | ||
| 992 | } break; | ||
| 993 | |||
| 994 | case 2: { | ||
| 995 | C = 54; | ||
| 996 | // B = b0000bb00 | ||
| 997 | u32 b = (bitval >> 1) & 1; | ||
| 998 | B = (b << 8) | (b << 3) | (b << 2); | ||
| 999 | } break; | ||
| 1000 | |||
| 1001 | case 3: { | ||
| 1002 | C = 26; | ||
| 1003 | // B = cb0000cbc | ||
| 1004 | u32 cb = (bitval >> 1) & 3; | ||
| 1005 | B = (cb << 7) | (cb << 1) | (cb >> 1); | ||
| 1006 | } break; | ||
| 1007 | |||
| 1008 | case 4: { | ||
| 1009 | C = 13; | ||
| 1010 | // B = dcb0000dc | ||
| 1011 | u32 dcb = (bitval >> 1) & 7; | ||
| 1012 | B = (dcb << 6) | (dcb >> 1); | ||
| 1013 | } break; | ||
| 1014 | |||
| 1015 | case 5: { | ||
| 1016 | C = 6; | ||
| 1017 | // B = edcb0000e | ||
| 1018 | u32 edcb = (bitval >> 1) & 0xF; | ||
| 1019 | B = (edcb << 5) | (edcb >> 3); | ||
| 1020 | } break; | ||
| 1021 | |||
| 1022 | default: | ||
| 1023 | assert(false && "Unsupported quint encoding for color values!"); | ||
| 1024 | break; | ||
| 1025 | } // switch(bitlen) | ||
| 1026 | } // case IntegerEncoding::Qus32 | ||
| 1027 | break; | ||
| 1028 | } // switch(val.encoding) | ||
| 1029 | |||
| 1030 | if (val.encoding != IntegerEncoding::JustBits) { | ||
| 1031 | u32 T = D * C + B; | ||
| 1032 | T ^= A; | ||
| 1033 | T = (A & 0x80) | (T >> 2); | ||
| 1034 | out[outIdx++] = T; | ||
| 1035 | } | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | // Make sure that each of our values is in the proper range... | ||
| 1039 | for (u32 i = 0; i < nValues; i++) { | ||
| 1040 | assert(out[i] <= 255); | ||
| 1041 | } | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | ||
| 1045 | u32 bitval = val.bit_value; | ||
| 1046 | u32 bitlen = val.num_bits; | ||
| 1047 | |||
| 1048 | u32 A = ReplicateBitTo7(bitval & 1); | ||
| 1049 | u32 B = 0, C = 0, D = 0; | ||
| 1050 | |||
| 1051 | u32 result = 0; | ||
| 1052 | switch (val.encoding) { | ||
| 1053 | case IntegerEncoding::JustBits: | ||
| 1054 | result = FastReplicateTo6(bitval, bitlen); | ||
| 1055 | break; | ||
| 1056 | |||
| 1057 | case IntegerEncoding::Trit: { | ||
| 1058 | D = val.trit_value; | ||
| 1059 | assert(D < 3); | ||
| 1060 | |||
| 1061 | switch (bitlen) { | ||
| 1062 | case 0: { | ||
| 1063 | u32 results[3] = {0, 32, 63}; | ||
| 1064 | result = results[D]; | ||
| 1065 | } break; | ||
| 1066 | |||
| 1067 | case 1: { | ||
| 1068 | C = 50; | ||
| 1069 | } break; | ||
| 1070 | |||
| 1071 | case 2: { | ||
| 1072 | C = 23; | ||
| 1073 | u32 b = (bitval >> 1) & 1; | ||
| 1074 | B = (b << 6) | (b << 2) | b; | ||
| 1075 | } break; | ||
| 1076 | |||
| 1077 | case 3: { | ||
| 1078 | C = 11; | ||
| 1079 | u32 cb = (bitval >> 1) & 3; | ||
| 1080 | B = (cb << 5) | cb; | ||
| 1081 | } break; | ||
| 1082 | |||
| 1083 | default: | ||
| 1084 | assert(false && "Invalid trit encoding for texel weight"); | ||
| 1085 | break; | ||
| 1086 | } | ||
| 1087 | } break; | ||
| 1088 | |||
| 1089 | case IntegerEncoding::Qus32: { | ||
| 1090 | D = val.qus32_value; | ||
| 1091 | assert(D < 5); | ||
| 1092 | |||
| 1093 | switch (bitlen) { | ||
| 1094 | case 0: { | ||
| 1095 | u32 results[5] = {0, 16, 32, 47, 63}; | ||
| 1096 | result = results[D]; | ||
| 1097 | } break; | ||
| 1098 | |||
| 1099 | case 1: { | ||
| 1100 | C = 28; | ||
| 1101 | } break; | ||
| 1102 | |||
| 1103 | case 2: { | ||
| 1104 | C = 13; | ||
| 1105 | u32 b = (bitval >> 1) & 1; | ||
| 1106 | B = (b << 6) | (b << 1); | ||
| 1107 | } break; | ||
| 1108 | |||
| 1109 | default: | ||
| 1110 | assert(false && "Invalid quint encoding for texel weight"); | ||
| 1111 | break; | ||
| 1112 | } | ||
| 1113 | } break; | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { | ||
| 1117 | // Decode the value... | ||
| 1118 | result = D * C + B; | ||
| 1119 | result ^= A; | ||
| 1120 | result = (A & 0x20) | (result >> 2); | ||
| 1121 | } | ||
| 1122 | |||
| 1123 | assert(result < 64); | ||
| 1124 | |||
| 1125 | // Change from [0,63] to [0,64] | ||
| 1126 | if (result > 32) { | ||
| 1127 | result += 1; | ||
| 1128 | } | ||
| 1129 | |||
| 1130 | return result; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, | ||
| 1134 | const TexelWeightParams& params, const u32 blockWidth, | ||
| 1135 | const u32 blockHeight) { | ||
| 1136 | u32 weightIdx = 0; | ||
| 1137 | u32 unquantized[2][144]; | ||
| 1138 | |||
| 1139 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { | ||
| 1140 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); | ||
| 1141 | |||
| 1142 | if (params.m_bDualPlane) { | ||
| 1143 | ++itr; | ||
| 1144 | unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); | ||
| 1145 | if (itr == weights.end()) { | ||
| 1146 | break; | ||
| 1147 | } | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | if (++weightIdx >= (params.m_Width * params.m_Height)) | ||
| 1151 | break; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | // Do infill if necessary (Section C.2.18) ... | ||
| 1155 | u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); | ||
| 1156 | u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); | ||
| 1157 | |||
| 1158 | const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; | ||
| 1159 | for (u32 plane = 0; plane < kPlaneScale; plane++) | ||
| 1160 | for (u32 t = 0; t < blockHeight; t++) | ||
| 1161 | for (u32 s = 0; s < blockWidth; s++) { | ||
| 1162 | u32 cs = Ds * s; | ||
| 1163 | u32 ct = Dt * t; | ||
| 1164 | |||
| 1165 | u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; | ||
| 1166 | u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; | ||
| 1167 | |||
| 1168 | u32 js = gs >> 4; | ||
| 1169 | u32 fs = gs & 0xF; | ||
| 1170 | |||
| 1171 | u32 jt = gt >> 4; | ||
| 1172 | u32 ft = gt & 0x0F; | ||
| 1173 | |||
| 1174 | u32 w11 = (fs * ft + 8) >> 4; | ||
| 1175 | u32 w10 = ft - w11; | ||
| 1176 | u32 w01 = fs - w11; | ||
| 1177 | u32 w00 = 16 - fs - ft + w11; | ||
| 1178 | |||
| 1179 | u32 v0 = js + jt * params.m_Width; | ||
| 1180 | |||
| 1181 | #define FIND_TEXEL(tidx, bidx) \ | ||
| 1182 | u32 p##bidx = 0; \ | ||
| 1183 | do { \ | ||
| 1184 | if ((tidx) < (params.m_Width * params.m_Height)) { \ | ||
| 1185 | p##bidx = unquantized[plane][(tidx)]; \ | ||
| 1186 | } \ | ||
| 1187 | } while (0) | ||
| 1188 | |||
| 1189 | FIND_TEXEL(v0, 00); | ||
| 1190 | FIND_TEXEL(v0 + 1, 01); | ||
| 1191 | FIND_TEXEL(v0 + params.m_Width, 10); | ||
| 1192 | FIND_TEXEL(v0 + params.m_Width + 1, 11); | ||
| 1193 | |||
| 1194 | #undef FIND_TEXEL | ||
| 1195 | |||
| 1196 | out[plane][t * blockWidth + s] = | ||
| 1197 | (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; | ||
| 1198 | } | ||
| 1199 | } | ||
| 1200 | |||
// Bit transfer as described in C.2.14: bit 7 of `a` is moved into bit 7 of
// the halved `b`, and `a` is reduced to a sign-extended 6-bit value.
static inline void BitTransferSigned(s32& a, s32& b) {
    b >>= 1;
    b |= (a & 0x80);

    const s32 sixBits = (a >> 1) & 0x3F;
    // Bit 5 acts as the sign bit of the 6-bit quantity.
    a = (sixBits & 0x20) ? (sixBits - 0x40) : sixBits;
}
| 1210 | |||
| 1211 | // Adds more precision to the blue channel as described | ||
| 1212 | // in C.2.14 | ||
| 1213 | static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { | ||
| 1214 | return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), | ||
| 1215 | static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); | ||
| 1216 | } | ||
| 1217 | |||
// Hash used by the partition selection function (C.2.21).
// All arithmetic wraps modulo 2^32.
static inline u32 hash52(u32 p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
| 1233 | |||
| 1234 | static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { | ||
| 1235 | if (1 == partitionCount) | ||
| 1236 | return 0; | ||
| 1237 | |||
| 1238 | if (smallBlock) { | ||
| 1239 | x <<= 1; | ||
| 1240 | y <<= 1; | ||
| 1241 | z <<= 1; | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | seed += (partitionCount - 1) * 1024; | ||
| 1245 | |||
| 1246 | u32 rnum = hash52(static_cast<u32>(seed)); | ||
| 1247 | u8 seed1 = static_cast<u8>(rnum & 0xF); | ||
| 1248 | u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); | ||
| 1249 | u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); | ||
| 1250 | u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); | ||
| 1251 | u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); | ||
| 1252 | u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); | ||
| 1253 | u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); | ||
| 1254 | u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); | ||
| 1255 | u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); | ||
| 1256 | u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); | ||
| 1257 | u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); | ||
| 1258 | u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); | ||
| 1259 | |||
| 1260 | seed1 = static_cast<u8>(seed1 * seed1); | ||
| 1261 | seed2 = static_cast<u8>(seed2 * seed2); | ||
| 1262 | seed3 = static_cast<u8>(seed3 * seed3); | ||
| 1263 | seed4 = static_cast<u8>(seed4 * seed4); | ||
| 1264 | seed5 = static_cast<u8>(seed5 * seed5); | ||
| 1265 | seed6 = static_cast<u8>(seed6 * seed6); | ||
| 1266 | seed7 = static_cast<u8>(seed7 * seed7); | ||
| 1267 | seed8 = static_cast<u8>(seed8 * seed8); | ||
| 1268 | seed9 = static_cast<u8>(seed9 * seed9); | ||
| 1269 | seed10 = static_cast<u8>(seed10 * seed10); | ||
| 1270 | seed11 = static_cast<u8>(seed11 * seed11); | ||
| 1271 | seed12 = static_cast<u8>(seed12 * seed12); | ||
| 1272 | |||
| 1273 | s32 sh1, sh2, sh3; | ||
| 1274 | if (seed & 1) { | ||
| 1275 | sh1 = (seed & 2) ? 4 : 5; | ||
| 1276 | sh2 = (partitionCount == 3) ? 6 : 5; | ||
| 1277 | } else { | ||
| 1278 | sh1 = (partitionCount == 3) ? 6 : 5; | ||
| 1279 | sh2 = (seed & 2) ? 4 : 5; | ||
| 1280 | } | ||
| 1281 | sh3 = (seed & 0x10) ? sh1 : sh2; | ||
| 1282 | |||
| 1283 | seed1 = static_cast<u8>(seed1 >> sh1); | ||
| 1284 | seed2 = static_cast<u8>(seed2 >> sh2); | ||
| 1285 | seed3 = static_cast<u8>(seed3 >> sh1); | ||
| 1286 | seed4 = static_cast<u8>(seed4 >> sh2); | ||
| 1287 | seed5 = static_cast<u8>(seed5 >> sh1); | ||
| 1288 | seed6 = static_cast<u8>(seed6 >> sh2); | ||
| 1289 | seed7 = static_cast<u8>(seed7 >> sh1); | ||
| 1290 | seed8 = static_cast<u8>(seed8 >> sh2); | ||
| 1291 | seed9 = static_cast<u8>(seed9 >> sh3); | ||
| 1292 | seed10 = static_cast<u8>(seed10 >> sh3); | ||
| 1293 | seed11 = static_cast<u8>(seed11 >> sh3); | ||
| 1294 | seed12 = static_cast<u8>(seed12 >> sh3); | ||
| 1295 | |||
| 1296 | s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | ||
| 1297 | s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | ||
| 1298 | s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | ||
| 1299 | s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | ||
| 1300 | |||
| 1301 | a &= 0x3F; | ||
| 1302 | b &= 0x3F; | ||
| 1303 | c &= 0x3F; | ||
| 1304 | d &= 0x3F; | ||
| 1305 | |||
| 1306 | if (partitionCount < 4) | ||
| 1307 | d = 0; | ||
| 1308 | if (partitionCount < 3) | ||
| 1309 | c = 0; | ||
| 1310 | |||
| 1311 | if (a >= b && a >= c && a >= d) | ||
| 1312 | return 0; | ||
| 1313 | else if (b >= c && b >= d) | ||
| 1314 | return 1; | ||
| 1315 | else if (c >= d) | ||
| 1316 | return 2; | ||
| 1317 | return 3; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { | ||
| 1321 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | // Section C.2.14 | ||
| 1325 | static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, | ||
| 1326 | u32 colorEndpos32Mode) { | ||
| 1327 | #define READ_UINT_VALUES(N) \ | ||
| 1328 | u32 v[N]; \ | ||
| 1329 | for (u32 i = 0; i < N; i++) { \ | ||
| 1330 | v[i] = *(colorValues++); \ | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | #define READ_INT_VALUES(N) \ | ||
| 1334 | s32 v[N]; \ | ||
| 1335 | for (u32 i = 0; i < N; i++) { \ | ||
| 1336 | v[i] = static_cast<s32>(*(colorValues++)); \ | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | switch (colorEndpos32Mode) { | ||
| 1340 | case 0: { | ||
| 1341 | READ_UINT_VALUES(2) | ||
| 1342 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); | ||
| 1343 | ep2 = Pixel(0xFF, v[1], v[1], v[1]); | ||
| 1344 | } break; | ||
| 1345 | |||
| 1346 | case 1: { | ||
| 1347 | READ_UINT_VALUES(2) | ||
| 1348 | u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); | ||
| 1349 | u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); | ||
| 1350 | ep1 = Pixel(0xFF, L0, L0, L0); | ||
| 1351 | ep2 = Pixel(0xFF, L1, L1, L1); | ||
| 1352 | } break; | ||
| 1353 | |||
| 1354 | case 4: { | ||
| 1355 | READ_UINT_VALUES(4) | ||
| 1356 | ep1 = Pixel(v[2], v[0], v[0], v[0]); | ||
| 1357 | ep2 = Pixel(v[3], v[1], v[1], v[1]); | ||
| 1358 | } break; | ||
| 1359 | |||
| 1360 | case 5: { | ||
| 1361 | READ_INT_VALUES(4) | ||
| 1362 | BitTransferSigned(v[1], v[0]); | ||
| 1363 | BitTransferSigned(v[3], v[2]); | ||
| 1364 | ep1 = Pixel(v[2], v[0], v[0], v[0]); | ||
| 1365 | ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]); | ||
| 1366 | ep1.ClampByte(); | ||
| 1367 | ep2.ClampByte(); | ||
| 1368 | } break; | ||
| 1369 | |||
| 1370 | case 6: { | ||
| 1371 | READ_UINT_VALUES(4) | ||
| 1372 | ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | ||
| 1373 | ep2 = Pixel(0xFF, v[0], v[1], v[2]); | ||
| 1374 | } break; | ||
| 1375 | |||
| 1376 | case 8: { | ||
| 1377 | READ_UINT_VALUES(6) | ||
| 1378 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | ||
| 1379 | ep1 = Pixel(0xFF, v[0], v[2], v[4]); | ||
| 1380 | ep2 = Pixel(0xFF, v[1], v[3], v[5]); | ||
| 1381 | } else { | ||
| 1382 | ep1 = BlueContract(0xFF, v[1], v[3], v[5]); | ||
| 1383 | ep2 = BlueContract(0xFF, v[0], v[2], v[4]); | ||
| 1384 | } | ||
| 1385 | } break; | ||
| 1386 | |||
| 1387 | case 9: { | ||
| 1388 | READ_INT_VALUES(6) | ||
| 1389 | BitTransferSigned(v[1], v[0]); | ||
| 1390 | BitTransferSigned(v[3], v[2]); | ||
| 1391 | BitTransferSigned(v[5], v[4]); | ||
| 1392 | if (v[1] + v[3] + v[5] >= 0) { | ||
| 1393 | ep1 = Pixel(0xFF, v[0], v[2], v[4]); | ||
| 1394 | ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1395 | } else { | ||
| 1396 | ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1397 | ep2 = BlueContract(0xFF, v[0], v[2], v[4]); | ||
| 1398 | } | ||
| 1399 | ep1.ClampByte(); | ||
| 1400 | ep2.ClampByte(); | ||
| 1401 | } break; | ||
| 1402 | |||
| 1403 | case 10: { | ||
| 1404 | READ_UINT_VALUES(6) | ||
| 1405 | ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); | ||
| 1406 | ep2 = Pixel(v[5], v[0], v[1], v[2]); | ||
| 1407 | } break; | ||
| 1408 | |||
| 1409 | case 12: { | ||
| 1410 | READ_UINT_VALUES(8) | ||
| 1411 | if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { | ||
| 1412 | ep1 = Pixel(v[6], v[0], v[2], v[4]); | ||
| 1413 | ep2 = Pixel(v[7], v[1], v[3], v[5]); | ||
| 1414 | } else { | ||
| 1415 | ep1 = BlueContract(v[7], v[1], v[3], v[5]); | ||
| 1416 | ep2 = BlueContract(v[6], v[0], v[2], v[4]); | ||
| 1417 | } | ||
| 1418 | } break; | ||
| 1419 | |||
| 1420 | case 13: { | ||
| 1421 | READ_INT_VALUES(8) | ||
| 1422 | BitTransferSigned(v[1], v[0]); | ||
| 1423 | BitTransferSigned(v[3], v[2]); | ||
| 1424 | BitTransferSigned(v[5], v[4]); | ||
| 1425 | BitTransferSigned(v[7], v[6]); | ||
| 1426 | if (v[1] + v[3] + v[5] >= 0) { | ||
| 1427 | ep1 = Pixel(v[6], v[0], v[2], v[4]); | ||
| 1428 | ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1429 | } else { | ||
| 1430 | ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]); | ||
| 1431 | ep2 = BlueContract(v[6], v[0], v[2], v[4]); | ||
| 1432 | } | ||
| 1433 | ep1.ClampByte(); | ||
| 1434 | ep2.ClampByte(); | ||
| 1435 | } break; | ||
| 1436 | |||
| 1437 | default: | ||
| 1438 | assert(false && "Unsupported color endpoint mode (is it HDR?)"); | ||
| 1439 | break; | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | #undef READ_UINT_VALUES | ||
| 1443 | #undef READ_INT_VALUES | ||
| 1444 | } | ||
| 1445 | |||
| 1446 | static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, | ||
| 1447 | const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { | ||
| 1448 | InputBitStream strm(inBuf); | ||
| 1449 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | ||
| 1450 | |||
| 1451 | // Was there an error? | ||
| 1452 | if (weightParams.m_bError) { | ||
| 1453 | assert(false && "Invalid block mode"); | ||
| 1454 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1455 | return; | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | if (weightParams.m_bVoidExtentLDR) { | ||
| 1459 | FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight); | ||
| 1460 | return; | ||
| 1461 | } | ||
| 1462 | |||
| 1463 | if (weightParams.m_bVoidExtentHDR) { | ||
| 1464 | assert(false && "HDR void extent blocks are unsupported!"); | ||
| 1465 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1466 | return; | ||
| 1467 | } | ||
| 1468 | |||
| 1469 | if (weightParams.m_Width > blockWidth) { | ||
| 1470 | assert(false && "Texel weight grid width should be smaller than block width"); | ||
| 1471 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1472 | return; | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | if (weightParams.m_Height > blockHeight) { | ||
| 1476 | assert(false && "Texel weight grid height should be smaller than block height"); | ||
| 1477 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1478 | return; | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | // Read num partitions | ||
| 1482 | u32 nPartitions = strm.ReadBits<2>() + 1; | ||
| 1483 | assert(nPartitions <= 4); | ||
| 1484 | |||
| 1485 | if (nPartitions == 4 && weightParams.m_bDualPlane) { | ||
| 1486 | assert(false && "Dual plane mode is incompatible with four partition blocks"); | ||
| 1487 | FillError(outBuf, blockWidth, blockHeight); | ||
| 1488 | return; | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | // Based on the number of partitions, read the color endpos32 mode for | ||
| 1492 | // each partition. | ||
| 1493 | |||
| 1494 | // Determine partitions, partition index, and color endpos32 modes | ||
| 1495 | s32 planeIdx = -1; | ||
| 1496 | u32 partitionIndex; | ||
| 1497 | u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; | ||
| 1498 | |||
| 1499 | // Define color data. | ||
| 1500 | u8 colorEndpos32Data[16]; | ||
| 1501 | memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); | ||
| 1502 | OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); | ||
| 1503 | |||
| 1504 | // Read extra config data... | ||
| 1505 | u32 baseCEM = 0; | ||
| 1506 | if (nPartitions == 1) { | ||
| 1507 | colorEndpos32Mode[0] = strm.ReadBits<4>(); | ||
| 1508 | partitionIndex = 0; | ||
| 1509 | } else { | ||
| 1510 | partitionIndex = strm.ReadBits<10>(); | ||
| 1511 | baseCEM = strm.ReadBits<6>(); | ||
| 1512 | } | ||
| 1513 | u32 baseMode = (baseCEM & 3); | ||
| 1514 | |||
| 1515 | // Remaining bits are color endpos32 data... | ||
| 1516 | u32 nWeightBits = weightParams.GetPackedBitSize(); | ||
| 1517 | s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); | ||
| 1518 | |||
| 1519 | // Consider extra bits prior to texel data... | ||
| 1520 | u32 extraCEMbits = 0; | ||
| 1521 | if (baseMode) { | ||
| 1522 | switch (nPartitions) { | ||
| 1523 | case 2: | ||
| 1524 | extraCEMbits += 2; | ||
| 1525 | break; | ||
| 1526 | case 3: | ||
| 1527 | extraCEMbits += 5; | ||
| 1528 | break; | ||
| 1529 | case 4: | ||
| 1530 | extraCEMbits += 8; | ||
| 1531 | break; | ||
| 1532 | default: | ||
| 1533 | assert(false); | ||
| 1534 | break; | ||
| 1535 | } | ||
| 1536 | } | ||
| 1537 | remainingBits -= extraCEMbits; | ||
| 1538 | |||
| 1539 | // Do we have a dual plane situation? | ||
| 1540 | u32 planeSelectorBits = 0; | ||
| 1541 | if (weightParams.m_bDualPlane) { | ||
| 1542 | planeSelectorBits = 2; | ||
| 1543 | } | ||
| 1544 | remainingBits -= planeSelectorBits; | ||
| 1545 | |||
| 1546 | // Read color data... | ||
| 1547 | u32 colorDataBits = remainingBits; | ||
| 1548 | while (remainingBits > 0) { | ||
| 1549 | u32 nb = std::min(remainingBits, 8); | ||
| 1550 | u32 b = strm.ReadBits(nb); | ||
| 1551 | colorEndpos32Stream.WriteBits(b, nb); | ||
| 1552 | remainingBits -= 8; | ||
| 1553 | } | ||
| 1554 | |||
| 1555 | // Read the plane selection bits | ||
| 1556 | planeIdx = strm.ReadBits(planeSelectorBits); | ||
| 1557 | |||
| 1558 | // Read the rest of the CEM | ||
| 1559 | if (baseMode) { | ||
| 1560 | u32 extraCEM = strm.ReadBits(extraCEMbits); | ||
| 1561 | u32 CEM = (extraCEM << 6) | baseCEM; | ||
| 1562 | CEM >>= 2; | ||
| 1563 | |||
| 1564 | bool C[4] = {0}; | ||
| 1565 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1566 | C[i] = CEM & 1; | ||
| 1567 | CEM >>= 1; | ||
| 1568 | } | ||
| 1569 | |||
| 1570 | u8 M[4] = {0}; | ||
| 1571 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1572 | M[i] = CEM & 3; | ||
| 1573 | CEM >>= 2; | ||
| 1574 | assert(M[i] <= 3); | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1578 | colorEndpos32Mode[i] = baseMode; | ||
| 1579 | if (!(C[i])) | ||
| 1580 | colorEndpos32Mode[i] -= 1; | ||
| 1581 | colorEndpos32Mode[i] <<= 2; | ||
| 1582 | colorEndpos32Mode[i] |= M[i]; | ||
| 1583 | } | ||
| 1584 | } else if (nPartitions > 1) { | ||
| 1585 | u32 CEM = baseCEM >> 2; | ||
| 1586 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1587 | colorEndpos32Mode[i] = CEM; | ||
| 1588 | } | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | // Make sure everything up till here is sane. | ||
| 1592 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1593 | assert(colorEndpos32Mode[i] < 16); | ||
| 1594 | } | ||
| 1595 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); | ||
| 1596 | |||
| 1597 | // Decode both color data and texel weight data | ||
| 1598 | u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions | ||
| 1599 | DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, | ||
| 1600 | colorDataBits); | ||
| 1601 | |||
| 1602 | Pixel endpos32s[4][2]; | ||
| 1603 | const u32* colorValuesPtr = colorValues; | ||
| 1604 | for (u32 i = 0; i < nPartitions; i++) { | ||
| 1605 | ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); | ||
| 1606 | } | ||
| 1607 | |||
| 1608 | // Read the texel weight data.. | ||
| 1609 | std::array<u8, 16> texelWeightData; | ||
| 1610 | std::ranges::copy(inBuf, texelWeightData.begin()); | ||
| 1611 | |||
| 1612 | // Reverse everything | ||
| 1613 | for (u32 i = 0; i < 8; i++) { | ||
| 1614 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits | ||
| 1615 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 | ||
| 1616 | u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); | ||
| 1617 | u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); | ||
| 1618 | #undef REVERSE_BYTE | ||
| 1619 | |||
| 1620 | texelWeightData[i] = b; | ||
| 1621 | texelWeightData[15 - i] = a; | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | // Make sure that higher non-texel bits are set to zero | ||
| 1625 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | ||
| 1626 | if (clearByteStart > 0 && clearByteStart <= texelWeightData.size()) { | ||
| 1627 | texelWeightData[clearByteStart - 1] &= | ||
| 1628 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | ||
| 1629 | std::memset(texelWeightData.data() + clearByteStart, 0, | ||
| 1630 | std::min(16U - clearByteStart, 16U)); | ||
| 1631 | } | ||
| 1632 | |||
| 1633 | IntegerEncodedVector texelWeightValues; | ||
| 1634 | |||
| 1635 | InputBitStream weightStream(texelWeightData); | ||
| 1636 | |||
| 1637 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, | ||
| 1638 | weightParams.GetNumWeightValues()); | ||
| 1639 | |||
| 1640 | // Blocks can be at most 12x12, so we can have as many as 144 weights | ||
| 1641 | u32 weights[2][144]; | ||
| 1642 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); | ||
| 1643 | |||
| 1644 | // Now that we have endpos32s and weights, we can s32erpolate and generate | ||
| 1645 | // the proper decoding... | ||
| 1646 | for (u32 j = 0; j < blockHeight; j++) | ||
| 1647 | for (u32 i = 0; i < blockWidth; i++) { | ||
| 1648 | u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, | ||
| 1649 | (blockHeight * blockWidth) < 32); | ||
| 1650 | assert(partition < nPartitions); | ||
| 1651 | |||
| 1652 | Pixel p; | ||
| 1653 | for (u32 c = 0; c < 4; c++) { | ||
| 1654 | u32 C0 = endpos32s[partition][0].Component(c); | ||
| 1655 | C0 = ReplicateByteTo16(C0); | ||
| 1656 | u32 C1 = endpos32s[partition][1].Component(c); | ||
| 1657 | C1 = ReplicateByteTo16(C1); | ||
| 1658 | |||
| 1659 | u32 plane = 0; | ||
| 1660 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | ||
| 1661 | plane = 1; | ||
| 1662 | } | ||
| 1663 | |||
| 1664 | u32 weight = weights[plane][j * blockWidth + i]; | ||
| 1665 | u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; | ||
| 1666 | if (C == 65535) { | ||
| 1667 | p.Component(c) = 255; | ||
| 1668 | } else { | ||
| 1669 | double Cf = static_cast<double>(C); | ||
| 1670 | p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); | ||
| 1671 | } | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | outBuf[j * blockWidth + i] = p.Pack(); | ||
| 1675 | } | ||
| 1676 | } | ||
| 1677 | |||
| 1678 | } // namespace ASTCC | ||
| 1679 | |||
| 1680 | namespace Tegra::Texture::ASTC { | ||
| 1681 | |||
| 1682 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | ||
| 1683 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { | ||
| 1684 | u32 block_index = 0; | ||
| 1685 | std::size_t depth_offset = 0; | ||
| 1686 | for (u32 z = 0; z < depth; z++) { | ||
| 1687 | for (u32 y = 0; y < height; y += block_height) { | ||
| 1688 | for (u32 x = 0; x < width; x += block_width) { | ||
| 1689 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; | ||
| 1690 | |||
| 1691 | // Blocks can be at most 12x12 | ||
| 1692 | std::array<u32, 12 * 12> uncompData; | ||
| 1693 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | ||
| 1694 | |||
| 1695 | u32 decompWidth = std::min(block_width, width - x); | ||
| 1696 | u32 decompHeight = std::min(block_height, height - y); | ||
| 1697 | |||
| 1698 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); | ||
| 1699 | for (u32 jj = 0; jj < decompHeight; jj++) { | ||
| 1700 | std::memcpy(outRow.data() + jj * width * 4, | ||
| 1701 | uncompData.data() + jj * block_width, decompWidth * 4); | ||
| 1702 | } | ||
| 1703 | ++block_index; | ||
| 1704 | } | ||
| 1705 | } | ||
| 1706 | depth_offset += height * width * 4; | ||
| 1707 | } | ||
| 1708 | } | ||
| 1709 | |||
| 1710 | } // namespace Tegra::Texture::ASTC | ||