summaryrefslogtreecommitdiff
path: root/src/video_core/textures
diff options
context:
space:
mode:
authorGravatar bunnei2021-08-06 21:45:24 -0700
committerGravatar GitHub2021-08-06 21:45:24 -0700
commit268b5764c70a8300d24c32985dee595046a1e2e1 (patch)
tree178317fbc7f34549a93b8e28d9f0b6857aa104c8 /src/video_core/textures
parentMerge pull request #6799 from ameerj/vp9-fixes (diff)
parentastc_decoder: Reduce workgroup size (diff)
downloadyuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.gz
yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.xz
yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.zip
Merge pull request #6791 from ameerj/astc-opt
astc_decoder: Various performance and memory optimizations
Diffstat (limited to 'src/video_core/textures')
-rw-r--r--src/video_core/textures/astc.cpp156
-rw-r--r--src/video_core/textures/astc.h111
2 files changed, 133 insertions, 134 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 3ab500760..25161df1f 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -151,6 +151,76 @@ private:
151 const IntType& m_Bits; 151 const IntType& m_Bits;
152}; 152};
153 153
154enum class IntegerEncoding { JustBits, Quint, Trit };
155
156struct IntegerEncodedValue {
157 constexpr IntegerEncodedValue() = default;
158
159 constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
160 : encoding{encoding_}, num_bits{num_bits_} {}
161
162 constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
163 return encoding == other.encoding && num_bits == other.num_bits;
164 }
165
166 // Returns the number of bits required to encode num_vals values.
167 u32 GetBitLength(u32 num_vals) const {
168 u32 total_bits = num_bits * num_vals;
169 if (encoding == IntegerEncoding::Trit) {
170 total_bits += (num_vals * 8 + 4) / 5;
171 } else if (encoding == IntegerEncoding::Quint) {
172 total_bits += (num_vals * 7 + 2) / 3;
173 }
174 return total_bits;
175 }
176
177 IntegerEncoding encoding{};
178 u32 num_bits = 0;
179 u32 bit_value = 0;
180 union {
181 u32 quint_value = 0;
182 u32 trit_value;
183 };
184};
185
186// Returns a new instance of this struct that corresponds to the
187// can take no more than mav_value values
188static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
189 while (mav_value > 0) {
190 u32 check = mav_value + 1;
191
192 // Is mav_value a power of two?
193 if (!(check & (check - 1))) {
194 return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
195 }
196
197 // Is mav_value of the type 3*2^n - 1?
198 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
199 return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
200 }
201
202 // Is mav_value of the type 5*2^n - 1?
203 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
204 return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
205 }
206
207 // Apparently it can't be represented with a bounded integer sequence...
208 // just iterate.
209 mav_value--;
210 }
211 return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
212}
213
214static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
215 std::array<IntegerEncodedValue, 256> encodings{};
216 for (std::size_t i = 0; i < encodings.size(); ++i) {
217 encodings[i] = CreateEncoding(static_cast<u32>(i));
218 }
219 return encodings;
220}
221
222static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
223
154namespace Tegra::Texture::ASTC { 224namespace Tegra::Texture::ASTC {
155using IntegerEncodedVector = boost::container::static_vector< 225using IntegerEncodedVector = boost::container::static_vector<
156 IntegerEncodedValue, 256, 226 IntegerEncodedValue, 256,
@@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
521 return params; 591 return params;
522} 592}
523 593
524static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, 594// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
525 u32 blockHeight) { 595// is the same as [(num_bits - 1):0] and repeats all the way down.
526 // Don't actually care about the void extent, just read the bits... 596template <typename IntType>
527 for (s32 i = 0; i < 4; ++i) { 597static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
528 strm.ReadBits<13>(); 598 if (num_bits == 0 || to_bit == 0) {
599 return 0;
529 } 600 }
530 601 const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
531 // Decode the RGBA components and renormalize them to the range [0, 255] 602 IntType res = v;
532 u16 r = static_cast<u16>(strm.ReadBits<16>()); 603 u32 reslen = num_bits;
533 u16 g = static_cast<u16>(strm.ReadBits<16>()); 604 while (reslen < to_bit) {
534 u16 b = static_cast<u16>(strm.ReadBits<16>()); 605 u32 comp = 0;
535 u16 a = static_cast<u16>(strm.ReadBits<16>()); 606 if (num_bits > to_bit - reslen) {
536 607 u32 newshift = to_bit - reslen;
537 u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | 608 comp = num_bits - newshift;
538 (static_cast<u32>(a) & 0xFF00) << 16; 609 num_bits = newshift;
539
540 for (u32 j = 0; j < blockHeight; j++) {
541 for (u32 i = 0; i < blockWidth; i++) {
542 outBuf[j * blockWidth + i] = rgba;
543 } 610 }
611 res = static_cast<IntType>(res << num_bits);
612 res = static_cast<IntType>(res | (v >> comp));
613 reslen += num_bits;
544 } 614 }
615 return res;
545} 616}
546 617
547static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { 618static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
548 for (u32 j = 0; j < blockHeight; j++) { 619 return std::size_t(1) << num_bits;
549 for (u32 i = 0; i < blockWidth; i++) { 620}
550 outBuf[j * blockWidth + i] = 0xFFFF00FF; 621
551 } 622template <typename IntType, u32 num_bits, u32 to_bit>
623static constexpr auto MakeReplicateTable() {
624 std::array<IntType, NumReplicateEntries(num_bits)> table{};
625 for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
626 table[value] = Replicate(value, num_bits, to_bit);
552 } 627 }
628 return table;
553} 629}
554 630
555static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); 631static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
572static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); 648static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
573static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); 649static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
574static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); 650static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
651static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
652static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
653static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
575/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback 654/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
576/// to the runtime implementation 655/// to the runtime implementation
577static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { 656static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1316#undef READ_INT_VALUES 1395#undef READ_INT_VALUES
1317} 1396}
1318 1397
1398static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
1399 u32 blockHeight) {
1400 // Don't actually care about the void extent, just read the bits...
1401 for (s32 i = 0; i < 4; ++i) {
1402 strm.ReadBits<13>();
1403 }
1404
1405 // Decode the RGBA components and renormalize them to the range [0, 255]
1406 u16 r = static_cast<u16>(strm.ReadBits<16>());
1407 u16 g = static_cast<u16>(strm.ReadBits<16>());
1408 u16 b = static_cast<u16>(strm.ReadBits<16>());
1409 u16 a = static_cast<u16>(strm.ReadBits<16>());
1410
1411 u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
1412 (static_cast<u32>(a) & 0xFF00) << 16;
1413
1414 for (u32 j = 0; j < blockHeight; j++) {
1415 for (u32 i = 0; i < blockWidth; i++) {
1416 outBuf[j * blockWidth + i] = rgba;
1417 }
1418 }
1419}
1420
1421static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
1422 for (u32 j = 0; j < blockHeight; j++) {
1423 for (u32 i = 0; i < blockWidth; i++) {
1424 outBuf[j * blockWidth + i] = 0xFFFF00FF;
1425 }
1426 }
1427}
1428
1319static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, 1429static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1320 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { 1430 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1321 InputBitStream strm(inBuf); 1431 InputBitStream strm(inBuf);
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 0229ae122..14d2beec0 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,117 +9,6 @@
9 9
10namespace Tegra::Texture::ASTC { 10namespace Tegra::Texture::ASTC {
11 11
12enum class IntegerEncoding { JustBits, Quint, Trit };
13
14struct IntegerEncodedValue {
15 constexpr IntegerEncodedValue() = default;
16
17 constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
18 : encoding{encoding_}, num_bits{num_bits_} {}
19
20 constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
21 return encoding == other.encoding && num_bits == other.num_bits;
22 }
23
24 // Returns the number of bits required to encode num_vals values.
25 u32 GetBitLength(u32 num_vals) const {
26 u32 total_bits = num_bits * num_vals;
27 if (encoding == IntegerEncoding::Trit) {
28 total_bits += (num_vals * 8 + 4) / 5;
29 } else if (encoding == IntegerEncoding::Quint) {
30 total_bits += (num_vals * 7 + 2) / 3;
31 }
32 return total_bits;
33 }
34
35 IntegerEncoding encoding{};
36 u32 num_bits = 0;
37 u32 bit_value = 0;
38 union {
39 u32 quint_value = 0;
40 u32 trit_value;
41 };
42};
43
44// Returns a new instance of this struct that corresponds to the
45// can take no more than mav_value values
46constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
47 while (mav_value > 0) {
48 u32 check = mav_value + 1;
49
50 // Is mav_value a power of two?
51 if (!(check & (check - 1))) {
52 return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
53 }
54
55 // Is mav_value of the type 3*2^n - 1?
56 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
57 return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
58 }
59
60 // Is mav_value of the type 5*2^n - 1?
61 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
62 return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
63 }
64
65 // Apparently it can't be represented with a bounded integer sequence...
66 // just iterate.
67 mav_value--;
68 }
69 return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
70}
71
72constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
73 std::array<IntegerEncodedValue, 256> encodings{};
74 for (std::size_t i = 0; i < encodings.size(); ++i) {
75 encodings[i] = CreateEncoding(static_cast<u32>(i));
76 }
77 return encodings;
78}
79
80constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
81
82// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
83// is the same as [(num_bits - 1):0] and repeats all the way down.
84template <typename IntType>
85constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
86 if (num_bits == 0 || to_bit == 0) {
87 return 0;
88 }
89 const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
90 IntType res = v;
91 u32 reslen = num_bits;
92 while (reslen < to_bit) {
93 u32 comp = 0;
94 if (num_bits > to_bit - reslen) {
95 u32 newshift = to_bit - reslen;
96 comp = num_bits - newshift;
97 num_bits = newshift;
98 }
99 res = static_cast<IntType>(res << num_bits);
100 res = static_cast<IntType>(res | (v >> comp));
101 reslen += num_bits;
102 }
103 return res;
104}
105
106constexpr std::size_t NumReplicateEntries(u32 num_bits) {
107 return std::size_t(1) << num_bits;
108}
109
110template <typename IntType, u32 num_bits, u32 to_bit>
111constexpr auto MakeReplicateTable() {
112 std::array<IntType, NumReplicateEntries(num_bits)> table{};
113 for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
114 table[value] = Replicate(value, num_bits, to_bit);
115 }
116 return table;
117}
118
119constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
120constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
121constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
122
123void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 12void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
124 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); 13 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
125 14