summaryrefslogtreecommitdiff
path: root/src/video_core/textures/astc.h
diff options
context:
space:
mode:
authorGravatar ameerj2021-02-13 15:50:12 -0500
committerGravatar ameerj2021-03-13 12:16:03 -0500
commit2985e5e94c82febcf215feb0023f4184b38bb24a (patch)
tree7b7cd8be3605560707a74a74c281577920a24248 /src/video_core/textures/astc.h
parentMerge pull request #6053 from Morph1984/time-CalculateSpanBetween (diff)
downloadyuzu-2985e5e94c82febcf215feb0023f4184b38bb24a.tar.gz
yuzu-2985e5e94c82febcf215feb0023f4184b38bb24a.tar.xz
yuzu-2985e5e94c82febcf215feb0023f4184b38bb24a.zip
renderer_opengl: Accelerate ASTC texture decoding with a compute shader
ASTC texture decoding is currently handled by a CPU decoder for GPUs without native ASTC decoding support (most desktop GPUs). This is the cause of noticeable performance degradation in titles that use the format extensively. This commit adds support for accelerating ASTC decoding with a compute shader on OpenGL for GPUs without native support.
Diffstat (limited to 'src/video_core/textures/astc.h')
-rw-r--r--src/video_core/textures/astc.h190
1 files changed, 190 insertions, 0 deletions
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 9105119bc..bc8bddaec 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -8,6 +8,196 @@
8 8
9namespace Tegra::Texture::ASTC { 9namespace Tegra::Texture::ASTC {
10 10
/// Returns how many bits of n are set to one.
constexpr u32 Popcnt(u32 n) {
    u32 set_bits = 0;
    // Every pass clears the lowest set bit, so the loop body runs once per set bit.
    while (n != 0) {
        n &= n - 1;
        ++set_bits;
    }
    return set_bits;
}
19
/// Selects how a sequence of integers is packed.
enum class IntegerEncoding {
    /// Each value is stored as plain bits only.
    JustBits,
    /// Used for ranges of the form 5 * 2^n.
    /// NOTE(review): the name looks like a mechanical rename of "Quint" - confirm.
    Qus32,
    /// Used for ranges of the form 3 * 2^n.
    Trit,
};
21
22struct IntegerEncodedValue {
23 constexpr IntegerEncodedValue() = default;
24
25 constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
26 : encoding{encoding_}, num_bits{num_bits_} {}
27
28 constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
29 return encoding == other.encoding && num_bits == other.num_bits;
30 }
31
32 // Returns the number of bits required to encode nVals values.
33 u32 GetBitLength(u32 nVals) const {
34 u32 totalBits = num_bits * nVals;
35 if (encoding == IntegerEncoding::Trit) {
36 totalBits += (nVals * 8 + 4) / 5;
37 } else if (encoding == IntegerEncoding::Qus32) {
38 totalBits += (nVals * 7 + 2) / 3;
39 }
40 return totalBits;
41 }
42
43 IntegerEncoding encoding{};
44 u32 num_bits = 0;
45 u32 bit_value = 0;
46 union {
47 u32 qus32_value = 0;
48 u32 trit_value;
49 };
50};
51
52// Returns a new instance of this struct that corresponds to the
53// can take no more than maxval values
54static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) {
55 while (maxVal > 0) {
56 u32 check = maxVal + 1;
57
58 // Is maxVal a power of two?
59 if (!(check & (check - 1))) {
60 return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal));
61 }
62
63 // Is maxVal of the type 3*2^n - 1?
64 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
65 return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1));
66 }
67
68 // Is maxVal of the type 5*2^n - 1?
69 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
70 return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1));
71 }
72
73 // Apparently it can't be represented with a bounded integer sequence...
74 // just iterate.
75 maxVal--;
76 }
77 return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
78}
79
80static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
81 std::array<IntegerEncodedValue, 256> encodings{};
82 for (std::size_t i = 0; i < encodings.size(); ++i) {
83 encodings[i] = CreateEncoding(static_cast<u32>(i));
84 }
85 return encodings;
86}
87
88static constexpr std::array<IntegerEncodedValue, 256> EncodingsValues = MakeEncodedValues();
89
// Replicates the low numBits bits of val until toBit bits are filled. The pattern sits at the most
// significant end of the result and is repeated downwards; when toBit is not a multiple of
// numBits, the last repetition keeps only the high bits of the pattern.
//
// Returns 0 when numBits or toBit is 0. When toBit <= numBits no replication takes place and the
// masked input value is returned unchanged.
// Intended for unsigned IntType; toBit must not exceed the bit width of IntType.
template <typename IntType>
static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
    if (numBits == 0 || toBit == 0) {
        return 0;
    }

    // Build the mask in IntType rather than int: an int shift is undefined or truncated once
    // numBits reaches 31, and a shift by the full type width is undefined as well.
    constexpr u32 type_bits = static_cast<u32>(sizeof(IntType) * 8);
    const IntType mask = numBits >= type_bits
                             ? static_cast<IntType>(~IntType{0})
                             : static_cast<IntType>((IntType{1} << numBits) - 1);
    const IntType v = static_cast<IntType>(val & mask);

    IntType res = v;
    u32 reslen = numBits;
    while (reslen < toBit) {
        u32 comp = 0;
        if (numBits > toBit - reslen) {
            // Fewer than numBits bits remain: append only the top bits of the pattern.
            const u32 newshift = toBit - reslen;
            comp = numBits - newshift;
            numBits = newshift;
        }
        res = static_cast<IntType>(res << numBits);
        res = static_cast<IntType>(res | (v >> comp));
        reslen += numBits;
    }
    return res;
}
116
// Number of distinct values representable in num_bits bits, i.e. 2^num_bits.
static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
    constexpr std::size_t one = 1;
    return one << num_bits;
}
120
121template <typename IntType, u32 num_bits, u32 to_bit>
122static constexpr auto MakeReplicateTable() {
123 std::array<IntType, NumReplicateEntries(num_bits)> table{};
124 for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
125 table[value] = Replicate(value, num_bits, to_bit);
126 }
127 return table;
128}
129
130static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
131static constexpr u32 ReplicateByteTo16(std::size_t value) {
132 return REPLICATE_BYTE_TO_16_TABLE[value];
133}
134
135static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
136static constexpr u32 ReplicateBitTo7(std::size_t value) {
137 return REPLICATE_BIT_TO_7_TABLE[value];
138}
139
140static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
141static constexpr u32 ReplicateBitTo9(std::size_t value) {
142 return REPLICATE_BIT_TO_9_TABLE[value];
143}
144
145static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
146static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
147static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
148static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
149static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
150static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
151static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
152static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
153/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
154/// to the runtime implementation
155static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
156 switch (num_bits) {
157 case 1:
158 return REPLICATE_1_BIT_TO_8_TABLE[value];
159 case 2:
160 return REPLICATE_2_BIT_TO_8_TABLE[value];
161 case 3:
162 return REPLICATE_3_BIT_TO_8_TABLE[value];
163 case 4:
164 return REPLICATE_4_BIT_TO_8_TABLE[value];
165 case 5:
166 return REPLICATE_5_BIT_TO_8_TABLE[value];
167 case 6:
168 return REPLICATE_6_BIT_TO_8_TABLE[value];
169 case 7:
170 return REPLICATE_7_BIT_TO_8_TABLE[value];
171 case 8:
172 return REPLICATE_8_BIT_TO_8_TABLE[value];
173 default:
174 return Replicate(value, num_bits, 8);
175 }
176}
177
178static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
179static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
180static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
181static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
182static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
183
184static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
185 switch (num_bits) {
186 case 1:
187 return REPLICATE_1_BIT_TO_6_TABLE[value];
188 case 2:
189 return REPLICATE_2_BIT_TO_6_TABLE[value];
190 case 3:
191 return REPLICATE_3_BIT_TO_6_TABLE[value];
192 case 4:
193 return REPLICATE_4_BIT_TO_6_TABLE[value];
194 case 5:
195 return REPLICATE_5_BIT_TO_6_TABLE[value];
196 default:
197 return Replicate(value, num_bits, 6);
198 }
199}
200
11void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 201void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
12 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); 202 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
13 203