summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2018-06-17 23:50:44 -0400
committerGravatar bunnei2018-06-18 01:56:59 -0400
commit61779fa072fea906410eca3e29ba54fe1ee347d3 (patch)
treecf52473bbca8d54e6edfddf28d874d8a1e50856b /src
parentMerge pull request #569 from bunnei/fix-cache (diff)
downloadyuzu-61779fa072fea906410eca3e29ba54fe1ee347d3.tar.gz
yuzu-61779fa072fea906410eca3e29ba54fe1ee347d3.tar.xz
yuzu-61779fa072fea906410eca3e29ba54fe1ee347d3.zip
gl_rasterizer: Implement texture format ASTC_2D_4X4.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp28
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h16
-rw-r--r--src/video_core/textures/astc.cpp1646
-rw-r--r--src/video_core/textures/astc.h15
-rw-r--r--src/video_core/textures/decoders.cpp3
6 files changed, 1709 insertions, 1 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6e193e7e1..c6431e722 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -41,6 +41,8 @@ add_library(video_core STATIC
41 renderer_opengl/maxwell_to_gl.h 41 renderer_opengl/maxwell_to_gl.h
42 renderer_opengl/renderer_opengl.cpp 42 renderer_opengl/renderer_opengl.cpp
43 renderer_opengl/renderer_opengl.h 43 renderer_opengl/renderer_opengl.h
44 textures/astc.cpp
45 textures/astc.h
44 textures/decoders.cpp 46 textures/decoders.cpp
45 textures/decoders.h 47 textures/decoders.h
46 textures/texture.h 48 textures/texture.h
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index e61960cc0..b2a61024e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -28,6 +28,7 @@
28#include "video_core/engines/maxwell_3d.h" 28#include "video_core/engines/maxwell_3d.h"
29#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 29#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
30#include "video_core/renderer_opengl/gl_state.h" 30#include "video_core/renderer_opengl/gl_state.h"
31#include "video_core/textures/astc.h"
31#include "video_core/textures/decoders.h" 32#include "video_core/textures/decoders.h"
32#include "video_core/utils.h" 33#include "video_core/utils.h"
33#include "video_core/video_core.h" 34#include "video_core/video_core.h"
@@ -55,6 +56,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
55 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 56 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
56 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 57 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
57 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 58 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
59 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
58}}; 60}};
59 61
60static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 62static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -86,6 +88,23 @@ static u16 GetResolutionScaleFactor() {
86 : Settings::values.resolution_factor); 88 : Settings::values.resolution_factor);
87} 89}
88 90
91static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
92 u32 block_width{};
93 u32 block_height{};
94
95 switch (format) {
96 case PixelFormat::ASTC_2D_4X4:
97 block_width = 4;
98 block_height = 4;
99 break;
100 default:
101 NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
102 UNREACHABLE();
103 }
104
105 data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
106}
107
89template <bool morton_to_gl, PixelFormat format> 108template <bool morton_to_gl, PixelFormat format>
90void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, 109void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
91 Tegra::GPUVAddr start, Tegra::GPUVAddr end) { 110 Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
@@ -97,6 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
97 auto data = Tegra::Texture::UnswizzleTexture( 116 auto data = Tegra::Texture::UnswizzleTexture(
98 *gpu.memory_manager->GpuToCpuAddress(base), 117 *gpu.memory_manager->GpuToCpuAddress(base),
99 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); 118 SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
119
120 if (SurfaceParams::IsFormatASTC(format)) {
121 // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
122 ConvertASTCToRGBA8(data, format, stride, height);
123 }
124
100 std::memcpy(gl_buffer, data.data(), data.size()); 125 std::memcpy(gl_buffer, data.data(), data.size());
101 } else { 126 } else {
102 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check 127 // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
@@ -118,7 +143,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
118 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, 143 MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
119 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>, 144 MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
120 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, 145 MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
121 MortonCopy<true, PixelFormat::DXN1>, 146 MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
122}; 147};
123 148
124static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, 149static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -137,6 +162,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
137 nullptr, 162 nullptr,
138 nullptr, 163 nullptr,
139 nullptr, 164 nullptr,
165 MortonCopy<false, PixelFormat::ABGR8>,
140}; 166};
141 167
142// Allocate an uninitialized texture of appropriate size and format for the surface 168// Allocate an uninitialized texture of appropriate size and format for the surface
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 0f43e863d..9da945e19 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -65,6 +65,7 @@ struct SurfaceParams {
65 DXT23 = 8, 65 DXT23 = 8,
66 DXT45 = 9, 66 DXT45 = 9,
67 DXN1 = 10, // This is also known as BC4 67 DXN1 = 10, // This is also known as BC4
68 ASTC_2D_4X4 = 11,
68 69
69 Max, 70 Max,
70 Invalid = 255, 71 Invalid = 255,
@@ -111,6 +112,7 @@ struct SurfaceParams {
111 4, // DXT23 112 4, // DXT23
112 4, // DXT45 113 4, // DXT45
113 4, // DXN1 114 4, // DXN1
115 1, // ASTC_2D_4X4
114 }}; 116 }};
115 117
116 ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); 118 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -136,6 +138,7 @@ struct SurfaceParams {
136 128, // DXT23 138 128, // DXT23
137 128, // DXT45 139 128, // DXT45
138 64, // DXN1 140 64, // DXN1
141 32, // ASTC_2D_4X4
139 }}; 142 }};
140 143
141 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 144 ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -162,6 +165,15 @@ struct SurfaceParams {
162 } 165 }
163 } 166 }
164 167
168 static bool IsFormatASTC(PixelFormat format) {
169 switch (format) {
170 case PixelFormat::ASTC_2D_4X4:
171 return true;
172 default:
173 return false;
174 }
175 }
176
165 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { 177 static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
166 switch (format) { 178 switch (format) {
167 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 179 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -197,6 +209,8 @@ struct SurfaceParams {
197 return PixelFormat::DXT45; 209 return PixelFormat::DXT45;
198 case Tegra::Texture::TextureFormat::DXN1: 210 case Tegra::Texture::TextureFormat::DXN1:
199 return PixelFormat::DXN1; 211 return PixelFormat::DXN1;
212 case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
213 return PixelFormat::ASTC_2D_4X4;
200 default: 214 default:
201 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 215 NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
202 UNREACHABLE(); 216 UNREACHABLE();
@@ -228,6 +242,8 @@ struct SurfaceParams {
228 return Tegra::Texture::TextureFormat::DXT45; 242 return Tegra::Texture::TextureFormat::DXT45;
229 case PixelFormat::DXN1: 243 case PixelFormat::DXN1:
230 return Tegra::Texture::TextureFormat::DXN1; 244 return Tegra::Texture::TextureFormat::DXN1;
245 case PixelFormat::ASTC_2D_4X4:
246 return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
231 default: 247 default:
232 UNREACHABLE(); 248 UNREACHABLE();
233 } 249 }
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
new file mode 100644
index 000000000..3c4ad1c9d
--- /dev/null
+++ b/src/video_core/textures/astc.cpp
@@ -0,0 +1,1646 @@
1// Copyright 2016 The University of North Carolina at Chapel Hill
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Please send all BUG REPORTS to <pavel@cs.unc.edu>.
16// <http://gamma.cs.unc.edu/FasTC/>
17
18#include <algorithm>
19#include <cassert>
20#include <cstdint>
21#include <cstring>
22#include <vector>
23
24#include "video_core/textures/astc.h"
25
// Bit-granular reader/writer over a caller-owned byte buffer. Within each byte,
// bits are produced and consumed starting from the least significant bit.
class BitStream {
public:
    // ptr:          first byte of the buffer (not owned).
    // nBits:        write budget; once at least this many bits have been written,
    //               further writes are silently ignored.
    // start_offset: bit position to start at; only its value modulo 8 is used.
    BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
        : m_BitsWritten(0), m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8),
          m_BitsRead(0), done(false) {}

    ~BitStream() {}

    // Number of bits written so far.
    int GetBitsWritten() const {
        return m_BitsWritten;
    }

    // Number of bits read so far.
    int GetBitsRead() const {
        return m_BitsRead;
    }

    // Writes the low nBits of val, most significant of those bits first.
    void WriteBitsR(unsigned int val, unsigned int nBits) {
        for (unsigned int remaining = nBits; remaining > 0; --remaining) {
            WriteBit((val >> (remaining - 1)) & 1);
        }
    }

    // Writes the low nBits of val, least significant bit first.
    void WriteBits(unsigned int val, unsigned int nBits) {
        for (unsigned int pos = 0; pos < nBits; ++pos) {
            WriteBit((val >> pos) & 1);
        }
    }

    // Reads a single bit and advances the stream.
    int ReadBit() {
        const int value = (*m_CurByte >> m_NextBit) & 1;

        ++m_NextBit;
        while (m_NextBit >= 8) {
            m_NextBit -= 8;
            ++m_CurByte;
        }

        ++m_BitsRead;
        return value;
    }

    // Reads nBits bits; the first bit read becomes bit 0 of the result.
    unsigned int ReadBits(unsigned int nBits) {
        unsigned int value = 0;
        for (unsigned int pos = 0; pos < nBits; ++pos) {
            value |= (ReadBit() & 1) << pos;
        }
        return value;
    }

private:
    // Stores one bit at the current position unless the write budget is used up.
    void WriteBit(int b) {
        if (done) {
            return;
        }

        const unsigned int mask = 1 << m_NextBit;
        ++m_NextBit;

        // Set or clear the target bit, leaving the other bits of the byte alone.
        if (b) {
            *m_CurByte |= mask;
        } else {
            *m_CurByte &= ~mask;
        }

        // Move on to the next byte once this one is full.
        if (m_NextBit >= 8) {
            ++m_CurByte;
            m_NextBit = 0;
        }

        ++m_BitsWritten;
        done = m_BitsWritten >= m_NumBits;
    }

    int m_BitsWritten;
    const int m_NumBits;
    unsigned char* m_CurByte;
    int m_NextBit;
    int m_BitsRead;

    bool done;
};
106
// Read-only view over the bits of an integer. The view keeps a reference to the
// integer it was built from, so that integer must outlive the view.
template <typename IntType>
class Bits {
private:
    const IntType& m_Bits;

public:
    // The view cannot be default-constructed, copied or assigned.
    Bits() = delete;
    Bits(const Bits&) = delete;
    Bits& operator=(const Bits&) = delete;

    explicit Bits(IntType& v) : m_Bits(v) {}

    // Returns the bit at position bitPos (0 = least significant) as 0 or 1.
    uint8_t operator[](uint32_t bitPos) const {
        return static_cast<uint8_t>((m_Bits >> bitPos) & 1);
    }

    // Returns the bits in the inclusive range [start, end], shifted down so that the
    // lower of the two positions lands in bit 0. The bounds may be given in either order.
    IntType operator()(uint32_t start, uint32_t end) const {
        if (start == end) {
            return (*this)[start];
        }
        if (start > end) {
            const uint32_t tmp = start;
            start = end;
            end = tmp;
        }

        // Build the mask in 64 bits: shifting a plain int by 32 or more is undefined,
        // and a full 64-bit range needs every bit set.
        const uint32_t width = end - start + 1;
        const uint64_t mask =
            width >= 64 ? ~static_cast<uint64_t>(0) : (static_cast<uint64_t>(1) << width) - 1;
        return static_cast<IntType>((m_Bits >> start) & mask);
    }
};
137
// Identifies how a value of an integer sequence is stored: as plain bits only, or as
// plain bits combined with a quint or a trit component.
enum EIntegerEncoding {
    eIntegerEncoding_JustBits,
    eIntegerEncoding_Quint,
    eIntegerEncoding_Trit
};
139
140class IntegerEncodedValue {
141private:
142 const EIntegerEncoding m_Encoding;
143 const uint32_t m_NumBits;
144 uint32_t m_BitValue;
145 union {
146 uint32_t m_QuintValue;
147 uint32_t m_TritValue;
148 };
149
150public:
151 // Jank, but we're not doing any heavy lifting in this class, so it's
152 // probably OK. It allows us to use these in std::vectors...
153 IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
154 new (this) IntegerEncodedValue(other);
155 return *this;
156 }
157
158 IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits)
159 : m_Encoding(encoding), m_NumBits(numBits) {}
160
161 EIntegerEncoding GetEncoding() const {
162 return m_Encoding;
163 }
164 uint32_t BaseBitLength() const {
165 return m_NumBits;
166 }
167
168 uint32_t GetBitValue() const {
169 return m_BitValue;
170 }
171 void SetBitValue(uint32_t val) {
172 m_BitValue = val;
173 }
174
175 uint32_t GetTritValue() const {
176 return m_TritValue;
177 }
178 void SetTritValue(uint32_t val) {
179 m_TritValue = val;
180 }
181
182 uint32_t GetQuintValue() const {
183 return m_QuintValue;
184 }
185 void SetQuintValue(uint32_t val) {
186 m_QuintValue = val;
187 }
188
189 bool MatchesEncoding(const IntegerEncodedValue& other) {
190 return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
191 }
192
193 // Returns the number of bits required to encode nVals values.
194 uint32_t GetBitLength(uint32_t nVals) {
195 uint32_t totalBits = m_NumBits * nVals;
196 if (m_Encoding == eIntegerEncoding_Trit) {
197 totalBits += (nVals * 8 + 4) / 5;
198 } else if (m_Encoding == eIntegerEncoding_Quint) {
199 totalBits += (nVals * 7 + 2) / 3;
200 }
201 return totalBits;
202 }
203
204 // Count the number of bits set in a number.
205 static inline uint32_t Popcnt(uint32_t n) {
206 uint32_t c;
207 for (c = 0; n; c++) {
208 n &= n - 1;
209 }
210 return c;
211 }
212
213 // Returns a new instance of this struct that corresponds to the
214 // can take no more than maxval values
215 static IntegerEncodedValue CreateEncoding(uint32_t maxVal) {
216 while (maxVal > 0) {
217 uint32_t check = maxVal + 1;
218
219 // Is maxVal a power of two?
220 if (!(check & (check - 1))) {
221 return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal));
222 }
223
224 // Is maxVal of the type 3*2^n - 1?
225 if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
226 return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1));
227 }
228
229 // Is maxVal of the type 5*2^n - 1?
230 if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
231 return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1));
232 }
233
234 // Apparently it can't be represented with a bounded integer sequence...
235 // just iterate.
236 maxVal--;
237 }
238 return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
239 }
240
241 // Fills result with the values that are encoded in the given
242 // bitstream. We must know beforehand what the maximum possible
243 // value is, and how many values we're decoding.
244 static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
245 uint32_t maxRange, uint32_t nValues) {
246 // Determine encoding parameters
247 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
248
249 // Start decoding
250 uint32_t nValsDecoded = 0;
251 while (nValsDecoded < nValues) {
252 switch (val.GetEncoding()) {
253 case eIntegerEncoding_Quint:
254 DecodeQuintBlock(bits, result, val.BaseBitLength());
255 nValsDecoded += 3;
256 break;
257
258 case eIntegerEncoding_Trit:
259 DecodeTritBlock(bits, result, val.BaseBitLength());
260 nValsDecoded += 5;
261 break;
262
263 case eIntegerEncoding_JustBits:
264 val.SetBitValue(bits.ReadBits(val.BaseBitLength()));
265 result.push_back(val);
266 nValsDecoded++;
267 break;
268 }
269 }
270 }
271
272private:
273 static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
274 uint32_t nBitsPerValue) {
275 // Implement the algorithm in section C.2.12
276 uint32_t m[5];
277 uint32_t t[5];
278 uint32_t T;
279
280 // Read the trit encoded block according to
281 // table C.2.14
282 m[0] = bits.ReadBits(nBitsPerValue);
283 T = bits.ReadBits(2);
284 m[1] = bits.ReadBits(nBitsPerValue);
285 T |= bits.ReadBits(2) << 2;
286 m[2] = bits.ReadBits(nBitsPerValue);
287 T |= bits.ReadBit() << 4;
288 m[3] = bits.ReadBits(nBitsPerValue);
289 T |= bits.ReadBits(2) << 5;
290 m[4] = bits.ReadBits(nBitsPerValue);
291 T |= bits.ReadBit() << 7;
292
293 uint32_t C = 0;
294
295 Bits<uint32_t> Tb(T);
296 if (Tb(2, 4) == 7) {
297 C = (Tb(5, 7) << 2) | Tb(0, 1);
298 t[4] = t[3] = 2;
299 } else {
300 C = Tb(0, 4);
301 if (Tb(5, 6) == 3) {
302 t[4] = 2;
303 t[3] = Tb[7];
304 } else {
305 t[4] = Tb[7];
306 t[3] = Tb(5, 6);
307 }
308 }
309
310 Bits<uint32_t> Cb(C);
311 if (Cb(0, 1) == 3) {
312 t[2] = 2;
313 t[1] = Cb[4];
314 t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
315 } else if (Cb(2, 3) == 3) {
316 t[2] = 2;
317 t[1] = 2;
318 t[0] = Cb(0, 1);
319 } else {
320 t[2] = Cb[4];
321 t[1] = Cb(2, 3);
322 t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
323 }
324
325 for (uint32_t i = 0; i < 5; i++) {
326 IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue);
327 val.SetBitValue(m[i]);
328 val.SetTritValue(t[i]);
329 result.push_back(val);
330 }
331 }
332
333 static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
334 uint32_t nBitsPerValue) {
335 // Implement the algorithm in section C.2.12
336 uint32_t m[3];
337 uint32_t q[3];
338 uint32_t Q;
339
340 // Read the trit encoded block according to
341 // table C.2.15
342 m[0] = bits.ReadBits(nBitsPerValue);
343 Q = bits.ReadBits(3);
344 m[1] = bits.ReadBits(nBitsPerValue);
345 Q |= bits.ReadBits(2) << 3;
346 m[2] = bits.ReadBits(nBitsPerValue);
347 Q |= bits.ReadBits(2) << 5;
348
349 Bits<uint32_t> Qb(Q);
350 if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
351 q[0] = q[1] = 4;
352 q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
353 } else {
354 uint32_t C = 0;
355 if (Qb(1, 2) == 3) {
356 q[2] = 4;
357 C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
358 } else {
359 q[2] = Qb(5, 6);
360 C = Qb(0, 4);
361 }
362
363 Bits<uint32_t> Cb(C);
364 if (Cb(0, 2) == 5) {
365 q[1] = 4;
366 q[0] = Cb(3, 4);
367 } else {
368 q[1] = Cb(3, 4);
369 q[0] = Cb(0, 2);
370 }
371 }
372
373 for (uint32_t i = 0; i < 3; i++) {
374 IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue);
375 val.m_BitValue = m[i];
376 val.m_QuintValue = q[i];
377 result.push_back(val);
378 }
379 }
380};
381
382namespace ASTCC {
383
384struct TexelWeightParams {
385 uint32_t m_Width;
386 uint32_t m_Height;
387 bool m_bDualPlane;
388 uint32_t m_MaxWeight;
389 bool m_bError;
390 bool m_bVoidExtentLDR;
391 bool m_bVoidExtentHDR;
392
393 TexelWeightParams() {
394 memset(this, 0, sizeof(*this));
395 }
396
397 uint32_t GetPackedBitSize() {
398 // How many indices do we have?
399 uint32_t nIdxs = m_Height * m_Width;
400 if (m_bDualPlane) {
401 nIdxs *= 2;
402 }
403
404 return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs);
405 }
406
407 uint32_t GetNumWeightValues() const {
408 uint32_t ret = m_Width * m_Height;
409 if (m_bDualPlane) {
410 ret *= 2;
411 }
412 return ret;
413 }
414};
415
416TexelWeightParams DecodeBlockInfo(BitStream& strm) {
417 TexelWeightParams params;
418
419 // Read the entire block mode all at once
420 uint16_t modeBits = strm.ReadBits(11);
421
422 // Does this match the void extent block mode?
423 if ((modeBits & 0x01FF) == 0x1FC) {
424 if (modeBits & 0x200) {
425 params.m_bVoidExtentHDR = true;
426 } else {
427 params.m_bVoidExtentLDR = true;
428 }
429
430 // Next two bits must be one.
431 if (!(modeBits & 0x400) || !strm.ReadBit()) {
432 params.m_bError = true;
433 }
434
435 return params;
436 }
437
438 // First check if the last four bits are zero
439 if ((modeBits & 0xF) == 0) {
440 params.m_bError = true;
441 return params;
442 }
443
444 // If the last two bits are zero, then if bits
445 // [6-8] are all ones, this is also reserved.
446 if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) {
447 params.m_bError = true;
448 return params;
449 }
450
451 // Otherwise, there is no error... Figure out the layout
452 // of the block mode. Layout is determined by a number
453 // between 0 and 9 corresponding to table C.2.8 of the
454 // ASTC spec.
455 uint32_t layout = 0;
456
457 if ((modeBits & 0x1) || (modeBits & 0x2)) {
458 // layout is in [0-4]
459 if (modeBits & 0x8) {
460 // layout is in [2-4]
461 if (modeBits & 0x4) {
462 // layout is in [3-4]
463 if (modeBits & 0x100) {
464 layout = 4;
465 } else {
466 layout = 3;
467 }
468 } else {
469 layout = 2;
470 }
471 } else {
472 // layout is in [0-1]
473 if (modeBits & 0x4) {
474 layout = 1;
475 } else {
476 layout = 0;
477 }
478 }
479 } else {
480 // layout is in [5-9]
481 if (modeBits & 0x100) {
482 // layout is in [7-9]
483 if (modeBits & 0x80) {
484 // layout is in [7-8]
485 assert((modeBits & 0x40) == 0U);
486 if (modeBits & 0x20) {
487 layout = 8;
488 } else {
489 layout = 7;
490 }
491 } else {
492 layout = 9;
493 }
494 } else {
495 // layout is in [5-6]
496 if (modeBits & 0x80) {
497 layout = 6;
498 } else {
499 layout = 5;
500 }
501 }
502 }
503
504 assert(layout < 10);
505
506 // Determine R
507 uint32_t R = !!(modeBits & 0x10);
508 if (layout < 5) {
509 R |= (modeBits & 0x3) << 1;
510 } else {
511 R |= (modeBits & 0xC) >> 1;
512 }
513 assert(2 <= R && R <= 7);
514
515 // Determine width & height
516 switch (layout) {
517 case 0: {
518 uint32_t A = (modeBits >> 5) & 0x3;
519 uint32_t B = (modeBits >> 7) & 0x3;
520 params.m_Width = B + 4;
521 params.m_Height = A + 2;
522 break;
523 }
524
525 case 1: {
526 uint32_t A = (modeBits >> 5) & 0x3;
527 uint32_t B = (modeBits >> 7) & 0x3;
528 params.m_Width = B + 8;
529 params.m_Height = A + 2;
530 break;
531 }
532
533 case 2: {
534 uint32_t A = (modeBits >> 5) & 0x3;
535 uint32_t B = (modeBits >> 7) & 0x3;
536 params.m_Width = A + 2;
537 params.m_Height = B + 8;
538 break;
539 }
540
541 case 3: {
542 uint32_t A = (modeBits >> 5) & 0x3;
543 uint32_t B = (modeBits >> 7) & 0x1;
544 params.m_Width = A + 2;
545 params.m_Height = B + 6;
546 break;
547 }
548
549 case 4: {
550 uint32_t A = (modeBits >> 5) & 0x3;
551 uint32_t B = (modeBits >> 7) & 0x1;
552 params.m_Width = B + 2;
553 params.m_Height = A + 2;
554 break;
555 }
556
557 case 5: {
558 uint32_t A = (modeBits >> 5) & 0x3;
559 params.m_Width = 12;
560 params.m_Height = A + 2;
561 break;
562 }
563
564 case 6: {
565 uint32_t A = (modeBits >> 5) & 0x3;
566 params.m_Width = A + 2;
567 params.m_Height = 12;
568 break;
569 }
570
571 case 7: {
572 params.m_Width = 6;
573 params.m_Height = 10;
574 break;
575 }
576
577 case 8: {
578 params.m_Width = 10;
579 params.m_Height = 6;
580 break;
581 }
582
583 case 9: {
584 uint32_t A = (modeBits >> 5) & 0x3;
585 uint32_t B = (modeBits >> 9) & 0x3;
586 params.m_Width = A + 6;
587 params.m_Height = B + 6;
588 break;
589 }
590
591 default:
592 assert(!"Don't know this layout...");
593 params.m_bError = true;
594 break;
595 }
596
597 // Determine whether or not we're using dual planes
598 // and/or high precision layouts.
599 bool D = (layout != 9) && (modeBits & 0x400);
600 bool H = (layout != 9) && (modeBits & 0x200);
601
602 if (H) {
603 const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31};
604 params.m_MaxWeight = maxWeights[R - 2];
605 } else {
606 const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7};
607 params.m_MaxWeight = maxWeights[R - 2];
608 }
609
610 params.m_bDualPlane = D;
611
612 return params;
613}
614
615void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
616 uint32_t blockHeight) {
617 // Don't actually care about the void extent, just read the bits...
618 for (int i = 0; i < 4; ++i) {
619 strm.ReadBits(13);
620 }
621
622 // Decode the RGBA components and renormalize them to the range [0, 255]
623 uint16_t r = strm.ReadBits(16);
624 uint16_t g = strm.ReadBits(16);
625 uint16_t b = strm.ReadBits(16);
626 uint16_t a = strm.ReadBits(16);
627
628 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
629 (static_cast<uint32_t>(a) & 0xFF00) << 16;
630
631 for (uint32_t j = 0; j < blockHeight; j++)
632 for (uint32_t i = 0; i < blockWidth; i++) {
633 outBuf[j * blockWidth + i] = rgba;
634 }
635}
636
// Fills all blockWidth * blockHeight entries of outBuf with the error colour 0xFFFF00FF.
void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) {
    for (uint32_t row = 0; row < blockHeight; ++row) {
        std::fill_n(outBuf + row * blockWidth, blockWidth, 0xFFFF00FF);
    }
}
643
// Widens the low numBits of val to toBit bits by repeating that bit pattern from the
// top down; the last repetition is cut short when toBit is not a multiple of numBits,
// keeping the pattern's most significant bits. Returns 0 when numBits or toBit is 0.
template <typename IntType>
IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
    if (numBits == 0 || toBit == 0) {
        return 0;
    }

    const IntType pattern = val & ((1 << numBits) - 1);
    IntType result = pattern;
    uint32_t filled = numBits;
    uint32_t step = numBits;

    while (filled < toBit) {
        const uint32_t room = toBit - filled;
        uint32_t dropped = 0;
        if (step > room) {
            // Only part of the pattern fits: keep its upper `room` bits.
            dropped = step - room;
            step = room;
        }
        result <<= step;
        result |= pattern >> dropped;
        filled += step;
    }

    return result;
}
668
669class Pixel {
670protected:
671 typedef int16_t ChannelType;
672 uint8_t m_BitDepth[4];
673 int16_t color[4];
674
675public:
676 Pixel() {
677 for (int i = 0; i < 4; i++) {
678 m_BitDepth[i] = 8;
679 color[i] = 0;
680 }
681 }
682
683 Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) {
684 for (int i = 0; i < 4; i++)
685 m_BitDepth[i] = bitDepth;
686
687 color[0] = a;
688 color[1] = r;
689 color[2] = g;
690 color[3] = b;
691 }
692
693 // Changes the depth of each pixel. This scales the values to
694 // the appropriate bit depth by either truncating the least
695 // significant bits when going from larger to smaller bit depth
696 // or by repeating the most significant bits when going from
697 // smaller to larger bit depths.
698 void ChangeBitDepth(const uint8_t (&depth)[4]) {
699 for (uint32_t i = 0; i < 4; i++) {
700 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
701 m_BitDepth[i] = depth[i];
702 }
703 }
704
705 template <typename IntType>
706 static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) {
707 float denominator = static_cast<float>((1 << bitDepth) - 1);
708 return static_cast<float>(channel) / denominator;
709 }
710
711 // Changes the bit depth of a single component. See the comment
712 // above for how we do this.
713 static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) {
714 assert(newDepth <= 8);
715 assert(oldDepth <= 8);
716
717 if (oldDepth == newDepth) {
718 // Do nothing
719 return val;
720 } else if (oldDepth == 0 && newDepth != 0) {
721 return (1 << newDepth) - 1;
722 } else if (newDepth > oldDepth) {
723 return Replicate(val, oldDepth, newDepth);
724 } else {
725 // oldDepth > newDepth
726 if (newDepth == 0) {
727 return 0xFF;
728 } else {
729 uint8_t bitsWasted = oldDepth - newDepth;
730 uint16_t v = static_cast<uint16_t>(val);
731 v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
732 v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
733 return static_cast<uint8_t>(v);
734 }
735 }
736
737 assert(!"We shouldn't get here.");
738 return 0;
739 }
740
741 const ChannelType& A() const {
742 return color[0];
743 }
744 ChannelType& A() {
745 return color[0];
746 }
747 const ChannelType& R() const {
748 return color[1];
749 }
750 ChannelType& R() {
751 return color[1];
752 }
753 const ChannelType& G() const {
754 return color[2];
755 }
756 ChannelType& G() {
757 return color[2];
758 }
759 const ChannelType& B() const {
760 return color[3];
761 }
762 ChannelType& B() {
763 return color[3];
764 }
765 const ChannelType& Component(uint32_t idx) const {
766 return color[idx];
767 }
768 ChannelType& Component(uint32_t idx) {
769 return color[idx];
770 }
771
772 void GetBitDepth(uint8_t (&outDepth)[4]) const {
773 for (int i = 0; i < 4; i++) {
774 outDepth[i] = m_BitDepth[i];
775 }
776 }
777
778 // Take all of the components, transform them to their 8-bit variants,
779 // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
780 // that the architecture is little-endian, so the alpha channel will end
781 // up in the most-significant byte.
782 uint32_t Pack() const {
783 Pixel eightBit(*this);
784 const uint8_t eightBitDepth[4] = {8, 8, 8, 8};
785 eightBit.ChangeBitDepth(eightBitDepth);
786
787 uint32_t r = 0;
788 r |= eightBit.A();
789 r <<= 8;
790 r |= eightBit.B();
791 r <<= 8;
792 r |= eightBit.G();
793 r <<= 8;
794 r |= eightBit.R();
795 return r;
796 }
797
798 // Clamps the pixel to the range [0,255]
799 void ClampByte() {
800 for (uint32_t i = 0; i < 4; i++) {
801 color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
802 }
803 }
804
805 void MakeOpaque() {
806 A() = 255;
807 }
808};
809
810void DecodeColorValues(uint32_t* out, uint8_t* data, uint32_t* modes, const uint32_t nPartitions,
811 const uint32_t nBitsForColorData) {
812 // First figure out how many color values we have
813 uint32_t nValues = 0;
814 for (uint32_t i = 0; i < nPartitions; i++) {
815 nValues += ((modes[i] >> 2) + 1) << 1;
816 }
817
818 // Then based on the number of values and the remaining number of bits,
819 // figure out the max value for each of them...
820 uint32_t range = 256;
821 while (--range > 0) {
822 IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range);
823 uint32_t bitLength = val.GetBitLength(nValues);
824 if (bitLength <= nBitsForColorData) {
825 // Find the smallest possible range that matches the given encoding
826 while (--range > 0) {
827 IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range);
828 if (!newval.MatchesEncoding(val)) {
829 break;
830 }
831 }
832
833 // Return to last matching range.
834 range++;
835 break;
836 }
837 }
838
839 // We now have enough to decode our integer sequence.
840 std::vector<IntegerEncodedValue> decodedColorValues;
841 BitStream colorStream(data);
842 IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
843
844 // Once we have the decoded values, we need to dequantize them to the 0-255 range
845 // This procedure is outlined in ASTC spec C.2.13
846 uint32_t outIdx = 0;
847 std::vector<IntegerEncodedValue>::const_iterator itr;
848 for (itr = decodedColorValues.begin(); itr != decodedColorValues.end(); itr++) {
849 // Have we already decoded all that we need?
850 if (outIdx >= nValues) {
851 break;
852 }
853
854 const IntegerEncodedValue& val = *itr;
855 uint32_t bitlen = val.BaseBitLength();
856 uint32_t bitval = val.GetBitValue();
857
858 assert(bitlen >= 1);
859
860 uint32_t A = 0, B = 0, C = 0, D = 0;
861 // A is just the lsb replicated 9 times.
862 A = Replicate(bitval & 1, 1, 9);
863
864 switch (val.GetEncoding()) {
865 // Replicate bits
866 case eIntegerEncoding_JustBits:
867 out[outIdx++] = Replicate(bitval, bitlen, 8);
868 break;
869
870 // Use algorithm in C.2.13
871 case eIntegerEncoding_Trit: {
872
873 D = val.GetTritValue();
874
875 switch (bitlen) {
876 case 1: {
877 C = 204;
878 } break;
879
880 case 2: {
881 C = 93;
882 // B = b000b0bb0
883 uint32_t b = (bitval >> 1) & 1;
884 B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
885 } break;
886
887 case 3: {
888 C = 44;
889 // B = cb000cbcb
890 uint32_t cb = (bitval >> 1) & 3;
891 B = (cb << 7) | (cb << 2) | cb;
892 } break;
893
894 case 4: {
895 C = 22;
896 // B = dcb000dcb
897 uint32_t dcb = (bitval >> 1) & 7;
898 B = (dcb << 6) | dcb;
899 } break;
900
901 case 5: {
902 C = 11;
903 // B = edcb000ed
904 uint32_t edcb = (bitval >> 1) & 0xF;
905 B = (edcb << 5) | (edcb >> 2);
906 } break;
907
908 case 6: {
909 C = 5;
910 // B = fedcb000f
911 uint32_t fedcb = (bitval >> 1) & 0x1F;
912 B = (fedcb << 4) | (fedcb >> 4);
913 } break;
914
915 default:
916 assert(!"Unsupported trit encoding for color values!");
917 break;
918 } // switch(bitlen)
919 } // case eIntegerEncoding_Trit
920 break;
921
922 case eIntegerEncoding_Quint: {
923
924 D = val.GetQuintValue();
925
926 switch (bitlen) {
927 case 1: {
928 C = 113;
929 } break;
930
931 case 2: {
932 C = 54;
933 // B = b0000bb00
934 uint32_t b = (bitval >> 1) & 1;
935 B = (b << 8) | (b << 3) | (b << 2);
936 } break;
937
938 case 3: {
939 C = 26;
940 // B = cb0000cbc
941 uint32_t cb = (bitval >> 1) & 3;
942 B = (cb << 7) | (cb << 1) | (cb >> 1);
943 } break;
944
945 case 4: {
946 C = 13;
947 // B = dcb0000dc
948 uint32_t dcb = (bitval >> 1) & 7;
949 B = (dcb << 6) | (dcb >> 1);
950 } break;
951
952 case 5: {
953 C = 6;
954 // B = edcb0000e
955 uint32_t edcb = (bitval >> 1) & 0xF;
956 B = (edcb << 5) | (edcb >> 3);
957 } break;
958
959 default:
960 assert(!"Unsupported quint encoding for color values!");
961 break;
962 } // switch(bitlen)
963 } // case eIntegerEncoding_Quint
964 break;
965 } // switch(val.GetEncoding())
966
967 if (val.GetEncoding() != eIntegerEncoding_JustBits) {
968 uint32_t T = D * C + B;
969 T ^= A;
970 T = (A & 0x80) | (T >> 2);
971 out[outIdx++] = T;
972 }
973 }
974
975 // Make sure that each of our values is in the proper range...
976 for (uint32_t i = 0; i < nValues; i++) {
977 assert(out[i] <= 255);
978 }
979}
980
981uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
982 uint32_t bitval = val.GetBitValue();
983 uint32_t bitlen = val.BaseBitLength();
984
985 uint32_t A = Replicate(bitval & 1, 1, 7);
986 uint32_t B = 0, C = 0, D = 0;
987
988 uint32_t result = 0;
989 switch (val.GetEncoding()) {
990 case eIntegerEncoding_JustBits:
991 result = Replicate(bitval, bitlen, 6);
992 break;
993
994 case eIntegerEncoding_Trit: {
995 D = val.GetTritValue();
996 assert(D < 3);
997
998 switch (bitlen) {
999 case 0: {
1000 uint32_t results[3] = {0, 32, 63};
1001 result = results[D];
1002 } break;
1003
1004 case 1: {
1005 C = 50;
1006 } break;
1007
1008 case 2: {
1009 C = 23;
1010 uint32_t b = (bitval >> 1) & 1;
1011 B = (b << 6) | (b << 2) | b;
1012 } break;
1013
1014 case 3: {
1015 C = 11;
1016 uint32_t cb = (bitval >> 1) & 3;
1017 B = (cb << 5) | cb;
1018 } break;
1019
1020 default:
1021 assert(!"Invalid trit encoding for texel weight");
1022 break;
1023 }
1024 } break;
1025
1026 case eIntegerEncoding_Quint: {
1027 D = val.GetQuintValue();
1028 assert(D < 5);
1029
1030 switch (bitlen) {
1031 case 0: {
1032 uint32_t results[5] = {0, 16, 32, 47, 63};
1033 result = results[D];
1034 } break;
1035
1036 case 1: {
1037 C = 28;
1038 } break;
1039
1040 case 2: {
1041 C = 13;
1042 uint32_t b = (bitval >> 1) & 1;
1043 B = (b << 6) | (b << 1);
1044 } break;
1045
1046 default:
1047 assert(!"Invalid quint encoding for texel weight");
1048 break;
1049 }
1050 } break;
1051 }
1052
1053 if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) {
1054 // Decode the value...
1055 result = D * C + B;
1056 result ^= A;
1057 result = (A & 0x20) | (result >> 2);
1058 }
1059
1060 assert(result < 64);
1061
1062 // Change from [0,63] to [0,64]
1063 if (result > 32) {
1064 result += 1;
1065 }
1066
1067 return result;
1068}
1069
1070void UnquantizeTexelWeights(uint32_t out[2][144], std::vector<IntegerEncodedValue>& weights,
1071 const TexelWeightParams& params, const uint32_t blockWidth,
1072 const uint32_t blockHeight) {
1073 uint32_t weightIdx = 0;
1074 uint32_t unquantized[2][144];
1075 std::vector<IntegerEncodedValue>::const_iterator itr;
1076 for (itr = weights.begin(); itr != weights.end(); itr++) {
1077 unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
1078
1079 if (params.m_bDualPlane) {
1080 itr++;
1081 unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr);
1082 if (itr == weights.end()) {
1083 break;
1084 }
1085 }
1086
1087 if (++weightIdx >= (params.m_Width * params.m_Height))
1088 break;
1089 }
1090
1091 // Do infill if necessary (Section C.2.18) ...
1092 uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
1093 uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
1094
1095 const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U;
1096 for (uint32_t plane = 0; plane < kPlaneScale; plane++)
1097 for (uint32_t t = 0; t < blockHeight; t++)
1098 for (uint32_t s = 0; s < blockWidth; s++) {
1099 uint32_t cs = Ds * s;
1100 uint32_t ct = Dt * t;
1101
1102 uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6;
1103 uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6;
1104
1105 uint32_t js = gs >> 4;
1106 uint32_t fs = gs & 0xF;
1107
1108 uint32_t jt = gt >> 4;
1109 uint32_t ft = gt & 0x0F;
1110
1111 uint32_t w11 = (fs * ft + 8) >> 4;
1112 uint32_t w10 = ft - w11;
1113 uint32_t w01 = fs - w11;
1114 uint32_t w00 = 16 - fs - ft + w11;
1115
1116 uint32_t v0 = js + jt * params.m_Width;
1117
1118#define FIND_TEXEL(tidx, bidx) \
1119 uint32_t p##bidx = 0; \
1120 do { \
1121 if ((tidx) < (params.m_Width * params.m_Height)) { \
1122 p##bidx = unquantized[plane][(tidx)]; \
1123 } \
1124 } while (0)
1125
1126 FIND_TEXEL(v0, 00);
1127 FIND_TEXEL(v0 + 1, 01);
1128 FIND_TEXEL(v0 + params.m_Width, 10);
1129 FIND_TEXEL(v0 + params.m_Width + 1, 11);
1130
1131#undef FIND_TEXEL
1132
1133 out[plane][t * blockWidth + s] =
1134 (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
1135 }
1136}
1137
// Bit transfer operation from ASTC spec C.2.14: bit 7 of `a` is moved into bit 7 of `b`
// (after shifting `b` right by one), and the remaining bits of `a` are shifted right by one and
// interpreted as a signed 6-bit value.
static inline void BitTransferSigned(int32_t& a, int32_t& b) {
    // Make room in b and take over the top bit of a.
    b = (b >> 1) | (a & 0x80);

    // Keep six bits of the shifted a and sign-extend from bit 5.
    a = (a >> 1) & 0x3F;
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
1147
1148// Adds more precision to the blue channel as described
1149// in C.2.14
1150static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) {
1151 return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1),
1152 static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b));
1153}
1154
// Partition selection functions as specified in C.2.21.
//
// hash52 scrambles a 32-bit seed with a fixed sequence of shift/add/xor steps; all arithmetic
// wraps modulo 2^32.
static inline uint32_t hash52(uint32_t p) {
    uint32_t h = p;
    h = h ^ (h >> 15);
    h = h - (h << 17);
    h = h + (h << 7);
    h = h + (h << 4);
    h = h ^ (h >> 5);
    h = h + (h << 16);
    h = h ^ (h >> 7);
    h = h ^ (h >> 3);
    h = h ^ (h << 6);
    h = h ^ (h >> 17);
    return h;
}
1170
1171static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
1172 int32_t partitionCount, int32_t smallBlock) {
1173 if (1 == partitionCount)
1174 return 0;
1175
1176 if (smallBlock) {
1177 x <<= 1;
1178 y <<= 1;
1179 z <<= 1;
1180 }
1181
1182 seed += (partitionCount - 1) * 1024;
1183
1184 uint32_t rnum = hash52(static_cast<uint32_t>(seed));
1185 uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF);
1186 uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF);
1187 uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF);
1188 uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF);
1189 uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF);
1190 uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF);
1191 uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF);
1192 uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF);
1193 uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF);
1194 uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF);
1195 uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
1196 uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
1197
1198 seed1 *= seed1;
1199 seed2 *= seed2;
1200 seed3 *= seed3;
1201 seed4 *= seed4;
1202 seed5 *= seed5;
1203 seed6 *= seed6;
1204 seed7 *= seed7;
1205 seed8 *= seed8;
1206 seed9 *= seed9;
1207 seed10 *= seed10;
1208 seed11 *= seed11;
1209 seed12 *= seed12;
1210
1211 int32_t sh1, sh2, sh3;
1212 if (seed & 1) {
1213 sh1 = (seed & 2) ? 4 : 5;
1214 sh2 = (partitionCount == 3) ? 6 : 5;
1215 } else {
1216 sh1 = (partitionCount == 3) ? 6 : 5;
1217 sh2 = (seed & 2) ? 4 : 5;
1218 }
1219 sh3 = (seed & 0x10) ? sh1 : sh2;
1220
1221 seed1 >>= sh1;
1222 seed2 >>= sh2;
1223 seed3 >>= sh1;
1224 seed4 >>= sh2;
1225 seed5 >>= sh1;
1226 seed6 >>= sh2;
1227 seed7 >>= sh1;
1228 seed8 >>= sh2;
1229 seed9 >>= sh3;
1230 seed10 >>= sh3;
1231 seed11 >>= sh3;
1232 seed12 >>= sh3;
1233
1234 int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
1235 int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
1236 int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
1237 int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
1238
1239 a &= 0x3F;
1240 b &= 0x3F;
1241 c &= 0x3F;
1242 d &= 0x3F;
1243
1244 if (partitionCount < 4)
1245 d = 0;
1246 if (partitionCount < 3)
1247 c = 0;
1248
1249 if (a >= b && a >= c && a >= d)
1250 return 0;
1251 else if (b >= c && b >= d)
1252 return 1;
1253 else if (c >= d)
1254 return 2;
1255 return 3;
1256}
1257
1258static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount,
1259 int32_t smallBlock) {
1260 return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
1261}
1262
1263// Section C.2.14
1264void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues,
1265 uint32_t colorEndpointMode) {
1266#define READ_UINT_VALUES(N) \
1267 uint32_t v[N]; \
1268 for (uint32_t i = 0; i < N; i++) { \
1269 v[i] = *(colorValues++); \
1270 }
1271
1272#define READ_INT_VALUES(N) \
1273 int32_t v[N]; \
1274 for (uint32_t i = 0; i < N; i++) { \
1275 v[i] = static_cast<int32_t>(*(colorValues++)); \
1276 }
1277
1278 switch (colorEndpointMode) {
1279 case 0: {
1280 READ_UINT_VALUES(2)
1281 ep1 = Pixel(0xFF, v[0], v[0], v[0]);
1282 ep2 = Pixel(0xFF, v[1], v[1], v[1]);
1283 } break;
1284
1285 case 1: {
1286 READ_UINT_VALUES(2)
1287 uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0);
1288 uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
1289 ep1 = Pixel(0xFF, L0, L0, L0);
1290 ep2 = Pixel(0xFF, L1, L1, L1);
1291 } break;
1292
1293 case 4: {
1294 READ_UINT_VALUES(4)
1295 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1296 ep2 = Pixel(v[3], v[1], v[1], v[1]);
1297 } break;
1298
1299 case 5: {
1300 READ_INT_VALUES(4)
1301 BitTransferSigned(v[1], v[0]);
1302 BitTransferSigned(v[3], v[2]);
1303 ep1 = Pixel(v[2], v[0], v[0], v[0]);
1304 ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]);
1305 ep1.ClampByte();
1306 ep2.ClampByte();
1307 } break;
1308
1309 case 6: {
1310 READ_UINT_VALUES(4)
1311 ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1312 ep2 = Pixel(0xFF, v[0], v[1], v[2]);
1313 } break;
1314
1315 case 8: {
1316 READ_UINT_VALUES(6)
1317 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1318 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1319 ep2 = Pixel(0xFF, v[1], v[3], v[5]);
1320 } else {
1321 ep1 = BlueContract(0xFF, v[1], v[3], v[5]);
1322 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1323 }
1324 } break;
1325
1326 case 9: {
1327 READ_INT_VALUES(6)
1328 BitTransferSigned(v[1], v[0]);
1329 BitTransferSigned(v[3], v[2]);
1330 BitTransferSigned(v[5], v[4]);
1331 if (v[1] + v[3] + v[5] >= 0) {
1332 ep1 = Pixel(0xFF, v[0], v[2], v[4]);
1333 ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1334 } else {
1335 ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1336 ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
1337 }
1338 ep1.ClampByte();
1339 ep2.ClampByte();
1340 } break;
1341
1342 case 10: {
1343 READ_UINT_VALUES(6)
1344 ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
1345 ep2 = Pixel(v[5], v[0], v[1], v[2]);
1346 } break;
1347
1348 case 12: {
1349 READ_UINT_VALUES(8)
1350 if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
1351 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1352 ep2 = Pixel(v[7], v[1], v[3], v[5]);
1353 } else {
1354 ep1 = BlueContract(v[7], v[1], v[3], v[5]);
1355 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1356 }
1357 } break;
1358
1359 case 13: {
1360 READ_INT_VALUES(8)
1361 BitTransferSigned(v[1], v[0]);
1362 BitTransferSigned(v[3], v[2]);
1363 BitTransferSigned(v[5], v[4]);
1364 BitTransferSigned(v[7], v[6]);
1365 if (v[1] + v[3] + v[5] >= 0) {
1366 ep1 = Pixel(v[6], v[0], v[2], v[4]);
1367 ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1368 } else {
1369 ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
1370 ep2 = BlueContract(v[6], v[0], v[2], v[4]);
1371 }
1372 ep1.ClampByte();
1373 ep2.ClampByte();
1374 } break;
1375
1376 default:
1377 assert(!"Unsupported color endpoint mode (is it HDR?)");
1378 break;
1379 }
1380
1381#undef READ_UINT_VALUES
1382#undef READ_INT_VALUES
1383}
1384
1385void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, const uint32_t blockHeight,
1386 uint32_t* outBuf) {
1387 BitStream strm(inBuf);
1388 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1389
1390 // Was there an error?
1391 if (weightParams.m_bError) {
1392 assert(!"Invalid block mode");
1393 FillError(outBuf, blockWidth, blockHeight);
1394 return;
1395 }
1396
1397 if (weightParams.m_bVoidExtentLDR) {
1398 FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight);
1399 return;
1400 }
1401
1402 if (weightParams.m_bVoidExtentHDR) {
1403 assert(!"HDR void extent blocks are unsupported!");
1404 FillError(outBuf, blockWidth, blockHeight);
1405 return;
1406 }
1407
1408 if (weightParams.m_Width > blockWidth) {
1409 assert(!"Texel weight grid width should be smaller than block width");
1410 FillError(outBuf, blockWidth, blockHeight);
1411 return;
1412 }
1413
1414 if (weightParams.m_Height > blockHeight) {
1415 assert(!"Texel weight grid height should be smaller than block height");
1416 FillError(outBuf, blockWidth, blockHeight);
1417 return;
1418 }
1419
1420 // Read num partitions
1421 uint32_t nPartitions = strm.ReadBits(2) + 1;
1422 assert(nPartitions <= 4);
1423
1424 if (nPartitions == 4 && weightParams.m_bDualPlane) {
1425 assert(!"Dual plane mode is incompatible with four partition blocks");
1426 FillError(outBuf, blockWidth, blockHeight);
1427 return;
1428 }
1429
1430 // Based on the number of partitions, read the color endpoint mode for
1431 // each partition.
1432
1433 // Determine partitions, partition index, and color endpoint modes
1434 int32_t planeIdx = -1;
1435 uint32_t partitionIndex;
1436 uint32_t colorEndpointMode[4] = {0, 0, 0, 0};
1437
1438 // Define color data.
1439 uint8_t colorEndpointData[16];
1440 memset(colorEndpointData, 0, sizeof(colorEndpointData));
1441 BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1442
1443 // Read extra config data...
1444 uint32_t baseCEM = 0;
1445 if (nPartitions == 1) {
1446 colorEndpointMode[0] = strm.ReadBits(4);
1447 partitionIndex = 0;
1448 } else {
1449 partitionIndex = strm.ReadBits(10);
1450 baseCEM = strm.ReadBits(6);
1451 }
1452 uint32_t baseMode = (baseCEM & 3);
1453
1454 // Remaining bits are color endpoint data...
1455 uint32_t nWeightBits = weightParams.GetPackedBitSize();
1456 int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead();
1457
1458 // Consider extra bits prior to texel data...
1459 uint32_t extraCEMbits = 0;
1460 if (baseMode) {
1461 switch (nPartitions) {
1462 case 2:
1463 extraCEMbits += 2;
1464 break;
1465 case 3:
1466 extraCEMbits += 5;
1467 break;
1468 case 4:
1469 extraCEMbits += 8;
1470 break;
1471 default:
1472 assert(false);
1473 break;
1474 }
1475 }
1476 remainingBits -= extraCEMbits;
1477
1478 // Do we have a dual plane situation?
1479 uint32_t planeSelectorBits = 0;
1480 if (weightParams.m_bDualPlane) {
1481 planeSelectorBits = 2;
1482 }
1483 remainingBits -= planeSelectorBits;
1484
1485 // Read color data...
1486 uint32_t colorDataBits = remainingBits;
1487 while (remainingBits > 0) {
1488 uint32_t nb = std::min(remainingBits, 8);
1489 uint32_t b = strm.ReadBits(nb);
1490 colorEndpointStream.WriteBits(b, nb);
1491 remainingBits -= 8;
1492 }
1493
1494 // Read the plane selection bits
1495 planeIdx = strm.ReadBits(planeSelectorBits);
1496
1497 // Read the rest of the CEM
1498 if (baseMode) {
1499 uint32_t extraCEM = strm.ReadBits(extraCEMbits);
1500 uint32_t CEM = (extraCEM << 6) | baseCEM;
1501 CEM >>= 2;
1502
1503 bool C[4] = {0};
1504 for (uint32_t i = 0; i < nPartitions; i++) {
1505 C[i] = CEM & 1;
1506 CEM >>= 1;
1507 }
1508
1509 uint8_t M[4] = {0};
1510 for (uint32_t i = 0; i < nPartitions; i++) {
1511 M[i] = CEM & 3;
1512 CEM >>= 2;
1513 assert(M[i] <= 3);
1514 }
1515
1516 for (uint32_t i = 0; i < nPartitions; i++) {
1517 colorEndpointMode[i] = baseMode;
1518 if (!(C[i]))
1519 colorEndpointMode[i] -= 1;
1520 colorEndpointMode[i] <<= 2;
1521 colorEndpointMode[i] |= M[i];
1522 }
1523 } else if (nPartitions > 1) {
1524 uint32_t CEM = baseCEM >> 2;
1525 for (uint32_t i = 0; i < nPartitions; i++) {
1526 colorEndpointMode[i] = CEM;
1527 }
1528 }
1529
1530 // Make sure everything up till here is sane.
1531 for (uint32_t i = 0; i < nPartitions; i++) {
1532 assert(colorEndpointMode[i] < 16);
1533 }
1534 assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
1535
1536 // Decode both color data and texel weight data
1537 uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions
1538 DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions,
1539 colorDataBits);
1540
1541 Pixel endpoints[4][2];
1542 const uint32_t* colorValuesPtr = colorValues;
1543 for (uint32_t i = 0; i < nPartitions; i++) {
1544 ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]);
1545 }
1546
1547 // Read the texel weight data..
1548 uint8_t texelWeightData[16];
1549 memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
1550
1551 // Reverse everything
1552 for (uint32_t i = 0; i < 8; i++) {
1553// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
1554#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
1555 unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i]));
1556 unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i]));
1557#undef REVERSE_BYTE
1558
1559 texelWeightData[i] = b;
1560 texelWeightData[15 - i] = a;
1561 }
1562
1563 // Make sure that higher non-texel bits are set to zero
1564 const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1565 texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
1566 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1567
1568 std::vector<IntegerEncodedValue> texelWeightValues;
1569 BitStream weightStream(texelWeightData);
1570
1571 IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1572 weightParams.m_MaxWeight,
1573 weightParams.GetNumWeightValues());
1574
1575 // Blocks can be at most 12x12, so we can have as many as 144 weights
1576 uint32_t weights[2][144];
1577 UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
1578
1579 // Now that we have endpoints and weights, we can interpolate and generate
1580 // the proper decoding...
1581 for (uint32_t j = 0; j < blockHeight; j++)
1582 for (uint32_t i = 0; i < blockWidth; i++) {
1583 uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions,
1584 (blockHeight * blockWidth) < 32);
1585 assert(partition < nPartitions);
1586
1587 Pixel p;
1588 for (uint32_t c = 0; c < 4; c++) {
1589 uint32_t C0 = endpoints[partition][0].Component(c);
1590 C0 = Replicate(C0, 8, 16);
1591 uint32_t C1 = endpoints[partition][1].Component(c);
1592 C1 = Replicate(C1, 8, 16);
1593
1594 uint32_t plane = 0;
1595 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
1596 plane = 1;
1597 }
1598
1599 uint32_t weight = weights[plane][j * blockWidth + i];
1600 uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
1601 if (C == 65535) {
1602 p.Component(c) = 255;
1603 } else {
1604 double Cf = static_cast<double>(C);
1605 p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5);
1606 }
1607 }
1608
1609 outBuf[j * blockWidth + i] = p.Pack();
1610 }
1611}
1612
1613} // namespace ASTCC
1614
1615namespace Tegra::Texture::ASTC {
1616
1617std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
1618 uint32_t block_width, uint32_t block_height) {
1619 uint32_t blockIdx = 0;
1620 std::vector<uint8_t> outData;
1621 outData.resize(height * width * 4);
1622 for (uint32_t j = 0; j < height; j += block_height) {
1623 for (uint32_t i = 0; i < width; i += block_width) {
1624
1625 uint8_t* blockPtr = data.data() + blockIdx * 16;
1626
1627 // Blocks can be at most 12x12
1628 uint32_t uncompData[144];
1629 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1630
1631 uint32_t decompWidth = std::min(block_width, width - i);
1632 uint32_t decompHeight = std::min(block_height, height - j);
1633
1634 uint8_t* outRow = outData.data() + (j * width + i) * 4;
1635 for (uint32_t jj = 0; jj < decompHeight; jj++) {
1636 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
1637 }
1638
1639 blockIdx++;
1640 }
1641 }
1642
1643 return outData;
1644}
1645
1646} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
new file mode 100644
index 000000000..f0d7c0e56
--- /dev/null
+++ b/src/video_core/textures/astc.h
@@ -0,0 +1,15 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstdint>
8#include <vector>
9
10namespace Tegra::Texture::ASTC {
11
/// Decompresses an ASTC-compressed image.
///
/// @param data         Compressed image, 16 bytes per block, blocks stored row by row.
/// @param width        Image width in pixels.
/// @param height       Image height in pixels.
/// @param block_width  Width of an ASTC block in texels.
/// @param block_height Height of an ASTC block in texels.
/// @return The decompressed image, width * height * 4 bytes (four bytes per pixel).
12std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
13 uint32_t block_width, uint32_t block_height);
14
15} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7bf9c4c4b..0db4367f1 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -53,6 +53,7 @@ u32 BytesPerPixel(TextureFormat format) {
53 case TextureFormat::DXT45: 53 case TextureFormat::DXT45:
54 // In this case a 'pixel' actually refers to a 4x4 tile. 54 // In this case a 'pixel' actually refers to a 4x4 tile.
55 return 16; 55 return 16;
56 case TextureFormat::ASTC_2D_4X4:
56 case TextureFormat::A8R8G8B8: 57 case TextureFormat::A8R8G8B8:
57 case TextureFormat::A2B10G10R10: 58 case TextureFormat::A2B10G10R10:
58 case TextureFormat::BF10GF11RF11: 59 case TextureFormat::BF10GF11RF11:
@@ -94,6 +95,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
94 case TextureFormat::R8: 95 case TextureFormat::R8:
95 case TextureFormat::R16_G16_B16_A16: 96 case TextureFormat::R16_G16_B16_A16:
96 case TextureFormat::BF10GF11RF11: 97 case TextureFormat::BF10GF11RF11:
98 case TextureFormat::ASTC_2D_4X4:
97 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, 99 CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
98 unswizzled_data.data(), true, block_height); 100 unswizzled_data.data(), true, block_height);
99 break; 101 break;
@@ -115,6 +117,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
115 case TextureFormat::DXT23: 117 case TextureFormat::DXT23:
116 case TextureFormat::DXT45: 118 case TextureFormat::DXT45:
117 case TextureFormat::DXN1: 119 case TextureFormat::DXN1:
120 case TextureFormat::ASTC_2D_4X4:
118 case TextureFormat::A8R8G8B8: 121 case TextureFormat::A8R8G8B8:
119 case TextureFormat::A2B10G10R10: 122 case TextureFormat::A2B10G10R10:
120 case TextureFormat::A1B5G5R5: 123 case TextureFormat::A1B5G5R5: