Merge pull request #6791 from ameerj/astc-opt

astc_decoder: Various performance and memory optimizations
author: bunnei 2021-08-06 21:45:24 -0700
committer: GitHub 2021-08-06 21:45:24 -0700
commit: 268b5764c70a8300d24c32985dee595046a1e2e1 (patch)
tree: 178317fbc7f34549a93b8e28d9f0b6857aa104c8 /src/video_core/textures
parent: Merge pull request #6799 from ameerj/vp9-fixes (diff)
parent: astc_decoder: Reduce workgroup size (diff)
download: yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.gz
yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.xz
yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.zip
2 files changed, 133 insertions, 134 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 3ab500760..25161df1f 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -151,6 +151,76 @@ private:
    const IntType& m_Bits;
 };
+enum class IntegerEncoding { JustBits, Quint, Trit };
+struct IntegerEncodedValue {
+    constexpr IntegerEncodedValue() = default;
+    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
+        : encoding{encoding_}, num_bits{num_bits_} {}
+    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
+        return encoding == other.encoding && num_bits == other.num_bits;
+    }
+    // Returns the number of bits required to encode num_vals values.
+    u32 GetBitLength(u32 num_vals) const {
+        u32 total_bits = num_bits * num_vals;
+        if (encoding == IntegerEncoding::Trit) {
+            total_bits += (num_vals * 8 + 4) / 5;
+        } else if (encoding == IntegerEncoding::Quint) {
+            total_bits += (num_vals * 7 + 2) / 3;
+        }
+        return total_bits;
+    }
+    IntegerEncoding encoding{};
+    u32 num_bits = 0;
+    u32 bit_value = 0;
+    union {
+        u32 quint_value = 0;
+        u32 trit_value;
+    };
+};
+// Returns a new instance of this struct that corresponds to the
+// can take no more than mav_value values
+static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
+    while (mav_value > 0) {
+        u32 check = mav_value + 1;
+        // Is mav_value a power of two?
+        if (!(check & (check - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
+        }
+        // Is mav_value of the type 3*2^n - 1?
+        if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
+        }
+        // Is mav_value of the type 5*2^n - 1?
+        if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
+            return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
+        }
+        // Apparently it can't be represented with a bounded integer sequence...
+        // just iterate.
+        mav_value--;
+    }
+    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
+}
+static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
+    std::array<IntegerEncodedValue, 256> encodings{};
+    for (std::size_t i = 0; i < encodings.size(); ++i) {
+        encodings[i] = CreateEncoding(static_cast<u32>(i));
+    }
+    return encodings;
+}
+static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
 namespace Tegra::Texture::ASTC {
 using IntegerEncodedVector = boost::container::static_vector<
    IntegerEncodedValue, 256,
@@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
    return params;
 }
-static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
+// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
-                              u32 blockHeight) {
+// is the same as [(num_bits - 1):0] and repeats all the way down.
-    // Don't actually care about the void extent, just read the bits...
+template <typename IntType>
-    for (s32 i = 0; i < 4; ++i) {
+static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
-        strm.ReadBits<13>();
+    if (num_bits == 0 || to_bit == 0) {
+        return 0;
    }
+    const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
-    // Decode the RGBA components and renormalize them to the range [0, 255]
+    IntType res = v;
-    u16 r = static_cast<u16>(strm.ReadBits<16>());
+    u32 reslen = num_bits;
-    u16 g = static_cast<u16>(strm.ReadBits<16>());
+    while (reslen < to_bit) {
-    u16 b = static_cast<u16>(strm.ReadBits<16>());
+        u32 comp = 0;
-    u16 a = static_cast<u16>(strm.ReadBits<16>());
+        if (num_bits > to_bit - reslen) {
+            u32 newshift = to_bit - reslen;
-    u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
+            comp = num_bits - newshift;
-               (static_cast<u32>(a) & 0xFF00) << 16;
+            num_bits = newshift;
-    for (u32 j = 0; j < blockHeight; j++) {
-        for (u32 i = 0; i < blockWidth; i++) {
-            outBuf[j * blockWidth + i] = rgba;
        }
+        res = static_cast<IntType>(res << num_bits);
+        res = static_cast<IntType>(res | (v >> comp));
+        reslen += num_bits;
    }
+    return res;
 }
-static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
+static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
-    for (u32 j = 0; j < blockHeight; j++) {
+    return std::size_t(1) << num_bits;
-        for (u32 i = 0; i < blockWidth; i++) {
+}
-            outBuf[j * blockWidth + i] = 0xFFFF00FF;
-        }
+template <typename IntType, u32 num_bits, u32 to_bit>
+static constexpr auto MakeReplicateTable() {
+    std::array<IntType, NumReplicateEntries(num_bits)> table{};
+    for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
+        table[value] = Replicate(value, num_bits, to_bit);
    }
+    return table;
 }
 static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
 static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
 static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
 static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
+static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
+static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
+static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
 /// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
 /// to the runtime implementation
 static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 #undef READ_INT_VALUES
 }
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
+                              u32 blockHeight) {
+    // Don't actually care about the void extent, just read the bits...
+    for (s32 i = 0; i < 4; ++i) {
+        strm.ReadBits<13>();
+    }
+    // Decode the RGBA components and renormalize them to the range [0, 255]
+    u16 r = static_cast<u16>(strm.ReadBits<16>());
+    u16 g = static_cast<u16>(strm.ReadBits<16>());
+    u16 b = static_cast<u16>(strm.ReadBits<16>());
+    u16 a = static_cast<u16>(strm.ReadBits<16>());
+    u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 |
+               (static_cast<u32>(a) & 0xFF00) << 16;
+    for (u32 j = 0; j < blockHeight; j++) {
+        for (u32 i = 0; i < blockWidth; i++) {
+            outBuf[j * blockWidth + i] = rgba;
+        }
+    }
+}
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
+    for (u32 j = 0; j < blockHeight; j++) {
+        for (u32 i = 0; i < blockWidth; i++) {
+            outBuf[j * blockWidth + i] = 0xFFFF00FF;
+        }
+    }
+}
 static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
                            const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
    InputBitStream strm(inBuf);
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 0229ae122..14d2beec0 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,117 +9,6 @@
 namespace Tegra::Texture::ASTC {
-enum class IntegerEncoding { JustBits, Quint, Trit };
-struct IntegerEncodedValue {
-    constexpr IntegerEncodedValue() = default;
-    constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
-        : encoding{encoding_}, num_bits{num_bits_} {}
-    constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
-        return encoding == other.encoding && num_bits == other.num_bits;
-    }
-    // Returns the number of bits required to encode num_vals values.
-    u32 GetBitLength(u32 num_vals) const {
-        u32 total_bits = num_bits * num_vals;
-        if (encoding == IntegerEncoding::Trit) {
-            total_bits += (num_vals * 8 + 4) / 5;
-        } else if (encoding == IntegerEncoding::Quint) {
-            total_bits += (num_vals * 7 + 2) / 3;
-        }
-        return total_bits;
-    }
-    IntegerEncoding encoding{};
-    u32 num_bits = 0;
-    u32 bit_value = 0;
-    union {
-        u32 quint_value = 0;
-        u32 trit_value;
-    };
-};
-// Returns a new instance of this struct that corresponds to the
-// can take no more than mav_value values
-constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
-    while (mav_value > 0) {
-        u32 check = mav_value + 1;
-        // Is mav_value a power of two?
-        if (!(check & (check - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
-        }
-        // Is mav_value of the type 3*2^n - 1?
-        if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
-        }
-        // Is mav_value of the type 5*2^n - 1?
-        if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
-            return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
-        }
-        // Apparently it can't be represented with a bounded integer sequence...
-        // just iterate.
-        mav_value--;
-    }
-    return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
-}
-constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
-    std::array<IntegerEncodedValue, 256> encodings{};
-    for (std::size_t i = 0; i < encodings.size(); ++i) {
-        encodings[i] = CreateEncoding(static_cast<u32>(i));
-    }
-    return encodings;
-}
-constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
-// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
-// is the same as [(num_bits - 1):0] and repeats all the way down.
-template <typename IntType>
-constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
-    if (num_bits == 0 || to_bit == 0) {
-        return 0;
-    }
-    const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
-    IntType res = v;
-    u32 reslen = num_bits;
-    while (reslen < to_bit) {
-        u32 comp = 0;
-        if (num_bits > to_bit - reslen) {
-            u32 newshift = to_bit - reslen;
-            comp = num_bits - newshift;
-            num_bits = newshift;
-        }
-        res = static_cast<IntType>(res << num_bits);
-        res = static_cast<IntType>(res | (v >> comp));
-        reslen += num_bits;
-    }
-    return res;
-}
-constexpr std::size_t NumReplicateEntries(u32 num_bits) {
-    return std::size_t(1) << num_bits;
-}
-template <typename IntType, u32 num_bits, u32 to_bit>
-constexpr auto MakeReplicateTable() {
-    std::array<IntType, NumReplicateEntries(num_bits)> table{};
-    for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
-        table[value] = Replicate(value, num_bits, to_bit);
-    }
-    return table;
-}
-constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
-constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
-constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
 void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
author	bunnei	2021-08-06 21:45:24 -0700
committer	GitHub	2021-08-06 21:45:24 -0700
commit	268b5764c70a8300d24c32985dee595046a1e2e1 (patch)
tree	178317fbc7f34549a93b8e28d9f0b6857aa104c8 /src/video_core/textures
parent	Merge pull request #6799 from ameerj/vp9-fixes (diff)
parent	astc_decoder: Reduce workgroup size (diff)
download	yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.gz yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.tar.xz yuzu-268b5764c70a8300d24c32985dee595046a1e2e1.zip

diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 3ab500760..25161df1f 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp
@@ -151,6 +151,76 @@ private:
151	const IntType& m_Bits;	151	const IntType& m_Bits;
152	};	152	};
153		153
		154	enum class IntegerEncoding { JustBits, Quint, Trit };
		155
		156	struct IntegerEncodedValue {
		157	constexpr IntegerEncodedValue() = default;
		158
		159	constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
		160	: encoding{encoding_}, num_bits{num_bits_} {}
		161
		162	constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
		163	return encoding == other.encoding && num_bits == other.num_bits;
		164	}
		165
		166	// Returns the number of bits required to encode num_vals values.
		167	u32 GetBitLength(u32 num_vals) const {
		168	u32 total_bits = num_bits * num_vals;
		169	if (encoding == IntegerEncoding::Trit) {
		170	total_bits += (num_vals * 8 + 4) / 5;
		171	} else if (encoding == IntegerEncoding::Quint) {
		172	total_bits += (num_vals * 7 + 2) / 3;
		173	}
		174	return total_bits;
		175	}
		176
		177	IntegerEncoding encoding{};
		178	u32 num_bits = 0;
		179	u32 bit_value = 0;
		180	union {
		181	u32 quint_value = 0;
		182	u32 trit_value;
		183	};
		184	};
		185
		186	// Returns a new instance of this struct that corresponds to the
		187	// can take no more than mav_value values
		188	static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
		189	while (mav_value > 0) {
		190	u32 check = mav_value + 1;
		191
		192	// Is mav_value a power of two?
		193	if (!(check & (check - 1))) {
		194	return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
		195	}
		196
		197	// Is mav_value of the type 3*2^n - 1?
		198	if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
		199	return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
		200	}
		201
		202	// Is mav_value of the type 5*2^n - 1?
		203	if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
		204	return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
		205	}
		206
		207	// Apparently it can't be represented with a bounded integer sequence...
		208	// just iterate.
		209	mav_value--;
		210	}
		211	return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
		212	}
		213
		214	static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
		215	std::array<IntegerEncodedValue, 256> encodings{};
		216	for (std::size_t i = 0; i < encodings.size(); ++i) {
		217	encodings[i] = CreateEncoding(static_cast<u32>(i));
		218	}
		219	return encodings;
		220	}
		221
		222	static constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
		223
154	namespace Tegra::Texture::ASTC {	224	namespace Tegra::Texture::ASTC {
155	using IntegerEncodedVector = boost::container::static_vector<	225	using IntegerEncodedVector = boost::container::static_vector<
156	IntegerEncodedValue, 256,	226	IntegerEncodedValue, 256,
@@ -521,35 +591,41 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
521	return params;	591	return params;
522	}	592	}
523		593
524	static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,	594	// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
525	u32 blockHeight) {	595	// is the same as [(num_bits - 1):0] and repeats all the way down.
526	// Don't actually care about the void extent, just read the bits...	596	template <typename IntType>
527	for (s32 i = 0; i < 4; ++i) {	597	static constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
528	strm.ReadBits<13>();	598	if (num_bits == 0 \|\| to_bit == 0) {
		599	return 0;
529	}	600	}
530		601	const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
531	// Decode the RGBA components and renormalize them to the range [0, 255]	602	IntType res = v;
532	u16 r = static_cast<u16>(strm.ReadBits<16>());	603	u32 reslen = num_bits;
533	u16 g = static_cast<u16>(strm.ReadBits<16>());	604	while (reslen < to_bit) {
534	u16 b = static_cast<u16>(strm.ReadBits<16>());	605	u32 comp = 0;
535	u16 a = static_cast<u16>(strm.ReadBits<16>());	606	if (num_bits > to_bit - reslen) {
536		607	u32 newshift = to_bit - reslen;
537	u32 rgba = (r >> 8) \| (g & 0xFF00) \| (static_cast<u32>(b) & 0xFF00) << 8 \|	608	comp = num_bits - newshift;
538	(static_cast<u32>(a) & 0xFF00) << 16;	609	num_bits = newshift;
539
540	for (u32 j = 0; j < blockHeight; j++) {
541	for (u32 i = 0; i < blockWidth; i++) {
542	outBuf[j * blockWidth + i] = rgba;
543	}	610	}
		611	res = static_cast<IntType>(res << num_bits);
		612	res = static_cast<IntType>(res \| (v >> comp));
		613	reslen += num_bits;
544	}	614	}
		615	return res;
545	}	616	}
546		617
547	static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {	618	static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
548	for (u32 j = 0; j < blockHeight; j++) {	619	return std::size_t(1) << num_bits;
549	for (u32 i = 0; i < blockWidth; i++) {	620	}
550	outBuf[j * blockWidth + i] = 0xFFFF00FF;	621
551	}	622	template <typename IntType, u32 num_bits, u32 to_bit>
		623	static constexpr auto MakeReplicateTable() {
		624	std::array<IntType, NumReplicateEntries(num_bits)> table{};
		625	for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
		626	table[value] = Replicate(value, num_bits, to_bit);
552	}	627	}
		628	return table;
553	}	629	}
554		630
555	static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();	631	static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
@@ -572,6 +648,9 @@ static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>
572	static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();	648	static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
573	static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();	649	static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
574	static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();	650	static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
		651	static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
		652	static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
		653	static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
575	/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback	654	/// Use a precompiled table with the most common usages, if it's not in the expected range, fallback
576	/// to the runtime implementation	655	/// to the runtime implementation
577	static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {	656	static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
@@ -1316,6 +1395,37 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1316	#undef READ_INT_VALUES	1395	#undef READ_INT_VALUES
1317	}	1396	}
1318		1397
		1398	static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
		1399	u32 blockHeight) {
		1400	// Don't actually care about the void extent, just read the bits...
		1401	for (s32 i = 0; i < 4; ++i) {
		1402	strm.ReadBits<13>();
		1403	}
		1404
		1405	// Decode the RGBA components and renormalize them to the range [0, 255]
		1406	u16 r = static_cast<u16>(strm.ReadBits<16>());
		1407	u16 g = static_cast<u16>(strm.ReadBits<16>());
		1408	u16 b = static_cast<u16>(strm.ReadBits<16>());
		1409	u16 a = static_cast<u16>(strm.ReadBits<16>());
		1410
		1411	u32 rgba = (r >> 8) \| (g & 0xFF00) \| (static_cast<u32>(b) & 0xFF00) << 8 \|
		1412	(static_cast<u32>(a) & 0xFF00) << 16;
		1413
		1414	for (u32 j = 0; j < blockHeight; j++) {
		1415	for (u32 i = 0; i < blockWidth; i++) {
		1416	outBuf[j * blockWidth + i] = rgba;
		1417	}
		1418	}
		1419	}
		1420
		1421	static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
		1422	for (u32 j = 0; j < blockHeight; j++) {
		1423	for (u32 i = 0; i < blockWidth; i++) {
		1424	outBuf[j * blockWidth + i] = 0xFFFF00FF;
		1425	}
		1426	}
		1427	}
		1428
1319	static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,	1429	static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1320	const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {	1430	const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1321	InputBitStream strm(inBuf);	1431	InputBitStream strm(inBuf);


diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 0229ae122..14d2beec0 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h
@@ -9,117 +9,6 @@
9		9
10	namespace Tegra::Texture::ASTC {	10	namespace Tegra::Texture::ASTC {
11		11
12	enum class IntegerEncoding { JustBits, Quint, Trit };
13
14	struct IntegerEncodedValue {
15	constexpr IntegerEncodedValue() = default;
16
17	constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_)
18	: encoding{encoding_}, num_bits{num_bits_} {}
19
20	constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const {
21	return encoding == other.encoding && num_bits == other.num_bits;
22	}
23
24	// Returns the number of bits required to encode num_vals values.
25	u32 GetBitLength(u32 num_vals) const {
26	u32 total_bits = num_bits * num_vals;
27	if (encoding == IntegerEncoding::Trit) {
28	total_bits += (num_vals * 8 + 4) / 5;
29	} else if (encoding == IntegerEncoding::Quint) {
30	total_bits += (num_vals * 7 + 2) / 3;
31	}
32	return total_bits;
33	}
34
35	IntegerEncoding encoding{};
36	u32 num_bits = 0;
37	u32 bit_value = 0;
38	union {
39	u32 quint_value = 0;
40	u32 trit_value;
41	};
42	};
43
44	// Returns a new instance of this struct that corresponds to the
45	// can take no more than mav_value values
46	constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) {
47	while (mav_value > 0) {
48	u32 check = mav_value + 1;
49
50	// Is mav_value a power of two?
51	if (!(check & (check - 1))) {
52	return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value));
53	}
54
55	// Is mav_value of the type 3*2^n - 1?
56	if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
57	return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1));
58	}
59
60	// Is mav_value of the type 5*2^n - 1?
61	if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
62	return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1));
63	}
64
65	// Apparently it can't be represented with a bounded integer sequence...
66	// just iterate.
67	mav_value--;
68	}
69	return IntegerEncodedValue(IntegerEncoding::JustBits, 0);
70	}
71
72	constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() {
73	std::array<IntegerEncodedValue, 256> encodings{};
74	for (std::size_t i = 0; i < encodings.size(); ++i) {
75	encodings[i] = CreateEncoding(static_cast<u32>(i));
76	}
77	return encodings;
78	}
79
80	constexpr std::array<IntegerEncodedValue, 256> ASTC_ENCODINGS_VALUES = MakeEncodedValues();
81
82	// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
83	// is the same as [(num_bits - 1):0] and repeats all the way down.
84	template <typename IntType>
85	constexpr IntType Replicate(IntType val, u32 num_bits, u32 to_bit) {
86	if (num_bits == 0 \|\| to_bit == 0) {
87	return 0;
88	}
89	const IntType v = val & static_cast<IntType>((1 << num_bits) - 1);
90	IntType res = v;
91	u32 reslen = num_bits;
92	while (reslen < to_bit) {
93	u32 comp = 0;
94	if (num_bits > to_bit - reslen) {
95	u32 newshift = to_bit - reslen;
96	comp = num_bits - newshift;
97	num_bits = newshift;
98	}
99	res = static_cast<IntType>(res << num_bits);
100	res = static_cast<IntType>(res \| (v >> comp));
101	reslen += num_bits;
102	}
103	return res;
104	}
105
106	constexpr std::size_t NumReplicateEntries(u32 num_bits) {
107	return std::size_t(1) << num_bits;
108	}
109
110	template <typename IntType, u32 num_bits, u32 to_bit>
111	constexpr auto MakeReplicateTable() {
112	std::array<IntType, NumReplicateEntries(num_bits)> table{};
113	for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
114	table[value] = Replicate(value, num_bits, to_bit);
115	}
116	return table;
117	}
118
119	constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
120	constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
121	constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
122
123	void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,	12	void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
124	uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);	13	uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
125		14