video_core: Rewrite the texture cache

The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
author: ReinUsesLisp 2020-12-30 02:25:23 -0300
committer: ReinUsesLisp 2020-12-30 03:38:50 -0300
commit: 9764c13d6d2977903f407761b27d847c0056e1c4 (patch)
tree: f6f5d6d6379b0404147969e7d1f548ed3d49ca01 /src/video_core/textures/astc.cpp
parent: video_core: Add a delayed destruction ring abstraction (diff)
download: yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.gz
yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.xz
yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.zip
1 files changed, 28 insertions, 30 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstring>
+#include <span>
 #include <vector>
 #include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
    return params;
 }
-static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
                              u32 blockHeight) {
    // Don't actually care about the void extent, just read the bits...
    for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
    }
 }
-static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
+static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
    for (u32 j = 0; j < blockHeight; j++) {
        for (u32 i = 0; i < blockWidth; i++) {
            outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
 #undef READ_INT_VALUES
 }
-static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,
+static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
-                            u32* outBuf) {
+                            const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
-    InputBitStream strm(inBuf);
+    InputBitStream strm(inBuf.data());
    TexelWeightParams weightParams = DecodeBlockInfo(strm);
    // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
    }
    // Read the texel weight data..
-    u8 texelWeightData[16];
+    std::array<u8, 16> texelWeightData;
-    memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
+    std::ranges::copy(inBuf, texelWeightData.begin());
    // Reverse everything
    for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
    // Make sure that higher non-texel bits are set to zero
    const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
-    texelWeightData[clearByteStart - 1] =
+    if (clearByteStart > 0) {
-        texelWeightData[clearByteStart - 1] &
+        texelWeightData[clearByteStart - 1] &=
-        static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
+            static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
-    memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
+    }
+    std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
    IntegerEncodedVector texelWeightValues;
-    InputBitStream weightStream(texelWeightData);
+    InputBitStream weightStream(texelWeightData.data());
    DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
                          weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
 namespace Tegra::Texture::ASTC {
-std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,
+void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
-                           u32 block_height) {
+                uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
-    u32 blockIdx = 0;
+    u32 block_index = 0;
    std::size_t depth_offset = 0;
-    std::vector<u8> outData(height * width * depth * 4);
+    for (u32 z = 0; z < depth; z++) {
-    for (u32 k = 0; k < depth; k++) {
+        for (u32 y = 0; y < height; y += block_height) {
-        for (u32 j = 0; j < height; j += block_height) {
+            for (u32 x = 0; x < width; x += block_width) {
-            for (u32 i = 0; i < width; i += block_width) {
+                const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
-                const u8* blockPtr = data + blockIdx * 16;
                // Blocks can be at most 12x12
-                u32 uncompData[144];
+                std::array<u32, 12 * 12> uncompData;
                ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
-                u32 decompWidth = std::min(block_width, width - i);
+                u32 decompWidth = std::min(block_width, width - x);
-                u32 decompHeight = std::min(block_height, height - j);
+                u32 decompHeight = std::min(block_height, height - y);
-                u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;
+                const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
                for (u32 jj = 0; jj < decompHeight; jj++) {
-                    memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+                    std::memcpy(outRow.data() + jj * width * 4,
+                                uncompData.data() + jj * block_width, decompWidth * 4);
                }
+                ++block_index;
-                blockIdx++;
            }
        }
        depth_offset += height * width * 4;
    }
-    return outData;
 }
 } // namespace Tegra::Texture::ASTC
author	ReinUsesLisp	2020-12-30 02:25:23 -0300
committer	ReinUsesLisp	2020-12-30 03:38:50 -0300
commit	9764c13d6d2977903f407761b27d847c0056e1c4 (patch)
tree	f6f5d6d6379b0404147969e7d1f548ed3d49ca01 /src/video_core/textures/astc.cpp
parent	video_core: Add a delayed destruction ring abstraction (diff)
download	yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.gz yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.xz yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.zip

diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
18	#include <algorithm>	18	#include <algorithm>
19	#include <cassert>	19	#include <cassert>
20	#include <cstring>	20	#include <cstring>
		21	#include <span>
21	#include <vector>	22	#include <vector>
22		23
23	#include <boost/container/static_vector.hpp>	24	#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
600	return params;	601	return params;
601	}	602	}
602		603
603	static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth,	604	static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
604	u32 blockHeight) {	605	u32 blockHeight) {
605	// Don't actually care about the void extent, just read the bits...	606	// Don't actually care about the void extent, just read the bits...
606	for (s32 i = 0; i < 4; ++i) {	607	for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
623	}	624	}
624	}	625	}
625		626
626	static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {	627	static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
627	for (u32 j = 0; j < blockHeight; j++) {	628	for (u32 j = 0; j < blockHeight; j++) {
628	for (u32 i = 0; i < blockWidth; i++) {	629	for (u32 i = 0; i < blockWidth; i++) {
629	outBuf[j * blockWidth + i] = 0xFFFF00FF;	630	outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1438	#undef READ_INT_VALUES	1439	#undef READ_INT_VALUES
1439	}	1440	}
1440		1441
1441	static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight,	1442	static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1442	u32* outBuf) {	1443	const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1443	InputBitStream strm(inBuf);	1444	InputBitStream strm(inBuf.data());
1444	TexelWeightParams weightParams = DecodeBlockInfo(strm);	1445	TexelWeightParams weightParams = DecodeBlockInfo(strm);
1445		1446
1446	// Was there an error?	1447	// Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1601	}	1602	}
1602		1603
1603	// Read the texel weight data..	1604	// Read the texel weight data..
1604	u8 texelWeightData[16];	1605	std::array<u8, 16> texelWeightData;
1605	memcpy(texelWeightData, inBuf, sizeof(texelWeightData));	1606	std::ranges::copy(inBuf, texelWeightData.begin());
1606		1607
1607	// Reverse everything	1608	// Reverse everything
1608	for (u32 i = 0; i < 8; i++) {	1609	for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1618		1619
1619	// Make sure that higher non-texel bits are set to zero	1620	// Make sure that higher non-texel bits are set to zero
1620	const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;	1621	const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1621	texelWeightData[clearByteStart - 1] =	1622	if (clearByteStart > 0) {
1622	texelWeightData[clearByteStart - 1] &	1623	texelWeightData[clearByteStart - 1] &=
1623	static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);	1624	static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1624	memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);	1625	}
		1626	std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1625		1627
1626	IntegerEncodedVector texelWeightValues;	1628	IntegerEncodedVector texelWeightValues;
1627		1629
1628	InputBitStream weightStream(texelWeightData);	1630	InputBitStream weightStream(texelWeightData.data());
1629		1631
1630	DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,	1632	DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1631	weightParams.GetNumWeightValues());	1633	weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1672		1674
1673	namespace Tegra::Texture::ASTC {	1675	namespace Tegra::Texture::ASTC {
1674		1676
1675	std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width,	1677	void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1676	u32 block_height) {	1678	uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1677	u32 blockIdx = 0;	1679	u32 block_index = 0;
1678	std::size_t depth_offset = 0;	1680	std::size_t depth_offset = 0;
1679	std::vector<u8> outData(height * width * depth * 4);	1681	for (u32 z = 0; z < depth; z++) {
1680	for (u32 k = 0; k < depth; k++) {	1682	for (u32 y = 0; y < height; y += block_height) {
1681	for (u32 j = 0; j < height; j += block_height) {	1683	for (u32 x = 0; x < width; x += block_width) {
1682	for (u32 i = 0; i < width; i += block_width) {	1684	const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1683
1684	const u8* blockPtr = data + blockIdx * 16;
1685		1685
1686	// Blocks can be at most 12x12	1686	// Blocks can be at most 12x12
1687	u32 uncompData[144];	1687	std::array<u32, 12 * 12> uncompData;
1688	ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);	1688	ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1689		1689
1690	u32 decompWidth = std::min(block_width, width - i);	1690	u32 decompWidth = std::min(block_width, width - x);
1691	u32 decompHeight = std::min(block_height, height - j);	1691	u32 decompHeight = std::min(block_height, height - y);
1692		1692
1693	u8* outRow = depth_offset + outData.data() + (j * width + i) * 4;	1693	const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1694	for (u32 jj = 0; jj < decompHeight; jj++) {	1694	for (u32 jj = 0; jj < decompHeight; jj++) {
1695	memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);	1695	std::memcpy(outRow.data() + jj * width * 4,
		1696	uncompData.data() + jj * block_width, decompWidth * 4);
1696	}	1697	}
1697		1698	++block_index;
1698	blockIdx++;
1699	}	1699	}
1700	}	1700	}
1701	depth_offset += height * width * 4;	1701	depth_offset += height * width * 4;
1702	}	1702	}
1703
1704	return outData;
1705	}	1703	}
1706		1704
1707	} // namespace Tegra::Texture::ASTC	1705	} // namespace Tegra::Texture::ASTC