6 files changed, 168 insertions, 48 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index bc50a4876..b508d64e9 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
 #include "video_core/textures/astc.h"
-class BitStream {
+class InputBitStream {
 public:
-    explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+    explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
-    ~BitStream() = default;
+    ~InputBitStream() = default;
-    int GetBitsWritten() const {
-        return m_BitsWritten;
-    }
-    void WriteBitsR(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> (nBits - i - 1)) & 1);
-        }
-    }
-    void WriteBits(unsigned int val, unsigned int nBits) {
-        for (unsigned int i = 0; i < nBits; i++) {
-            WriteBit((val >> i) & 1);
-        }
-    }
    int GetBitsRead() const {
        return m_BitsRead;
@@ -71,6 +55,38 @@ public:
    }
 private:
+    const int m_NumBits;
+    const unsigned char* m_CurByte;
+    int m_NextBit = 0;
+    int m_BitsRead = 0;
+    bool done = false;
+};
+class OutputBitStream {
+public:
+    explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+        : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
+    ~OutputBitStream() = default;
+    int GetBitsWritten() const {
+        return m_BitsWritten;
+    }
+    void WriteBitsR(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> (nBits - i - 1)) & 1);
+        }
+    }
+    void WriteBits(unsigned int val, unsigned int nBits) {
+        for (unsigned int i = 0; i < nBits; i++) {
+            WriteBit((val >> i) & 1);
+        }
+    }
+private:
    void WriteBit(int b) {
        if (done)
@@ -238,8 +254,8 @@ public:
    // Fills result with the values that are encoded in the given
    // bitstream. We must know beforehand what the maximum possible
    // value is, and how many values we're decoding.
-    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
+    static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
-                                      uint32_t maxRange, uint32_t nValues) {
+                                      InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
        // Determine encoding parameters
        IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
@@ -267,7 +283,7 @@ public:
    }
 private:
-    static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                uint32_t nBitsPerValue) {
        // Implement the algorithm in section C.2.12
        uint32_t m[5];
@@ -327,7 +343,7 @@ private:
        }
    }
-    static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+    static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
                                 uint32_t nBitsPerValue) {
        // Implement the algorithm in section C.2.12
        uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
    }
 };
-static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
+static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
    TexelWeightParams params;
    // Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
    return params;
 }
-static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
+static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
                              uint32_t blockHeight) {
    // Don't actually care about the void extent, just read the bits...
    for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
    // We now have enough to decode our integer sequence.
    std::vector<IntegerEncodedValue> decodedColorValues;
-    BitStream colorStream(data);
+    InputBitStream colorStream(data);
    IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
    // Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
 #undef READ_INT_VALUES
 }
-static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
+static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
                            const uint32_t blockHeight, uint32_t* outBuf) {
-    BitStream strm(inBuf);
+    InputBitStream strm(inBuf);
    TexelWeightParams weightParams = DecodeBlockInfo(strm);
    // Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
    // Define color data.
    uint8_t colorEndpointData[16];
    memset(colorEndpointData, 0, sizeof(colorEndpointData));
-    BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+    OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
    // Read extra config data...
    uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
    memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
    std::vector<IntegerEncodedValue> texelWeightValues;
-    BitStream weightStream(texelWeightData);
+    InputBitStream weightStream(texelWeightData);
    IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
                                               weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
 namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                uint32_t depth, uint32_t block_width, uint32_t block_height) {
    uint32_t blockIdx = 0;
    std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
        for (uint32_t j = 0; j < height; j += block_height) {
            for (uint32_t i = 0; i < width; i += block_width) {
-                uint8_t* blockPtr = data.data() + blockIdx * 16;
+                const uint8_t* blockPtr = data + blockIdx * 16;
                // Blocks can be at most 12x12
                uint32_t uncompData[144];
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index d419dd025..991cdba72 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
 namespace Tegra::Texture::ASTC {
-std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
                                uint32_t depth, uint32_t block_width, uint32_t block_height);
 } // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
new file mode 100644
index 000000000..5e439f036
--- /dev/null
+++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#include <algorithm>
+#include <cstring>
+#include <tuple>
+#include <vector>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/textures/astc.h"
+#include "video_core/textures/convert.h"
+namespace Tegra::Texture {
+using VideoCore::Surface::PixelFormat;
+/// Rewrites every 32-bit depth/stencil pixel of a tightly packed width x height image in place,
+/// moving the 24-bit depth and 8-bit stencil fields between the two layouts declared below.
+/// @tparam reverse false: read each pixel as S8Z24 and store it as Z24S8;
+///                 true: read each pixel as Z24S8 and store it as S8Z24.
+/// @param data   Pixel buffer, modified in place.
+/// @param width  Number of pixels per row.
+/// @param height Number of rows.
+template <bool reverse>
+void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    // Layout with z24 declared as BitField<0, 24> and s8 as BitField<24, 8>.
+    // NOTE(review): presumably BitField's first two arguments are bit position and bit count,
+    // i.e. depth in the low 24 bits and stencil in the high 8 bits - confirm in bit_field.h.
+    union S8Z24 {
+        BitField<0, 24, u32> z24;
+        BitField<24, 8, u32> s8;
+    };
+    static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
+    // Opposite layout: s8 declared as BitField<0, 8> and z24 as BitField<8, 24>.
+    union Z24S8 {
+        BitField<0, 8, u32> s8;
+        BitField<8, 24, u32> z24;
+    };
+    static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
+    // Scratch pixels reused for every iteration; both fields are reassigned each time.
+    S8Z24 s8z24_pixel{};
+    Z24S8 z24s8_pixel{};
+    // Bytes per pixel, queried for the S8Z24 format and used as the stride for both layouts.
+    constexpr auto bpp{
+        VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
+    for (std::size_t y = 0; y < height; ++y) {
+        for (std::size_t x = 0; x < width; ++x) {
+            // Rows are addressed as contiguous: no pitch/padding is taken into account.
+            const std::size_t offset{bpp * (y * width + x)};
+            if constexpr (reverse) {
+                // Z24S8 -> S8Z24: load the pixel via memcpy, move both fields, store it back.
+                std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
+                s8z24_pixel.s8.Assign(z24s8_pixel.s8);
+                s8z24_pixel.z24.Assign(z24s8_pixel.z24);
+                std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
+            } else {
+                // S8Z24 -> Z24S8: load the pixel via memcpy, move both fields, store it back.
+                std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
+                z24s8_pixel.s8.Assign(s8z24_pixel.s8);
+                z24s8_pixel.z24.Assign(s8z24_pixel.z24);
+                std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
+            }
+        }
+    }
+}
+/// Converts a width x height buffer of S8Z24 pixels to Z24S8 in place
+/// (forwards to SwapS8Z24ToZ24S8 with reverse = false).
+static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<false>(data, width, height);
+}
+/// Converts a width x height buffer of Z24S8 pixels back to S8Z24 in place
+/// (forwards to SwapS8Z24ToZ24S8 with reverse = true).
+static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
+    SwapS8Z24ToZ24S8<true>(data, width, height);
+}
+/// Converts guest texture data in place into the representation used on the host.
+/// At most one conversion is applied; if neither branch matches, data is left untouched.
+/// @param data          Texture bytes; read and overwritten in place.
+/// @param pixel_format  Format of the texture stored in data.
+/// @param width         Texture width passed to the selected conversion.
+/// @param height        Texture height passed to the selected conversion.
+/// @param depth         Texture depth; only forwarded to the ASTC decompressor.
+/// @param convert_astc  When true, ASTC formats are decompressed to RGBA8.
+/// @param convert_s8z24 When true, S8Z24 pixels are rewritten as Z24S8.
+void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+        // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
+        u32 block_width{};
+        u32 block_height{};
+        std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
+        const std::vector<u8> rgba8_data =
+            Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
+        // The decompressed pixels are written back over the compressed input.
+        // NOTE(review): Decompress sizes its output as width * height * depth * 4 bytes, so
+        // data must have room for that many bytes - confirm every caller allocates accordingly.
+        std::copy(rgba8_data.begin(), rgba8_data.end(), data);
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        // depth is not forwarded here: only width * height pixels are converted.
+        Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
+    }
+}
+/// Converts host texture data in place back into the guest representation.
+/// ASTC re-compression is not implemented: that case logs a critical error and reaches
+/// UNREACHABLE(). S8Z24 surfaces are converted from Z24S8 back to S8Z24. If neither branch
+/// matches, data is left untouched.
+/// @param data          Texture bytes; overwritten in place by the S8Z24 conversion.
+/// @param pixel_format  Format of the texture stored in data.
+/// @param width         Texture width passed to the S8Z24 conversion.
+/// @param height        Texture height passed to the S8Z24 conversion.
+/// @param depth         Not used by this function.
+/// @param convert_astc  When true, ASTC formats take the unimplemented path.
+/// @param convert_s8z24 When true, Z24S8 pixels are rewritten as S8Z24.
+void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
+                            bool convert_astc, bool convert_s8z24) {
+    if (convert_astc && IsPixelFormatASTC(pixel_format)) {
+        // There is no RGBA8 -> ASTC encoder here, so this direction cannot be handled.
+        LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
+                     static_cast<u32>(pixel_format));
+        UNREACHABLE();
+    } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
+        // Undo the guest-to-host field swap; only width * height pixels are converted.
+        Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
+    }
+}
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
new file mode 100644
index 000000000..07cd8b5da
--- /dev/null
+++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#pragma once
+#include "common/common_types.h"
+#include "video_core/surface.h"
+namespace Tegra::Texture {
+/// Converts guest texture data in place to the host representation: ASTC formats are
+/// decompressed to RGBA8 when convert_astc is set, and S8Z24 is rewritten as Z24S8 when
+/// convert_s8z24 is set. Other formats are left untouched.
+void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+/// Converts host texture data in place back to the guest representation: Z24S8 is rewritten
+/// as S8Z24 when convert_s8z24 is set. Converting back to ASTC is not implemented.
+void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
+                            u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
+} // namespace Tegra::Texture
+\ No newline at end of file
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 5db75de22..cad7340f5 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
                const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
                const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
                const u32 pixel_index{out_x + pixel_base};
-                data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
+                data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
-                data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
+                data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
                std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
            }
            pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
            for (u32 xb = 0; xb < blocks_on_x; xb++) {
                const u32 x_start = xb * block_x_elements;
                const u32 x_end = std::min(width, x_start + block_x_elements);
-                if (fast) {
+                if constexpr (fast) {
                    FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
                                     z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
                                     layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
    return 512;
 }
-/**
+/// Unswizzles a swizzled texture without changing its format.
- * Unswizzles a swizzled texture without changing its format.
- */
 void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
                      u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                      u32 block_height = TICEntry::DefaultBlockHeight,
                      u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
-/**
- * Unswizzles a swizzled texture without changing its format.
+/// Unswizzles a swizzled texture without changing its format.
- */
 std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
                                 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                                 u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
                      u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
                      bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
-/**
+/// Decodes an unswizzled texture into an A8R8G8B8 texture.
- * Decodes an unswizzled texture into a A8R8G8B8 texture.
- */
 std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
                              u32 height);
-/**
+/// This function calculates the correct size of a texture depending on whether it's tiled or not.
- * This function calculates the correct size of a texture depending if it's tiled or not.
- */
 std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
                          u32 block_height, u32 block_depth);
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
                    u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
                    u32 block_height);
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
                      u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,

diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index bc50a4876..b508d64e9 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp
@@ -23,28 +23,12 @@
23		23
24	#include "video_core/textures/astc.h"	24	#include "video_core/textures/astc.h"
25		25
26	class BitStream {	26	class InputBitStream {
27	public:	27	public:
28	explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)	28	explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0)
29	: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}	29	: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
30		30
31	~BitStream() = default;	31	~InputBitStream() = default;
32
33	int GetBitsWritten() const {
34	return m_BitsWritten;
35	}
36
37	void WriteBitsR(unsigned int val, unsigned int nBits) {
38	for (unsigned int i = 0; i < nBits; i++) {
39	WriteBit((val >> (nBits - i - 1)) & 1);
40	}
41	}
42
43	void WriteBits(unsigned int val, unsigned int nBits) {
44	for (unsigned int i = 0; i < nBits; i++) {
45	WriteBit((val >> i) & 1);
46	}
47	}
48		32
49	int GetBitsRead() const {	33	int GetBitsRead() const {
50	return m_BitsRead;	34	return m_BitsRead;
@@ -71,6 +55,38 @@ public:
71	}	55	}
72		56
73	private:	57	private:
		58	const int m_NumBits;
		59	const unsigned char* m_CurByte;
		60	int m_NextBit = 0;
		61	int m_BitsRead = 0;
		62
		63	bool done = false;
		64	};
		65
		66	class OutputBitStream {
		67	public:
		68	explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
		69	: m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {}
		70
		71	~OutputBitStream() = default;
		72
		73	int GetBitsWritten() const {
		74	return m_BitsWritten;
		75	}
		76
		77	void WriteBitsR(unsigned int val, unsigned int nBits) {
		78	for (unsigned int i = 0; i < nBits; i++) {
		79	WriteBit((val >> (nBits - i - 1)) & 1);
		80	}
		81	}
		82
		83	void WriteBits(unsigned int val, unsigned int nBits) {
		84	for (unsigned int i = 0; i < nBits; i++) {
		85	WriteBit((val >> i) & 1);
		86	}
		87	}
		88
		89	private:
74	void WriteBit(int b) {	90	void WriteBit(int b) {
75		91
76	if (done)	92	if (done)
@@ -238,8 +254,8 @@ public:
238	// Fills result with the values that are encoded in the given	254	// Fills result with the values that are encoded in the given
239	// bitstream. We must know beforehand what the maximum possible	255	// bitstream. We must know beforehand what the maximum possible
240	// value is, and how many values we're decoding.	256	// value is, and how many values we're decoding.
241	static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,	257	static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result,
242	uint32_t maxRange, uint32_t nValues) {	258	InputBitStream& bits, uint32_t maxRange, uint32_t nValues) {
243	// Determine encoding parameters	259	// Determine encoding parameters
244	IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);	260	IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
245		261
@@ -267,7 +283,7 @@ public:
267	}	283	}
268		284
269	private:	285	private:
270	static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,	286	static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
271	uint32_t nBitsPerValue) {	287	uint32_t nBitsPerValue) {
272	// Implement the algorithm in section C.2.12	288	// Implement the algorithm in section C.2.12
273	uint32_t m[5];	289	uint32_t m[5];
@@ -327,7 +343,7 @@ private:
327	}	343	}
328	}	344	}
329		345
330	static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,	346	static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result,
331	uint32_t nBitsPerValue) {	347	uint32_t nBitsPerValue) {
332	// Implement the algorithm in section C.2.12	348	// Implement the algorithm in section C.2.12
333	uint32_t m[3];	349	uint32_t m[3];
@@ -406,7 +422,7 @@ struct TexelWeightParams {
406	}	422	}
407	};	423	};
408		424
409	static TexelWeightParams DecodeBlockInfo(BitStream& strm) {	425	static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
410	TexelWeightParams params;	426	TexelWeightParams params;
411		427
412	// Read the entire block mode all at once	428	// Read the entire block mode all at once
@@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) {
605	return params;	621	return params;
606	}	622	}
607		623
608	static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,	624	static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
609	uint32_t blockHeight) {	625	uint32_t blockHeight) {
610	// Don't actually care about the void extent, just read the bits...	626	// Don't actually care about the void extent, just read the bits...
611	for (int i = 0; i < 4; ++i) {	627	for (int i = 0; i < 4; ++i) {
@@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode
821		837
822	// We now have enough to decode our integer sequence.	838	// We now have enough to decode our integer sequence.
823	std::vector<IntegerEncodedValue> decodedColorValues;	839	std::vector<IntegerEncodedValue> decodedColorValues;
824	BitStream colorStream(data);	840	InputBitStream colorStream(data);
825	IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);	841	IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
826		842
827	// Once we have the decoded values, we need to dequantize them to the 0-255 range	843	// Once we have the decoded values, we need to dequantize them to the 0-255 range
@@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue
1365	#undef READ_INT_VALUES	1381	#undef READ_INT_VALUES
1366	}	1382	}
1367		1383
1368	static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,	1384	static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
1369	const uint32_t blockHeight, uint32_t* outBuf) {	1385	const uint32_t blockHeight, uint32_t* outBuf) {
1370	BitStream strm(inBuf);	1386	InputBitStream strm(inBuf);
1371	TexelWeightParams weightParams = DecodeBlockInfo(strm);	1387	TexelWeightParams weightParams = DecodeBlockInfo(strm);
1372		1388
1373	// Was there an error?	1389	// Was there an error?
@@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1421	// Define color data.	1437	// Define color data.
1422	uint8_t colorEndpointData[16];	1438	uint8_t colorEndpointData[16];
1423	memset(colorEndpointData, 0, sizeof(colorEndpointData));	1439	memset(colorEndpointData, 0, sizeof(colorEndpointData));
1424	BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);	1440	OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
1425		1441
1426	// Read extra config data...	1442	// Read extra config data...
1427	uint32_t baseCEM = 0;	1443	uint32_t baseCEM = 0;
@@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1549	memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);	1565	memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1550		1566
1551	std::vector<IntegerEncodedValue> texelWeightValues;	1567	std::vector<IntegerEncodedValue> texelWeightValues;
1552	BitStream weightStream(texelWeightData);	1568	InputBitStream weightStream(texelWeightData);
1553		1569
1554	IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,	1570	IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
1555	weightParams.m_MaxWeight,	1571	weightParams.m_MaxWeight,
@@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth,
1597		1613
1598	namespace Tegra::Texture::ASTC {	1614	namespace Tegra::Texture::ASTC {
1599		1615
1600	std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,	1616	std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
1601	uint32_t depth, uint32_t block_width, uint32_t block_height) {	1617	uint32_t depth, uint32_t block_width, uint32_t block_height) {
1602	uint32_t blockIdx = 0;	1618	uint32_t blockIdx = 0;
1603	std::vector<uint8_t> outData(height * width * depth * 4);	1619	std::vector<uint8_t> outData(height * width * depth * 4);
@@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint
1605	for (uint32_t j = 0; j < height; j += block_height) {	1621	for (uint32_t j = 0; j < height; j += block_height) {
1606	for (uint32_t i = 0; i < width; i += block_width) {	1622	for (uint32_t i = 0; i < width; i += block_width) {
1607		1623
1608	uint8_t* blockPtr = data.data() + blockIdx * 16;	1624	const uint8_t* blockPtr = data + blockIdx * 16;
1609		1625
1610	// Blocks can be at most 12x12	1626	// Blocks can be at most 12x12
1611	uint32_t uncompData[144];	1627	uint32_t uncompData[144];


diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index d419dd025..991cdba72 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h
@@ -9,7 +9,7 @@
9		9
10	namespace Tegra::Texture::ASTC {	10	namespace Tegra::Texture::ASTC {
11		11
12	std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,	12	std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height,
13	uint32_t depth, uint32_t block_width, uint32_t block_height);	13	uint32_t depth, uint32_t block_width, uint32_t block_height);
14		14
15	} // namespace Tegra::Texture::ASTC	15	} // namespace Tegra::Texture::ASTC


diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp new file mode 100644 index 000000000..5e439f036 --- /dev/null +++ b/src/video_core/textures/convert.cpp
@@ -0,0 +1,92 @@
		1	// Copyright 2019 yuzu Emulator Project
		2	// Licensed under GPLv2 or any later version
		3	// Refer to the license.txt file included.
		4
		5	#include <algorithm>
		6	#include <cstring>
		7	#include <tuple>
		8	#include <vector>
		9
		10	#include "common/assert.h"
		11	#include "common/common_types.h"
		12	#include "common/logging/log.h"
		13	#include "video_core/textures/astc.h"
		14	#include "video_core/textures/convert.h"
		15
		16	namespace Tegra::Texture {
		17
		18	using VideoCore::Surface::PixelFormat;
		19
		20	template <bool reverse>
		21	void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
		22	union S8Z24 {
		23	BitField<0, 24, u32> z24;
		24	BitField<24, 8, u32> s8;
		25	};
		26	static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
		27
		28	union Z24S8 {
		29	BitField<0, 8, u32> s8;
		30	BitField<8, 24, u32> z24;
		31	};
		32	static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
		33
		34	S8Z24 s8z24_pixel{};
		35	Z24S8 z24s8_pixel{};
		36	constexpr auto bpp{
		37	VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)};
		38	for (std::size_t y = 0; y < height; ++y) {
		39	for (std::size_t x = 0; x < width; ++x) {
		40	const std::size_t offset{bpp * (y * width + x)};
		41	if constexpr (reverse) {
		42	std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
		43	s8z24_pixel.s8.Assign(z24s8_pixel.s8);
		44	s8z24_pixel.z24.Assign(z24s8_pixel.z24);
		45	std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
		46	} else {
		47	std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
		48	z24s8_pixel.s8.Assign(s8z24_pixel.s8);
		49	z24s8_pixel.z24.Assign(s8z24_pixel.z24);
		50	std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
		51	}
		52	}
		53	}
		54	}
		55
		56	static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
		57	SwapS8Z24ToZ24S8<false>(data, width, height);
		58	}
		59
		60	static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
		61	SwapS8Z24ToZ24S8<true>(data, width, height);
		62	}
		63
		64	void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
		65	bool convert_astc, bool convert_s8z24) {
		66	if (convert_astc && IsPixelFormatASTC(pixel_format)) {
		67	// Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
		68	u32 block_width{};
		69	u32 block_height{};
		70	std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
		71	const std::vector<u8> rgba8_data =
		72	Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height);
		73	std::copy(rgba8_data.begin(), rgba8_data.end(), data);
		74
		75	} else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
		76	Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height);
		77	}
		78	}
		79
		80	void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
		81	bool convert_astc, bool convert_s8z24) {
		82	if (convert_astc && IsPixelFormatASTC(pixel_format)) {
		83	LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
		84	static_cast<u32>(pixel_format));
		85	UNREACHABLE();
		86
		87	} else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) {
		88	Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
		89	}
		90	}
		91
		92	} // namespace Tegra::Texture \ No newline at end of file


diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h new file mode 100644 index 000000000..07cd8b5da --- /dev/null +++ b/src/video_core/textures/convert.h
@@ -0,0 +1,18 @@
		1	// Copyright 2019 yuzu Emulator Project
		2	// Licensed under GPLv2 or any later version
		3	// Refer to the license.txt file included.
		4
		5	#pragma once
		6
		7	#include "common/common_types.h"
		8	#include "video_core/surface.h"
		9
		10	namespace Tegra::Texture {
		11
		12	void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
		13	u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
		14
		15	void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
		16	u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
		17
		18	} // namespace Tegra::Texture \ No newline at end of file


diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 5db75de22..cad7340f5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp
@@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const
103	const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};	103	const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]};
104	const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;	104	const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
105	const u32 pixel_index{out_x + pixel_base};	105	const u32 pixel_index{out_x + pixel_base};
106	data_ptrs[unswizzle] = swizzled_data + swizzle_offset;	106	data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
107	data_ptrs[!unswizzle] = unswizzled_data + pixel_index;	107	data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
108	std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);	108	std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align);
109	}	109	}
110	pixel_base += stride_x;	110	pixel_base += stride_x;
@@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool
154	for (u32 xb = 0; xb < blocks_on_x; xb++) {	154	for (u32 xb = 0; xb < blocks_on_x; xb++) {
155	const u32 x_start = xb * block_x_elements;	155	const u32 x_start = xb * block_x_elements;
156	const u32 x_end = std::min(width, x_start + block_x_elements);	156	const u32 x_end = std::min(width, x_start + block_x_elements);
157	if (fast) {	157	if constexpr (fast) {
158	FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,	158	FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
159	z_start, x_end, y_end, z_end, tile_offset, xy_block_size,	159	z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
160	layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);	160	layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);


diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 85b7e9f7b..65df86890 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() {
16	return 512;	16	return 512;
17	}	17	}
18		18
19	/**	19	/// Unswizzles a swizzled texture without changing its format.
20	* Unswizzles a swizzled texture without changing its format.
21	*/
22	void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,	20	void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y,
23	u32 bytes_per_pixel, u32 width, u32 height, u32 depth,	21	u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
24	u32 block_height = TICEntry::DefaultBlockHeight,	22	u32 block_height = TICEntry::DefaultBlockHeight,
25	u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);	23	u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
26	/**	24
27	* Unswizzles a swizzled texture without changing its format.	25	/// Unswizzles a swizzled texture without changing its format.
28	*/
29	std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,	26	std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y,
30	u32 bytes_per_pixel, u32 width, u32 height, u32 depth,	27	u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
31	u32 block_height = TICEntry::DefaultBlockHeight,	28	u32 block_height = TICEntry::DefaultBlockHeight,
@@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
37	u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,	34	u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
38	bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);	35	bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
39		36
40	/**	37	/// Decodes an unswizzled texture into a A8R8G8B8 texture.
41	* Decodes an unswizzled texture into a A8R8G8B8 texture.
42	*/
43	std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,	38	std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width,
44	u32 height);	39	u32 height);
45		40
46	/**	41	/// This function calculates the correct size of a texture depending if it's tiled or not.
47	* This function calculates the correct size of a texture depending if it's tiled or not.
48	*/
49	std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,	42	std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
50	u32 block_height, u32 block_depth);	43	u32 block_height, u32 block_depth);
51		44
@@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
53	void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,	46	void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
54	u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,	47	u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,
55	u32 block_height);	48	u32 block_height);
		49
56	/// Copies a tiled subrectangle into a linear surface.	50	/// Copies a tiled subrectangle into a linear surface.
57	void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,	51	void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
58	u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,	52	u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data,