diff options
| author | 2021-01-10 22:09:56 -0700 | |
|---|---|---|
| committer | 2021-01-10 22:09:56 -0700 | |
| commit | 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch) | |
| tree | 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/textures | |
| parent | More forgetting... duh (diff) | |
| parent | Merge pull request #5229 from Morph1984/fullscreen-opt (diff) | |
| download | yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip | |
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/textures')
| -rw-r--r-- | src/video_core/textures/astc.cpp | 58 | ||||
| -rw-r--r-- | src/video_core/textures/astc.h | 5 | ||||
| -rw-r--r-- | src/video_core/textures/convert.cpp | 93 | ||||
| -rw-r--r-- | src/video_core/textures/convert.h | 22 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 249 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 44 | ||||
| -rw-r--r-- | src/video_core/textures/texture.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/textures/texture.h | 239 |
8 files changed, 274 insertions, 452 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 365bde2f1..acd5bdd78 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstring> | 20 | #include <cstring> |
| 21 | #include <span> | ||
| 21 | #include <vector> | 22 | #include <vector> |
| 22 | 23 | ||
| 23 | #include <boost/container/static_vector.hpp> | 24 | #include <boost/container/static_vector.hpp> |
| @@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 600 | return params; | 601 | return params; |
| 601 | } | 602 | } |
| 602 | 603 | ||
| 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, | 604 | static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth, |
| 604 | u32 blockHeight) { | 605 | u32 blockHeight) { |
| 605 | // Don't actually care about the void extent, just read the bits... | 606 | // Don't actually care about the void extent, just read the bits... |
| 606 | for (s32 i = 0; i < 4; ++i) { | 607 | for (s32 i = 0; i < 4; ++i) { |
| @@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block | |||
| 623 | } | 624 | } |
| 624 | } | 625 | } |
| 625 | 626 | ||
| 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | 627 | static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) { |
| 627 | for (u32 j = 0; j < blockHeight; j++) { | 628 | for (u32 j = 0; j < blockHeight; j++) { |
| 628 | for (u32 i = 0; i < blockWidth; i++) { | 629 | for (u32 i = 0; i < blockWidth; i++) { |
| 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 630 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| @@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, | |||
| 1438 | #undef READ_INT_VALUES | 1439 | #undef READ_INT_VALUES |
| 1439 | } | 1440 | } |
| 1440 | 1441 | ||
| 1441 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, | 1442 | static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, |
| 1442 | u32* outBuf) { | 1443 | const u32 blockHeight, std::span<u32, 12 * 12> outBuf) { |
| 1443 | InputBitStream strm(inBuf); | 1444 | InputBitStream strm(inBuf.data()); |
| 1444 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1445 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1445 | 1446 | ||
| 1446 | // Was there an error? | 1447 | // Was there an error? |
| @@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1601 | } | 1602 | } |
| 1602 | 1603 | ||
| 1603 | // Read the texel weight data.. | 1604 | // Read the texel weight data.. |
| 1604 | u8 texelWeightData[16]; | 1605 | std::array<u8, 16> texelWeightData; |
| 1605 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1606 | std::ranges::copy(inBuf, texelWeightData.begin()); |
| 1606 | 1607 | ||
| 1607 | // Reverse everything | 1608 | // Reverse everything |
| 1608 | for (u32 i = 0; i < 8; i++) { | 1609 | for (u32 i = 0; i < 8; i++) { |
| @@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1618 | 1619 | ||
| 1619 | // Make sure that higher non-texel bits are set to zero | 1620 | // Make sure that higher non-texel bits are set to zero |
| 1620 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1621 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1621 | texelWeightData[clearByteStart - 1] = | 1622 | if (clearByteStart > 0) { |
| 1622 | texelWeightData[clearByteStart - 1] & | 1623 | texelWeightData[clearByteStart - 1] &= |
| 1623 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1624 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1624 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1625 | } |
| 1626 | std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U)); | ||
| 1625 | 1627 | ||
| 1626 | IntegerEncodedVector texelWeightValues; | 1628 | IntegerEncodedVector texelWeightValues; |
| 1627 | 1629 | ||
| 1628 | InputBitStream weightStream(texelWeightData); | 1630 | InputBitStream weightStream(texelWeightData.data()); |
| 1629 | 1631 | ||
| 1630 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, | 1632 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1631 | weightParams.GetNumWeightValues()); | 1633 | weightParams.GetNumWeightValues()); |
| @@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1672 | 1674 | ||
| 1673 | namespace Tegra::Texture::ASTC { | 1675 | namespace Tegra::Texture::ASTC { |
| 1674 | 1676 | ||
| 1675 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, | 1677 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 1676 | u32 block_height) { | 1678 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { |
| 1677 | u32 blockIdx = 0; | 1679 | u32 block_index = 0; |
| 1678 | std::size_t depth_offset = 0; | 1680 | std::size_t depth_offset = 0; |
| 1679 | std::vector<u8> outData(height * width * depth * 4); | 1681 | for (u32 z = 0; z < depth; z++) { |
| 1680 | for (u32 k = 0; k < depth; k++) { | 1682 | for (u32 y = 0; y < height; y += block_height) { |
| 1681 | for (u32 j = 0; j < height; j += block_height) { | 1683 | for (u32 x = 0; x < width; x += block_width) { |
| 1682 | for (u32 i = 0; i < width; i += block_width) { | 1684 | const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; |
| 1683 | |||
| 1684 | const u8* blockPtr = data + blockIdx * 16; | ||
| 1685 | 1685 | ||
| 1686 | // Blocks can be at most 12x12 | 1686 | // Blocks can be at most 12x12 |
| 1687 | u32 uncompData[144]; | 1687 | std::array<u32, 12 * 12> uncompData; |
| 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1688 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1689 | 1689 | ||
| 1690 | u32 decompWidth = std::min(block_width, width - i); | 1690 | u32 decompWidth = std::min(block_width, width - x); |
| 1691 | u32 decompHeight = std::min(block_height, height - j); | 1691 | u32 decompHeight = std::min(block_height, height - y); |
| 1692 | 1692 | ||
| 1693 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1693 | const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); |
| 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { | 1694 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1695 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1695 | std::memcpy(outRow.data() + jj * width * 4, |
| 1696 | uncompData.data() + jj * block_width, decompWidth * 4); | ||
| 1696 | } | 1697 | } |
| 1697 | 1698 | ++block_index; | |
| 1698 | blockIdx++; | ||
| 1699 | } | 1699 | } |
| 1700 | } | 1700 | } |
| 1701 | depth_offset += height * width * 4; | 1701 | depth_offset += height * width * 4; |
| 1702 | } | 1702 | } |
| 1703 | |||
| 1704 | return outData; | ||
| 1705 | } | 1703 | } |
| 1706 | 1704 | ||
| 1707 | } // namespace Tegra::Texture::ASTC | 1705 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index 991cdba72..9105119bc 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -5,11 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstdint> | 7 | #include <cstdint> |
| 8 | #include <vector> | ||
| 9 | 8 | ||
| 10 | namespace Tegra::Texture::ASTC { | 9 | namespace Tegra::Texture::ASTC { |
| 11 | 10 | ||
| 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 11 | void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 12 | uint32_t block_width, uint32_t block_height, std::span<uint8_t> output); |
| 14 | 13 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 14 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp deleted file mode 100644 index 962921483..000000000 --- a/src/video_core/textures/convert.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/textures/astc.h" | ||
| 15 | #include "video_core/textures/convert.h" | ||
| 16 | |||
| 17 | namespace Tegra::Texture { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | |||
| 21 | template <bool reverse> | ||
| 22 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 23 | union S8Z24 { | ||
| 24 | BitField<0, 24, u32> z24; | ||
| 25 | BitField<24, 8, u32> s8; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 28 | |||
| 29 | union Z24S8 { | ||
| 30 | BitField<0, 8, u32> s8; | ||
| 31 | BitField<8, 24, u32> z24; | ||
| 32 | }; | ||
| 33 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 34 | |||
| 35 | S8Z24 s8z24_pixel{}; | ||
| 36 | Z24S8 z24s8_pixel{}; | ||
| 37 | constexpr auto bpp{ | ||
| 38 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)}; | ||
| 39 | for (std::size_t y = 0; y < height; ++y) { | ||
| 40 | for (std::size_t x = 0; x < width; ++x) { | ||
| 41 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 42 | if constexpr (reverse) { | ||
| 43 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 44 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 45 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 46 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 47 | } else { | ||
| 48 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 49 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 50 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 51 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 58 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 59 | } | ||
| 60 | |||
| 61 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 62 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 63 | } | ||
| 64 | |||
| 65 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, | ||
| 66 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { | ||
| 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 69 | u32 block_width{}; | ||
| 70 | u32 block_height{}; | ||
| 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 72 | const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( | ||
| 73 | in_data, width, height, depth, block_width, block_height); | ||
| 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); | ||
| 75 | |||
| 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 82 | bool convert_astc, bool convert_s8z24) { | ||
| 83 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 84 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 85 | static_cast<u32>(pixel_format)); | ||
| 86 | UNREACHABLE(); | ||
| 87 | |||
| 88 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) { | ||
| 89 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace Tegra::Texture | ||
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h deleted file mode 100644 index d5d6c77bb..000000000 --- a/src/video_core/textures/convert.h +++ /dev/null | |||
| @@ -1,22 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCore::Surface { | ||
| 10 | enum class PixelFormat; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Tegra::Texture { | ||
| 14 | |||
| 15 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, | ||
| 16 | u32 width, u32 height, u32 depth, bool convert_astc, | ||
| 17 | bool convert_s8z24); | ||
| 18 | |||
| 19 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 20 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 21 | |||
| 22 | } // namespace Tegra::Texture | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 16d46a018..9f5181318 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -2,204 +2,111 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <cmath> | 6 | #include <cmath> |
| 6 | #include <cstring> | 7 | #include <cstring> |
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 7 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 9 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 14 | #include "common/div_ceil.h" | ||
| 10 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 11 | #include "video_core/textures/decoders.h" | 16 | #include "video_core/textures/decoders.h" |
| 12 | #include "video_core/textures/texture.h" | 17 | #include "video_core/textures/texture.h" |
| 13 | 18 | ||
| 14 | namespace Tegra::Texture { | 19 | namespace Tegra::Texture { |
| 15 | namespace { | ||
| 16 | 20 | ||
| 21 | namespace { | ||
| 17 | /** | 22 | /** |
| 18 | * This table represents the internal swizzle of a gob, | 23 | * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing. |
| 19 | * in format 16 bytes x 2 sector packing. | ||
| 20 | * Calculates the offset of an (x, y) position within a swizzled texture. | 24 | * Calculates the offset of an (x, y) position within a swizzled texture. |
| 21 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 | 25 | * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 |
| 22 | */ | 26 | */ |
| 23 | template <std::size_t N, std::size_t M, u32 Align> | 27 | constexpr SwizzleTable MakeSwizzleTableConst() { |
| 24 | struct alignas(64) SwizzleTable { | 28 | SwizzleTable table{}; |
| 25 | static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); | 29 | for (u32 y = 0; y < table.size(); ++y) { |
| 26 | constexpr SwizzleTable() { | 30 | for (u32 x = 0; x < table[0].size(); ++x) { |
| 27 | for (u32 y = 0; y < N; ++y) { | 31 | table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 + |
| 28 | for (u32 x = 0; x < M; ++x) { | 32 | (y % 2) * 16 + (x % 16); |
| 29 | const u32 x2 = x * Align; | ||
| 30 | values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + | ||
| 31 | ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16)); | ||
| 32 | } | ||
| 33 | } | 33 | } |
| 34 | } | 34 | } |
| 35 | const std::array<u16, M>& operator[](std::size_t index) const { | 35 | return table; |
| 36 | return values[index]; | 36 | } |
| 37 | } | ||
| 38 | std::array<std::array<u16, M>, N> values{}; | ||
| 39 | }; | ||
| 40 | 37 | ||
| 41 | constexpr u32 FAST_SWIZZLE_ALIGN = 16; | 38 | constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst(); |
| 42 | 39 | ||
| 43 | constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); | 40 | template <bool TO_LINEAR> |
| 44 | constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); | 41 | void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 42 | u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { | ||
| 43 | // The origin of the transformation can be configured here, leave it as zero as the current API | ||
| 44 | // doesn't expose it. | ||
| 45 | static constexpr u32 origin_x = 0; | ||
| 46 | static constexpr u32 origin_y = 0; | ||
| 47 | static constexpr u32 origin_z = 0; | ||
| 45 | 48 | ||
| 46 | /** | 49 | // We can configure here a custom pitch |
| 47 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 50 | // As it's not exposed 'width * bpp' will be the expected pitch. |
| 48 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 51 | const u32 pitch = width * bytes_per_pixel; |
| 49 | * those. Block_Width is assumed to be 1. | 52 | const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel; |
| 50 | */ | ||
| 51 | void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 52 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 53 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 54 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 55 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 56 | std::array<u8*, 2> data_ptrs; | ||
| 57 | u32 z_address = tile_offset; | ||
| 58 | |||
| 59 | for (u32 z = z_start; z < z_end; z++) { | ||
| 60 | u32 y_address = z_address; | ||
| 61 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 62 | for (u32 y = y_start; y < y_end; y++) { | ||
| 63 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 64 | for (u32 x = x_start; x < x_end; x++) { | ||
| 65 | const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]}; | ||
| 66 | const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | ||
| 67 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||
| 68 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||
| 69 | std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel); | ||
| 70 | } | ||
| 71 | pixel_base += stride_x; | ||
| 72 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 73 | y_address += GOB_SIZE; | ||
| 74 | } | ||
| 75 | z_address += xy_block_size; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | 53 | ||
| 79 | /** | 54 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); |
| 80 | * This function manages ALL the GOBs(Group of Bytes) Inside a single block. | 55 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); |
| 81 | * Instead of going gob by gob, we map the coordinates inside a block and manage from | 56 | const u32 slice_size = |
| 82 | * those. Block_Width is assumed to be 1. | 57 | Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size; |
| 83 | */ | ||
| 84 | void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | ||
| 85 | const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end, | ||
| 86 | const u32 y_end, const u32 z_end, const u32 tile_offset, | ||
| 87 | const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||
| 88 | const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||
| 89 | std::array<u8*, 2> data_ptrs; | ||
| 90 | u32 z_address = tile_offset; | ||
| 91 | const u32 x_startb = x_start * bytes_per_pixel; | ||
| 92 | const u32 x_endb = x_end * bytes_per_pixel; | ||
| 93 | |||
| 94 | for (u32 z = z_start; z < z_end; z++) { | ||
| 95 | u32 y_address = z_address; | ||
| 96 | u32 pixel_base = layer_z * z + y_start * stride_x; | ||
| 97 | for (u32 y = y_start; y < y_end; y++) { | ||
| 98 | const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | ||
| 99 | for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) { | ||
| 100 | const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]}; | ||
| 101 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | ||
| 102 | const u32 pixel_index{out_x + pixel_base}; | ||
| 103 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; | ||
| 104 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; | ||
| 105 | std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN); | ||
| 106 | } | ||
| 107 | pixel_base += stride_x; | ||
| 108 | if ((y + 1) % GOB_SIZE_Y == 0) | ||
| 109 | y_address += GOB_SIZE; | ||
| 110 | } | ||
| 111 | z_address += xy_block_size; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | 58 | ||
| 115 | /** | 59 | const u32 block_height_mask = (1U << block_height) - 1; |
| 116 | * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. | 60 | const u32 block_depth_mask = (1U << block_depth) - 1; |
| 117 | * The body of this function takes care of splitting the swizzled texture into blocks, | 61 | const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth; |
| 118 | * and managing the extents of it. Once all the parameters of a single block are obtained, | 62 | |
| 119 | * the function calls 'ProcessBlock' to process that particular Block. | 63 | for (u32 slice = 0; slice < depth; ++slice) { |
| 120 | * | 64 | const u32 z = slice + origin_z; |
| 121 | * Documentation for the memory layout and decoding can be found at: | 65 | const u32 offset_z = (z >> block_depth) * slice_size + |
| 122 | * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces | 66 | ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height)); |
| 123 | */ | 67 | for (u32 line = 0; line < height; ++line) { |
| 124 | template <bool fast> | 68 | const u32 y = line + origin_y; |
| 125 | void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, | 69 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 126 | const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, | 70 | |
| 127 | const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, | 71 | const u32 block_y = y >> GOB_SIZE_Y_SHIFT; |
| 128 | const u32 width_spacing) { | 72 | const u32 offset_y = (block_y >> block_height) * block_size + |
| 129 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | 73 | ((block_y & block_height_mask) << GOB_SIZE_SHIFT); |
| 130 | const u32 stride_x = width * out_bytes_per_pixel; | 74 | |
| 131 | const u32 layer_z = height * stride_x; | 75 | for (u32 column = 0; column < width; ++column) { |
| 132 | const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; | 76 | const u32 x = (column + origin_x) * bytes_per_pixel; |
| 133 | constexpr u32 gob_elements_y = GOB_SIZE_Y; | 77 | const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift; |
| 134 | constexpr u32 gob_elements_z = GOB_SIZE_Z; | 78 | |
| 135 | const u32 block_x_elements = gob_elements_x; | 79 | const u32 base_swizzled_offset = offset_z + offset_y + offset_x; |
| 136 | const u32 block_y_elements = gob_elements_y * block_height; | 80 | const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X]; |
| 137 | const u32 block_z_elements = gob_elements_z * block_depth; | 81 | |
| 138 | const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); | 82 | const u32 unswizzled_offset = |
| 139 | const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); | 83 | slice * pitch * height + line * pitch + column * bytes_per_pixel; |
| 140 | const u32 blocks_on_y = div_ceil(height, block_y_elements); | 84 | |
| 141 | const u32 blocks_on_z = div_ceil(depth, block_z_elements); | 85 | u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; |
| 142 | const u32 xy_block_size = GOB_SIZE * block_height; | 86 | const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; |
| 143 | const u32 block_size = xy_block_size * block_depth; | 87 | std::memcpy(dst, src, bytes_per_pixel); |
| 144 | u32 tile_offset = 0; | ||
| 145 | for (u32 zb = 0; zb < blocks_on_z; zb++) { | ||
| 146 | const u32 z_start = zb * block_z_elements; | ||
| 147 | const u32 z_end = std::min(depth, z_start + block_z_elements); | ||
| 148 | for (u32 yb = 0; yb < blocks_on_y; yb++) { | ||
| 149 | const u32 y_start = yb * block_y_elements; | ||
| 150 | const u32 y_end = std::min(height, y_start + block_y_elements); | ||
| 151 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | ||
| 152 | const u32 x_start = xb * block_x_elements; | ||
| 153 | const u32 x_end = std::min(width, x_start + block_x_elements); | ||
| 154 | if constexpr (fast) { | ||
| 155 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 156 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 157 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 158 | } else { | ||
| 159 | PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | ||
| 160 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | ||
| 161 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | ||
| 162 | } | ||
| 163 | tile_offset += block_size; | ||
| 164 | } | 88 | } |
| 165 | } | 89 | } |
| 166 | } | 90 | } |
| 167 | } | 91 | } |
| 168 | |||
| 169 | } // Anonymous namespace | 92 | } // Anonymous namespace |
| 170 | 93 | ||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 94 | SwizzleTable MakeSwizzleTable() { |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 95 | return SWIZZLE_TABLE; |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | ||
| 174 | const u32 block_height_size{1U << block_height}; | ||
| 175 | const u32 block_depth_size{1U << block_depth}; | ||
| 176 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) { | ||
| 177 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 178 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 179 | block_depth_size, width_spacing); | ||
| 180 | } else { | ||
| 181 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||
| 182 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, | ||
| 183 | block_depth_size, width_spacing); | ||
| 184 | } | ||
| 185 | } | 96 | } |
| 186 | 97 | ||
| 187 | void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 98 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 188 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, | 99 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 189 | u32 block_depth, u32 width_spacing) { | 100 | u32 stride_alignment) { |
| 190 | CopySwizzledData((width + tile_size_x - 1) / tile_size_x, | 101 | Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 191 | (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, | 102 | stride_alignment); |
| 192 | bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth, | ||
| 193 | width_spacing); | ||
| 194 | } | 103 | } |
| 195 | 104 | ||
| 196 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 105 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 197 | u32 width, u32 height, u32 depth, u32 block_height, | 106 | u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 198 | u32 block_depth, u32 width_spacing) { | 107 | u32 stride_alignment) { |
| 199 | std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); | 108 | Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, |
| 200 | UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, | 109 | stride_alignment); |
| 201 | width, height, depth, block_height, block_depth, width_spacing); | ||
| 202 | return unswizzled_data; | ||
| 203 | } | 110 | } |
| 204 | 111 | ||
| 205 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 112 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| @@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 213 | const u32 gob_address_y = | 120 | const u32 gob_address_y = |
| 214 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 121 | (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 215 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 122 | ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 216 | const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; | 123 | const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; |
| 217 | for (u32 x = 0; x < subrect_width; ++x) { | 124 | for (u32 x = 0; x < subrect_width; ++x) { |
| 218 | const u32 dst_x = x + offset_x; | 125 | const u32 dst_x = x + offset_x; |
| 219 | const u32 gob_address = | 126 | const u32 gob_address = |
| @@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, | |||
| 235 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); | 142 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); |
| 236 | 143 | ||
| 237 | const u32 block_height_mask = (1U << block_height) - 1; | 144 | const u32 block_height_mask = (1U << block_height) - 1; |
| 238 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; | 145 | const u32 x_shift = GOB_SIZE_SHIFT + block_height; |
| 239 | 146 | ||
| 240 | for (u32 line = 0; line < line_count; ++line) { | 147 | for (u32 line = 0; line < line_count; ++line) { |
| 241 | const u32 src_y = line + origin_y; | 148 | const u32 src_y = line + origin_y; |
| 242 | const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; | 149 | const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; |
| 243 | 150 | ||
| 244 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; | 151 | const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; |
| 245 | const u32 src_offset_y = (block_y >> block_height) * block_size + | 152 | const u32 src_offset_y = (block_y >> block_height) * block_size + |
| @@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt | |||
| 270 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; | 177 | const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; |
| 271 | 178 | ||
| 272 | for (u32 line = 0; line < line_count; ++line) { | 179 | for (u32 line = 0; line < line_count; ++line) { |
| 273 | const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; | 180 | const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y]; |
| 274 | const u32 block_y = line / GOB_SIZE_Y; | 181 | const u32 block_y = line / GOB_SIZE_Y; |
| 275 | const u32 dst_offset_y = | 182 | const u32 dst_offset_y = |
| 276 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; | 183 | (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; |
| @@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 | |||
| 293 | const std::size_t gob_address_y = | 200 | const std::size_t gob_address_y = |
| 294 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + | 201 | (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + |
| 295 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; | 202 | ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; |
| 296 | const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; | 203 | const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y]; |
| 297 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | 204 | for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { |
| 298 | const std::size_t gob_address = | 205 | const std::size_t gob_address = |
| 299 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; | 206 | gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 01e156bc8..d7cdc81e8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -4,7 +4,8 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <span> |
| 8 | |||
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| @@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8; | |||
| 15 | constexpr u32 GOB_SIZE_Z = 1; | 16 | constexpr u32 GOB_SIZE_Z = 1; |
| 16 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; | 17 | constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; |
| 17 | 18 | ||
| 18 | constexpr std::size_t GOB_SIZE_X_SHIFT = 6; | 19 | constexpr u32 GOB_SIZE_X_SHIFT = 6; |
| 19 | constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; | 20 | constexpr u32 GOB_SIZE_Y_SHIFT = 3; |
| 20 | constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; | 21 | constexpr u32 GOB_SIZE_Z_SHIFT = 0; |
| 21 | constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; | 22 | constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; |
| 22 | 23 | ||
| 23 | /// Unswizzles a swizzled texture without changing its format. | 24 | using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>; |
| 24 | void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, | 25 | |
| 25 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 26 | /// Returns a z-order swizzle table |
| 26 | u32 block_height = TICEntry::DefaultBlockHeight, | 27 | SwizzleTable MakeSwizzleTable(); |
| 27 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 28 | |
| 28 | 29 | /// Unswizzles a block linear texture into linear memory. | |
| 29 | /// Unswizzles a swizzled texture without changing its format. | 30 | void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, |
| 30 | std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, | 31 | u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, |
| 31 | u32 width, u32 height, u32 depth, | 32 | u32 stride_alignment = 1); |
| 32 | u32 block_height = TICEntry::DefaultBlockHeight, | 33 | |
| 33 | u32 block_depth = TICEntry::DefaultBlockHeight, | 34 | /// Swizzles linear memory into a block linear texture. |
| 34 | u32 width_spacing = 0); | 35 | void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, |
| 35 | 36 | u32 height, u32 depth, u32 block_height, u32 block_depth, | |
| 36 | /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. | 37 | u32 stride_alignment = 1); |
| 37 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | ||
| 38 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | ||
| 39 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | ||
| 40 | 38 | ||
| 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. | 39 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 40 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp index 4171e3ef2..ae5621a7d 100644 --- a/src/video_core/textures/texture.cpp +++ b/src/video_core/textures/texture.cpp | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include "common/cityhash.h" | ||
| 8 | #include "core/settings.h" | 9 | #include "core/settings.h" |
| 9 | #include "video_core/textures/texture.h" | 10 | #include "video_core/textures/texture.h" |
| 10 | 11 | ||
| 12 | using Tegra::Texture::TICEntry; | ||
| 13 | using Tegra::Texture::TSCEntry; | ||
| 14 | |||
| 11 | namespace Tegra::Texture { | 15 | namespace Tegra::Texture { |
| 12 | 16 | ||
| 13 | namespace { | 17 | namespace { |
| @@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept { | |||
| 65 | 69 | ||
| 66 | } // Anonymous namespace | 70 | } // Anonymous namespace |
| 67 | 71 | ||
| 68 | std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | 72 | std::array<float, 4> TSCEntry::BorderColor() const noexcept { |
| 69 | if (!srgb_conversion) { | 73 | if (!srgb_conversion) { |
| 70 | return border_color; | 74 | return border_color; |
| 71 | } | 75 | } |
| @@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | |||
| 73 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; | 77 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; |
| 74 | } | 78 | } |
| 75 | 79 | ||
| 76 | float TSCEntry::GetMaxAnisotropy() const noexcept { | 80 | float TSCEntry::MaxAnisotropy() const noexcept { |
| 77 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); | 81 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); |
| 78 | } | 82 | } |
| 79 | 83 | ||
| 80 | } // namespace Tegra::Texture | 84 | } // namespace Tegra::Texture |
| 85 | |||
| 86 | size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept { | ||
| 87 | return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic); | ||
| 88 | } | ||
| 89 | |||
| 90 | size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept { | ||
| 91 | return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc); | ||
| 92 | } | ||
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 0574fef12..c1d14335e 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -53,27 +53,27 @@ enum class TextureFormat : u32 { | |||
| 53 | BC4 = 0x27, | 53 | BC4 = 0x27, |
| 54 | BC5 = 0x28, | 54 | BC5 = 0x28, |
| 55 | S8D24 = 0x29, | 55 | S8D24 = 0x29, |
| 56 | X8Z24 = 0x2a, | 56 | X8D24 = 0x2a, |
| 57 | D24S8 = 0x2b, | 57 | D24S8 = 0x2b, |
| 58 | X4V4Z24__COV4R4V = 0x2c, | 58 | X4V4D24__COV4R4V = 0x2c, |
| 59 | X4V4Z24__COV8R8V = 0x2d, | 59 | X4V4D24__COV8R8V = 0x2d, |
| 60 | V8Z24__COV4R12V = 0x2e, | 60 | V8D24__COV4R12V = 0x2e, |
| 61 | D32 = 0x2f, | 61 | D32 = 0x2f, |
| 62 | D32S8 = 0x30, | 62 | D32S8 = 0x30, |
| 63 | X8Z24_X20V4S8__COV4R4V = 0x31, | 63 | X8D24_X20V4S8__COV4R4V = 0x31, |
| 64 | X8Z24_X20V4S8__COV8R8V = 0x32, | 64 | X8D24_X20V4S8__COV8R8V = 0x32, |
| 65 | ZF32_X20V4X8__COV4R4V = 0x33, | 65 | D32_X20V4X8__COV4R4V = 0x33, |
| 66 | ZF32_X20V4X8__COV8R8V = 0x34, | 66 | D32_X20V4X8__COV8R8V = 0x34, |
| 67 | ZF32_X20V4S8__COV4R4V = 0x35, | 67 | D32_X20V4S8__COV4R4V = 0x35, |
| 68 | ZF32_X20V4S8__COV8R8V = 0x36, | 68 | D32_X20V4S8__COV8R8V = 0x36, |
| 69 | X8Z24_X16V8S8__COV4R12V = 0x37, | 69 | X8D24_X16V8S8__COV4R12V = 0x37, |
| 70 | ZF32_X16V8X8__COV4R12V = 0x38, | 70 | D32_X16V8X8__COV4R12V = 0x38, |
| 71 | ZF32_X16V8S8__COV4R12V = 0x39, | 71 | D32_X16V8S8__COV4R12V = 0x39, |
| 72 | D16 = 0x3a, | 72 | D16 = 0x3a, |
| 73 | V8Z24__COV8R24V = 0x3b, | 73 | V8D24__COV8R24V = 0x3b, |
| 74 | X8Z24_X16V8S8__COV8R24V = 0x3c, | 74 | X8D24_X16V8S8__COV8R24V = 0x3c, |
| 75 | ZF32_X16V8X8__COV8R24V = 0x3d, | 75 | D32_X16V8X8__COV8R24V = 0x3d, |
| 76 | ZF32_X16V8S8__COV8R24V = 0x3e, | 76 | D32_X16V8S8__COV8R24V = 0x3e, |
| 77 | ASTC_2D_4X4 = 0x40, | 77 | ASTC_2D_4X4 = 0x40, |
| 78 | ASTC_2D_5X5 = 0x41, | 78 | ASTC_2D_5X5 = 0x41, |
| 79 | ASTC_2D_6X6 = 0x42, | 79 | ASTC_2D_6X6 = 0x42, |
| @@ -146,7 +146,7 @@ enum class MsaaMode : u32 { | |||
| 146 | }; | 146 | }; |
| 147 | 147 | ||
| 148 | union TextureHandle { | 148 | union TextureHandle { |
| 149 | TextureHandle(u32 raw) : raw{raw} {} | 149 | /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {} |
| 150 | 150 | ||
| 151 | u32 raw; | 151 | u32 raw; |
| 152 | BitField<0, 20, u32> tic_id; | 152 | BitField<0, 20, u32> tic_id; |
| @@ -155,124 +155,124 @@ union TextureHandle { | |||
| 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); |
| 156 | 156 | ||
| 157 | struct TICEntry { | 157 | struct TICEntry { |
| 158 | static constexpr u32 DefaultBlockHeight = 16; | ||
| 159 | static constexpr u32 DefaultBlockDepth = 1; | ||
| 160 | |||
| 161 | union { | ||
| 162 | u32 raw; | ||
| 163 | BitField<0, 7, TextureFormat> format; | ||
| 164 | BitField<7, 3, ComponentType> r_type; | ||
| 165 | BitField<10, 3, ComponentType> g_type; | ||
| 166 | BitField<13, 3, ComponentType> b_type; | ||
| 167 | BitField<16, 3, ComponentType> a_type; | ||
| 168 | |||
| 169 | BitField<19, 3, SwizzleSource> x_source; | ||
| 170 | BitField<22, 3, SwizzleSource> y_source; | ||
| 171 | BitField<25, 3, SwizzleSource> z_source; | ||
| 172 | BitField<28, 3, SwizzleSource> w_source; | ||
| 173 | }; | ||
| 174 | u32 address_low; | ||
| 175 | union { | 158 | union { |
| 176 | BitField<0, 16, u32> address_high; | 159 | struct { |
| 177 | BitField<21, 3, TICHeaderVersion> header_version; | 160 | union { |
| 178 | }; | 161 | BitField<0, 7, TextureFormat> format; |
| 179 | union { | 162 | BitField<7, 3, ComponentType> r_type; |
| 180 | BitField<0, 3, u32> block_width; | 163 | BitField<10, 3, ComponentType> g_type; |
| 181 | BitField<3, 3, u32> block_height; | 164 | BitField<13, 3, ComponentType> b_type; |
| 182 | BitField<6, 3, u32> block_depth; | 165 | BitField<16, 3, ComponentType> a_type; |
| 166 | |||
| 167 | BitField<19, 3, SwizzleSource> x_source; | ||
| 168 | BitField<22, 3, SwizzleSource> y_source; | ||
| 169 | BitField<25, 3, SwizzleSource> z_source; | ||
| 170 | BitField<28, 3, SwizzleSource> w_source; | ||
| 171 | }; | ||
| 172 | u32 address_low; | ||
| 173 | union { | ||
| 174 | BitField<0, 16, u32> address_high; | ||
| 175 | BitField<16, 5, u32> layer_base_3_7; | ||
| 176 | BitField<21, 3, TICHeaderVersion> header_version; | ||
| 177 | BitField<24, 1, u32> load_store_hint; | ||
| 178 | BitField<25, 4, u32> view_coherency_hash; | ||
| 179 | BitField<29, 3, u32> layer_base_8_10; | ||
| 180 | }; | ||
| 181 | union { | ||
| 182 | BitField<0, 3, u32> block_width; | ||
| 183 | BitField<3, 3, u32> block_height; | ||
| 184 | BitField<6, 3, u32> block_depth; | ||
| 183 | 185 | ||
| 184 | BitField<10, 3, u32> tile_width_spacing; | 186 | BitField<10, 3, u32> tile_width_spacing; |
| 185 | 187 | ||
| 186 | // High 16 bits of the pitch value | 188 | // High 16 bits of the pitch value |
| 187 | BitField<0, 16, u32> pitch_high; | 189 | BitField<0, 16, u32> pitch_high; |
| 188 | BitField<26, 1, u32> use_header_opt_control; | 190 | BitField<26, 1, u32> use_header_opt_control; |
| 189 | BitField<27, 1, u32> depth_texture; | 191 | BitField<27, 1, u32> depth_texture; |
| 190 | BitField<28, 4, u32> max_mip_level; | 192 | BitField<28, 4, u32> max_mip_level; |
| 191 | 193 | ||
| 192 | BitField<0, 16, u32> buffer_high_width_minus_one; | 194 | BitField<0, 16, u32> buffer_high_width_minus_one; |
| 193 | }; | 195 | }; |
| 194 | union { | 196 | union { |
| 195 | BitField<0, 16, u32> width_minus_1; | 197 | BitField<0, 16, u32> width_minus_one; |
| 196 | BitField<22, 1, u32> srgb_conversion; | 198 | BitField<16, 3, u32> layer_base_0_2; |
| 197 | BitField<23, 4, TextureType> texture_type; | 199 | BitField<22, 1, u32> srgb_conversion; |
| 198 | BitField<29, 3, u32> border_size; | 200 | BitField<23, 4, TextureType> texture_type; |
| 201 | BitField<29, 3, u32> border_size; | ||
| 199 | 202 | ||
| 200 | BitField<0, 16, u32> buffer_low_width_minus_one; | 203 | BitField<0, 16, u32> buffer_low_width_minus_one; |
| 201 | }; | 204 | }; |
| 202 | union { | 205 | union { |
| 203 | BitField<0, 16, u32> height_minus_1; | 206 | BitField<0, 16, u32> height_minus_1; |
| 204 | BitField<16, 14, u32> depth_minus_1; | 207 | BitField<16, 14, u32> depth_minus_1; |
| 205 | }; | 208 | BitField<30, 1, u32> is_sparse; |
| 206 | union { | 209 | BitField<31, 1, u32> normalized_coords; |
| 207 | BitField<6, 13, u32> mip_lod_bias; | 210 | }; |
| 208 | BitField<27, 3, u32> max_anisotropy; | 211 | union { |
| 212 | BitField<6, 13, u32> mip_lod_bias; | ||
| 213 | BitField<27, 3, u32> max_anisotropy; | ||
| 214 | }; | ||
| 215 | union { | ||
| 216 | BitField<0, 4, u32> res_min_mip_level; | ||
| 217 | BitField<4, 4, u32> res_max_mip_level; | ||
| 218 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 219 | BitField<12, 12, u32> min_lod_clamp; | ||
| 220 | }; | ||
| 221 | }; | ||
| 222 | std::array<u64, 4> raw; | ||
| 209 | }; | 223 | }; |
| 210 | 224 | ||
| 211 | union { | 225 | constexpr bool operator==(const TICEntry& rhs) const noexcept { |
| 212 | BitField<0, 4, u32> res_min_mip_level; | 226 | return raw == rhs.raw; |
| 213 | BitField<4, 4, u32> res_max_mip_level; | 227 | } |
| 214 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 215 | BitField<12, 12, u32> min_lod_clamp; | ||
| 216 | }; | ||
| 217 | 228 | ||
| 218 | GPUVAddr Address() const { | 229 | constexpr bool operator!=(const TICEntry& rhs) const noexcept { |
| 230 | return raw != rhs.raw; | ||
| 231 | } | ||
| 232 | |||
| 233 | constexpr GPUVAddr Address() const { | ||
| 219 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | 234 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); |
| 220 | } | 235 | } |
| 221 | 236 | ||
| 222 | u32 Pitch() const { | 237 | constexpr u32 Pitch() const { |
| 223 | ASSERT(header_version == TICHeaderVersion::Pitch || | 238 | ASSERT(header_version == TICHeaderVersion::Pitch || |
| 224 | header_version == TICHeaderVersion::PitchColorKey); | 239 | header_version == TICHeaderVersion::PitchColorKey); |
| 225 | // The pitch value is 21 bits, and is 32B aligned. | 240 | // The pitch value is 21 bits, and is 32B aligned. |
| 226 | return pitch_high << 5; | 241 | return pitch_high << 5; |
| 227 | } | 242 | } |
| 228 | 243 | ||
| 229 | u32 Width() const { | 244 | constexpr u32 Width() const { |
| 230 | if (header_version != TICHeaderVersion::OneDBuffer) { | 245 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 231 | return width_minus_1 + 1; | 246 | return width_minus_one + 1; |
| 232 | } | 247 | } |
| 233 | return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; | 248 | return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1; |
| 234 | } | 249 | } |
| 235 | 250 | ||
| 236 | u32 Height() const { | 251 | constexpr u32 Height() const { |
| 237 | return height_minus_1 + 1; | 252 | return height_minus_1 + 1; |
| 238 | } | 253 | } |
| 239 | 254 | ||
| 240 | u32 Depth() const { | 255 | constexpr u32 Depth() const { |
| 241 | return depth_minus_1 + 1; | 256 | return depth_minus_1 + 1; |
| 242 | } | 257 | } |
| 243 | 258 | ||
| 244 | u32 BlockWidth() const { | 259 | constexpr u32 BaseLayer() const { |
| 245 | ASSERT(IsTiled()); | 260 | return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8; |
| 246 | return block_width; | ||
| 247 | } | ||
| 248 | |||
| 249 | u32 BlockHeight() const { | ||
| 250 | ASSERT(IsTiled()); | ||
| 251 | return block_height; | ||
| 252 | } | ||
| 253 | |||
| 254 | u32 BlockDepth() const { | ||
| 255 | ASSERT(IsTiled()); | ||
| 256 | return block_depth; | ||
| 257 | } | 261 | } |
| 258 | 262 | ||
| 259 | bool IsTiled() const { | 263 | constexpr bool IsBlockLinear() const { |
| 260 | return header_version == TICHeaderVersion::BlockLinear || | 264 | return header_version == TICHeaderVersion::BlockLinear || |
| 261 | header_version == TICHeaderVersion::BlockLinearColorKey; | 265 | header_version == TICHeaderVersion::BlockLinearColorKey; |
| 262 | } | 266 | } |
| 263 | 267 | ||
| 264 | bool IsLineal() const { | 268 | constexpr bool IsPitchLinear() const { |
| 265 | return header_version == TICHeaderVersion::Pitch || | 269 | return header_version == TICHeaderVersion::Pitch || |
| 266 | header_version == TICHeaderVersion::PitchColorKey; | 270 | header_version == TICHeaderVersion::PitchColorKey; |
| 267 | } | 271 | } |
| 268 | 272 | ||
| 269 | bool IsBuffer() const { | 273 | constexpr bool IsBuffer() const { |
| 270 | return header_version == TICHeaderVersion::OneDBuffer; | 274 | return header_version == TICHeaderVersion::OneDBuffer; |
| 271 | } | 275 | } |
| 272 | |||
| 273 | bool IsSrgbConversionEnabled() const { | ||
| 274 | return srgb_conversion != 0; | ||
| 275 | } | ||
| 276 | }; | 276 | }; |
| 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); | 277 | static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); |
| 278 | 278 | ||
| @@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 { | |||
| 309 | Linear = 3, | 309 | Linear = 3, |
| 310 | }; | 310 | }; |
| 311 | 311 | ||
| 312 | enum class SamplerReduction : u32 { | ||
| 313 | WeightedAverage = 0, | ||
| 314 | Min = 1, | ||
| 315 | Max = 2, | ||
| 316 | }; | ||
| 317 | |||
| 312 | enum class Anisotropy { | 318 | enum class Anisotropy { |
| 313 | Default, | 319 | Default, |
| 314 | Filter2x, | 320 | Filter2x, |
| @@ -333,8 +339,12 @@ struct TSCEntry { | |||
| 333 | BitField<0, 2, TextureFilter> mag_filter; | 339 | BitField<0, 2, TextureFilter> mag_filter; |
| 334 | BitField<4, 2, TextureFilter> min_filter; | 340 | BitField<4, 2, TextureFilter> min_filter; |
| 335 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; | 341 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; |
| 342 | BitField<8, 1, u32> cubemap_anisotropy; | ||
| 336 | BitField<9, 1, u32> cubemap_interface_filtering; | 343 | BitField<9, 1, u32> cubemap_interface_filtering; |
| 344 | BitField<10, 2, SamplerReduction> reduction_filter; | ||
| 337 | BitField<12, 13, u32> mip_lod_bias; | 345 | BitField<12, 13, u32> mip_lod_bias; |
| 346 | BitField<25, 1, u32> float_coord_normalization; | ||
| 347 | BitField<26, 5, u32> trilin_opt; | ||
| 338 | }; | 348 | }; |
| 339 | union { | 349 | union { |
| 340 | BitField<0, 12, u32> min_lod_clamp; | 350 | BitField<0, 12, u32> min_lod_clamp; |
| @@ -347,32 +357,45 @@ struct TSCEntry { | |||
| 347 | }; | 357 | }; |
| 348 | std::array<f32, 4> border_color; | 358 | std::array<f32, 4> border_color; |
| 349 | }; | 359 | }; |
| 350 | std::array<u8, 0x20> raw; | 360 | std::array<u64, 4> raw; |
| 351 | }; | 361 | }; |
| 352 | 362 | ||
| 353 | std::array<float, 4> GetBorderColor() const noexcept; | 363 | constexpr bool operator==(const TSCEntry& rhs) const noexcept { |
| 364 | return raw == rhs.raw; | ||
| 365 | } | ||
| 366 | |||
| 367 | constexpr bool operator!=(const TSCEntry& rhs) const noexcept { | ||
| 368 | return raw != rhs.raw; | ||
| 369 | } | ||
| 370 | |||
| 371 | std::array<float, 4> BorderColor() const noexcept; | ||
| 354 | 372 | ||
| 355 | float GetMaxAnisotropy() const noexcept; | 373 | float MaxAnisotropy() const noexcept; |
| 356 | 374 | ||
| 357 | float GetMinLod() const { | 375 | float MinLod() const { |
| 358 | return static_cast<float>(min_lod_clamp) / 256.0f; | 376 | return static_cast<float>(min_lod_clamp) / 256.0f; |
| 359 | } | 377 | } |
| 360 | 378 | ||
| 361 | float GetMaxLod() const { | 379 | float MaxLod() const { |
| 362 | return static_cast<float>(max_lod_clamp) / 256.0f; | 380 | return static_cast<float>(max_lod_clamp) / 256.0f; |
| 363 | } | 381 | } |
| 364 | 382 | ||
| 365 | float GetLodBias() const { | 383 | float LodBias() const { |
| 366 | // Sign extend the 13-bit value. | 384 | // Sign extend the 13-bit value. |
| 367 | constexpr u32 mask = 1U << (13 - 1); | 385 | static constexpr u32 mask = 1U << (13 - 1); |
| 368 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; | 386 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; |
| 369 | } | 387 | } |
| 370 | }; | 388 | }; |
| 371 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); | 389 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |
| 372 | 390 | ||
| 373 | struct FullTextureInfo { | 391 | } // namespace Tegra::Texture |
| 374 | TICEntry tic; | 392 | |
| 375 | TSCEntry tsc; | 393 | template <> |
| 394 | struct std::hash<Tegra::Texture::TICEntry> { | ||
| 395 | size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept; | ||
| 376 | }; | 396 | }; |
| 377 | 397 | ||
| 378 | } // namespace Tegra::Texture | 398 | template <> |
| 399 | struct std::hash<Tegra::Texture::TSCEntry> { | ||
| 400 | size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept; | ||
| 401 | }; | ||