summaryrefslogtreecommitdiff
path: root/src/video_core/textures
diff options
context:
space:
mode:
author: ReinUsesLisp, 2020-12-30 02:25:23 -0300
committer: ReinUsesLisp, 2020-12-30 03:38:50 -0300
commit: 9764c13d6d2977903f407761b27d847c0056e1c4 (patch)
tree: f6f5d6d6379b0404147969e7d1f548ed3d49ca01 /src/video_core/textures
parent: video_core: Add a delayed destruction ring abstraction (diff)
download: yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.gz
yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.xz
yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.zip
video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when doing minor changes. The cache can easily forget valuable information about the cached textures by CPU writes or simply by its normal usage. This commit aims to address those issues.
Diffstat (limited to 'src/video_core/textures')
-rw-r--r-- src/video_core/textures/astc.cpp 58
-rw-r--r-- src/video_core/textures/astc.h 5
-rw-r--r-- src/video_core/textures/convert.cpp 93
-rw-r--r-- src/video_core/textures/convert.h 22
-rw-r--r-- src/video_core/textures/decoders.cpp 249
-rw-r--r-- src/video_core/textures/decoders.h 44
-rw-r--r-- src/video_core/textures/texture.cpp 16
-rw-r--r-- src/video_core/textures/texture.h 239
8 files changed, 274 insertions, 452 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstring> 20#include <cstring>
21#include <span>
21#include <vector> 22#include <vector>
22 23
23#include <boost/container/static_vector.hpp> 24#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
600 return params; 601 return params;
601} 602}
602 603
603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, 604static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
604 u32 blockHeight) { 605 u32 blockHeight) {
605 // Don't actually care about the void extent, just read the bits... 606 // Don't actually care about the void extent, just read the bits...
606 for (s32 i = 0; i < 4; ++i) { 607 for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
623 } 624 }
624} 625}
625 626
626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { 627static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
627 for (u32 j = 0; j < blockHeight; j++) { 628 for (u32 j = 0; j < blockHeight; j++) {
628 for (u32 i = 0; i < blockWidth; i++) { 629 for (u32 i = 0; i < blockWidth; i++) {
629 outBuf[j * blockWidth + i] = 0xFFFF00FF; 630 outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1438#undef READ_INT_VALUES 1439#undef READ_INT_VALUES
1439} 1440}
1440 1441
1441static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, 1442static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1442 u32* outBuf) { 1443 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1443 InputBitStream strm(inBuf); 1444 InputBitStream strm(inBuf.data());
1444 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1445 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1445 1446
1446 // Was there an error? 1447 // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1601 } 1602 }
1602 1603
1603 // Read the texel weight data.. 1604 // Read the texel weight data..
1604 u8 texelWeightData[16]; 1605 std::array<u8, 16> texelWeightData;
1605 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1606 std::ranges::copy(inBuf, texelWeightData.begin());
1606 1607
1607 // Reverse everything 1608 // Reverse everything
1608 for (u32 i = 0; i < 8; i++) { 1609 for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1618 1619
1619 // Make sure that higher non-texel bits are set to zero 1620 // Make sure that higher non-texel bits are set to zero
1620 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1621 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1621 texelWeightData[clearByteStart - 1] = 1622 if (clearByteStart > 0) {
1622 texelWeightData[clearByteStart - 1] & 1623 texelWeightData[clearByteStart - 1] &=
1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1624 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1625 }
1626 std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1625 1627
1626 IntegerEncodedVector texelWeightValues; 1628 IntegerEncodedVector texelWeightValues;
1627 1629
1628 InputBitStream weightStream(texelWeightData); 1630 InputBitStream weightStream(texelWeightData.data());
1629 1631
1630 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, 1632 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1631 weightParams.GetNumWeightValues()); 1633 weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1672 1674
1673namespace Tegra::Texture::ASTC { 1675namespace Tegra::Texture::ASTC {
1674 1676
1675std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, 1677void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1676 u32 block_height) { 1678 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1677 u32 blockIdx = 0; 1679 u32 block_index = 0;
1678 std::size_t depth_offset = 0; 1680 std::size_t depth_offset = 0;
1679 std::vector<u8> outData(height * width * depth * 4); 1681 for (u32 z = 0; z < depth; z++) {
1680 for (u32 k = 0; k < depth; k++) { 1682 for (u32 y = 0; y < height; y += block_height) {
1681 for (u32 j = 0; j < height; j += block_height) { 1683 for (u32 x = 0; x < width; x += block_width) {
1682 for (u32 i = 0; i < width; i += block_width) { 1684 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1683
1684 const u8* blockPtr = data + blockIdx * 16;
1685 1685
1686 // Blocks can be at most 12x12 1686 // Blocks can be at most 12x12
1687 u32 uncompData[144]; 1687 std::array<u32, 12 * 12> uncompData;
1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1689 1689
1690 u32 decompWidth = std::min(block_width, width - i); 1690 u32 decompWidth = std::min(block_width, width - x);
1691 u32 decompHeight = std::min(block_height, height - j); 1691 u32 decompHeight = std::min(block_height, height - y);
1692 1692
1693 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1693 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1694 for (u32 jj = 0; jj < decompHeight; jj++) { 1694 for (u32 jj = 0; jj < decompHeight; jj++) {
1695 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1695 std::memcpy(outRow.data() + jj * width * 4,
1696 uncompData.data() + jj * block_width, decompWidth * 4);
1696 } 1697 }
1697 1698 ++block_index;
1698 blockIdx++;
1699 } 1699 }
1700 } 1700 }
1701 depth_offset += height * width * 4; 1701 depth_offset += height * width * 4;
1702 } 1702 }
1703
1704 return outData;
1705} 1703}
1706 1704
1707} // namespace Tegra::Texture::ASTC 1705} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <cstdint> 7#include <cstdint>
8#include <vector>
9 8
10namespace Tegra::Texture::ASTC { 9namespace Tegra::Texture::ASTC {
11 10
12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 11void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 12 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
14 13
15} // namespace Tegra::Texture::ASTC 14} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index bd1aebf02..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/surface.h"
14#include "video_core/textures/astc.h"
15#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
57static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
58 SwapS8Z24ToZ24S8<false>(data, width, height);
59}
60
61static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height);
63}
64
65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 }
79}
80
81void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
82 bool convert_astc, bool convert_s8z24) {
83 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
84 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
85 pixel_format);
86 UNREACHABLE();
87
88 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
89 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
90 }
91}
92
93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
13namespace Tegra::Texture {
14
15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
18
19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
21
22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <cmath> 6#include <cmath>
6#include <cstring> 7#include <cstring>
8#include <span>
9#include <utility>
10
7#include "common/alignment.h" 11#include "common/alignment.h"
8#include "common/assert.h" 12#include "common/assert.h"
9#include "common/bit_util.h" 13#include "common/bit_util.h"
14#include "common/div_ceil.h"
10#include "video_core/gpu.h" 15#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 16#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
13 18
14namespace Tegra::Texture { 19namespace Tegra::Texture {
15namespace {
16 20
21namespace {
17/** 22/**
18 * This table represents the internal swizzle of a gob, 23 * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
19 * in format 16 bytes x 2 sector packing.
20 * Calculates the offset of an (x, y) position within a swizzled texture. 24 * Calculates the offset of an (x, y) position within a swizzled texture.
21 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 25 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
22 */ 26 */
23template <std::size_t N, std::size_t M, u32 Align> 27constexpr SwizzleTable MakeSwizzleTableConst() {
24struct alignas(64) SwizzleTable { 28 SwizzleTable table{};
25 static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); 29 for (u32 y = 0; y < table.size(); ++y) {
26 constexpr SwizzleTable() { 30 for (u32 x = 0; x < table[0].size(); ++x) {
27 for (u32 y = 0; y < N; ++y) { 31 table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
28 for (u32 x = 0; x < M; ++x) { 32 (y % 2) * 16 + (x % 16);
29 const u32 x2 = x * Align;
30 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
31 ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
32 }
33 } 33 }
34 } 34 }
35 const std::array<u16, M>& operator[](std::size_t index) const { 35 return table;
36 return values[index]; 36}
37 }
38 std::array<std::array<u16, M>, N> values{};
39};
40 37
41constexpr u32 FAST_SWIZZLE_ALIGN = 16; 38constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
42 39
43constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); 40template <bool TO_LINEAR>
44constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); 41void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
42 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
43 // The origin of the transformation can be configured here, leave it as zero as the current API
44 // doesn't expose it.
45 static constexpr u32 origin_x = 0;
46 static constexpr u32 origin_y = 0;
47 static constexpr u32 origin_z = 0;
45 48
46/** 49 // We can configure here a custom pitch
47 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 50 // As it's not exposed 'width * bpp' will be the expected pitch.
48 * Instead of going gob by gob, we map the coordinates inside a block and manage from 51 const u32 pitch = width * bytes_per_pixel;
49 * those. Block_Width is assumed to be 1. 52 const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
50 */
51void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
52 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
53 const u32 y_end, const u32 z_end, const u32 tile_offset,
54 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
55 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
56 std::array<u8*, 2> data_ptrs;
57 u32 z_address = tile_offset;
58
59 for (u32 z = z_start; z < z_end; z++) {
60 u32 y_address = z_address;
61 u32 pixel_base = layer_z * z + y_start * stride_x;
62 for (u32 y = y_start; y < y_end; y++) {
63 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
64 for (u32 x = x_start; x < x_end; x++) {
65 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
70 }
71 pixel_base += stride_x;
72 if ((y + 1) % GOB_SIZE_Y == 0)
73 y_address += GOB_SIZE;
74 }
75 z_address += xy_block_size;
76 }
77}
78 53
79/** 54 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 55 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
81 * Instead of going gob by gob, we map the coordinates inside a block and manage from 56 const u32 slice_size =
82 * those. Block_Width is assumed to be 1. 57 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
83 */
84void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
86 const u32 y_end, const u32 z_end, const u32 tile_offset,
87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
89 std::array<u8*, 2> data_ptrs;
90 u32 z_address = tile_offset;
91 const u32 x_startb = x_start * bytes_per_pixel;
92 const u32 x_endb = x_end * bytes_per_pixel;
93
94 for (u32 z = z_start; z < z_end; z++) {
95 u32 y_address = z_address;
96 u32 pixel_base = layer_z * z + y_start * stride_x;
97 for (u32 y = y_start; y < y_end; y++) {
98 const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
99 for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
100 const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
101 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
102 const u32 pixel_index{out_x + pixel_base};
103 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
104 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
105 std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
106 }
107 pixel_base += stride_x;
108 if ((y + 1) % GOB_SIZE_Y == 0)
109 y_address += GOB_SIZE;
110 }
111 z_address += xy_block_size;
112 }
113}
114 58
115/** 59 const u32 block_height_mask = (1U << block_height) - 1;
116 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 60 const u32 block_depth_mask = (1U << block_depth) - 1;
117 * The body of this function takes care of splitting the swizzled texture into blocks, 61 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
118 * and managing the extents of it. Once all the parameters of a single block are obtained, 62
119 * the function calls 'ProcessBlock' to process that particular Block. 63 for (u32 slice = 0; slice < depth; ++slice) {
120 * 64 const u32 z = slice + origin_z;
121 * Documentation for the memory layout and decoding can be found at: 65 const u32 offset_z = (z >> block_depth) * slice_size +
122 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces 66 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
123 */ 67 for (u32 line = 0; line < height; ++line) {
124template <bool fast> 68 const u32 y = line + origin_y;
125void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, 69 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
126 const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, 70
127 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, 71 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
128 const u32 width_spacing) { 72 const u32 offset_y = (block_y >> block_height) * block_size +
129 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; 73 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
130 const u32 stride_x = width * out_bytes_per_pixel; 74
131 const u32 layer_z = height * stride_x; 75 for (u32 column = 0; column < width; ++column) {
132 const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; 76 const u32 x = (column + origin_x) * bytes_per_pixel;
133 constexpr u32 gob_elements_y = GOB_SIZE_Y; 77 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
134 constexpr u32 gob_elements_z = GOB_SIZE_Z; 78
135 const u32 block_x_elements = gob_elements_x; 79 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
136 const u32 block_y_elements = gob_elements_y * block_height; 80 const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
137 const u32 block_z_elements = gob_elements_z * block_depth; 81
138 const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); 82 const u32 unswizzled_offset =
139 const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); 83 slice * pitch * height + line * pitch + column * bytes_per_pixel;
140 const u32 blocks_on_y = div_ceil(height, block_y_elements); 84
141 const u32 blocks_on_z = div_ceil(depth, block_z_elements); 85 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
142 const u32 xy_block_size = GOB_SIZE * block_height; 86 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
143 const u32 block_size = xy_block_size * block_depth; 87 std::memcpy(dst, src, bytes_per_pixel);
144 u32 tile_offset = 0;
145 for (u32 zb = 0; zb < blocks_on_z; zb++) {
146 const u32 z_start = zb * block_z_elements;
147 const u32 z_end = std::min(depth, z_start + block_z_elements);
148 for (u32 yb = 0; yb < blocks_on_y; yb++) {
149 const u32 y_start = yb * block_y_elements;
150 const u32 y_end = std::min(height, y_start + block_y_elements);
151 for (u32 xb = 0; xb < blocks_on_x; xb++) {
152 const u32 x_start = xb * block_x_elements;
153 const u32 x_end = std::min(width, x_start + block_x_elements);
154 if constexpr (fast) {
155 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
156 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
157 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
158 } else {
159 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
160 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
161 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
162 }
163 tile_offset += block_size;
164 } 88 }
165 } 89 }
166 } 90 }
167} 91}
168
169} // Anonymous namespace 92} // Anonymous namespace
170 93
171void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, 94SwizzleTable MakeSwizzleTable() {
172 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, 95 return SWIZZLE_TABLE;
173 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
174 const u32 block_height_size{1U << block_height};
175 const u32 block_depth_size{1U << block_depth};
176 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
177 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
178 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
179 block_depth_size, width_spacing);
180 } else {
181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
182 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
183 block_depth_size, width_spacing);
184 }
185} 96}
186 97
187void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 98void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
188 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 99 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
189 u32 block_depth, u32 width_spacing) { 100 u32 stride_alignment) {
190 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 101 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
191 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 102 stride_alignment);
192 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
193 width_spacing);
194} 103}
195 104
196std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 105void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
197 u32 width, u32 height, u32 depth, u32 block_height, 106 u32 height, u32 depth, u32 block_height, u32 block_depth,
198 u32 block_depth, u32 width_spacing) { 107 u32 stride_alignment) {
199 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 108 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
200 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 109 stride_alignment);
201 width, height, depth, block_height, block_depth, width_spacing);
202 return unswizzled_data;
203} 110}
204 111
205void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 112void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
213 const u32 gob_address_y = 120 const u32 gob_address_y =
214 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 121 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
215 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 122 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
216 const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; 123 const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
217 for (u32 x = 0; x < subrect_width; ++x) { 124 for (u32 x = 0; x < subrect_width; ++x) {
218 const u32 dst_x = x + offset_x; 125 const u32 dst_x = x + offset_x;
219 const u32 gob_address = 126 const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); 142 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 143
237 const u32 block_height_mask = (1U << block_height) - 1; 144 const u32 block_height_mask = (1U << block_height) - 1;
238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; 145 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
239 146
240 for (u32 line = 0; line < line_count; ++line) { 147 for (u32 line = 0; line < line_count; ++line) {
241 const u32 src_y = line + origin_y; 148 const u32 src_y = line + origin_y;
242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; 149 const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 150
244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; 151 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 const u32 src_offset_y = (block_y >> block_height) * block_size + 152 const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; 177 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
271 178
272 for (u32 line = 0; line < line_count; ++line) { 179 for (u32 line = 0; line < line_count; ++line) {
273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 180 const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
274 const u32 block_y = line / GOB_SIZE_Y; 181 const u32 block_y = line / GOB_SIZE_Y;
275 const u32 dst_offset_y = 182 const u32 dst_offset_y =
276 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; 183 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
293 const std::size_t gob_address_y = 200 const std::size_t gob_address_y =
294 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 201 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
295 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 202 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
296 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; 203 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
297 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { 204 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
298 const std::size_t gob_address = 205 const std::size_t gob_address =
299 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; 206 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector> 7#include <span>
8
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
15constexpr u32 GOB_SIZE_Z = 1; 16constexpr u32 GOB_SIZE_Z = 1;
16constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; 17constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
17 18
18constexpr std::size_t GOB_SIZE_X_SHIFT = 6; 19constexpr u32 GOB_SIZE_X_SHIFT = 6;
19constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; 20constexpr u32 GOB_SIZE_Y_SHIFT = 3;
20constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; 21constexpr u32 GOB_SIZE_Z_SHIFT = 0;
21constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; 22constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
22 23
23/// Unswizzles a swizzled texture without changing its format. 24using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
24void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 25
25 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 26/// Returns a z-order swizzle table
26 u32 block_height = TICEntry::DefaultBlockHeight, 27SwizzleTable MakeSwizzleTable();
27 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 28
28 29/// Unswizzles a block linear texture into linear memory.
29/// Unswizzles a swizzled texture without changing its format. 30void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
30std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 31 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
31 u32 width, u32 height, u32 depth, 32 u32 stride_alignment = 1);
32 u32 block_height = TICEntry::DefaultBlockHeight, 33
33 u32 block_depth = TICEntry::DefaultBlockHeight, 34/// Swizzles linear memory into a block linear texture.
34 u32 width_spacing = 0); 35void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
35 36 u32 height, u32 depth, u32 block_height, u32 block_depth,
36/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 37 u32 stride_alignment = 1);
37void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
38 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
39 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
40 38
41/// This function calculates the correct size of a texture depending if it's tiled or not. 39/// This function calculates the correct size of a texture depending if it's tiled or not.
42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 40std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7 7
8#include "common/cityhash.h"
8#include "core/settings.h" 9#include "core/settings.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
12using Tegra::Texture::TICEntry;
13using Tegra::Texture::TSCEntry;
14
11namespace Tegra::Texture { 15namespace Tegra::Texture {
12 16
13namespace { 17namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
65 69
66} // Anonymous namespace 70} // Anonymous namespace
67 71
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { 72std::array<float, 4> TSCEntry::BorderColor() const noexcept {
69 if (!srgb_conversion) { 73 if (!srgb_conversion) {
70 return border_color; 74 return border_color;
71 } 75 }
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; 77 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74} 78}
75 79
76float TSCEntry::GetMaxAnisotropy() const noexcept { 80float TSCEntry::MaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); 81 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78} 82}
79 83
80} // namespace Tegra::Texture 84} // namespace Tegra::Texture
85
86size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
87 return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
88}
89
90size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
91 return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
92}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index bbc7e3eaf..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
53 BC4 = 0x27, 53 BC4 = 0x27,
54 BC5 = 0x28, 54 BC5 = 0x28,
55 S8D24 = 0x29, 55 S8D24 = 0x29,
56 X8Z24 = 0x2a, 56 X8D24 = 0x2a,
57 D24S8 = 0x2b, 57 D24S8 = 0x2b,
58 X4V4Z24__COV4R4V = 0x2c, 58 X4V4D24__COV4R4V = 0x2c,
59 X4V4Z24__COV8R8V = 0x2d, 59 X4V4D24__COV8R8V = 0x2d,
60 V8Z24__COV4R12V = 0x2e, 60 V8D24__COV4R12V = 0x2e,
61 D32 = 0x2f, 61 D32 = 0x2f,
62 D32S8 = 0x30, 62 D32S8 = 0x30,
63 X8Z24_X20V4S8__COV4R4V = 0x31, 63 X8D24_X20V4S8__COV4R4V = 0x31,
64 X8Z24_X20V4S8__COV8R8V = 0x32, 64 X8D24_X20V4S8__COV8R8V = 0x32,
65 ZF32_X20V4X8__COV4R4V = 0x33, 65 D32_X20V4X8__COV4R4V = 0x33,
66 ZF32_X20V4X8__COV8R8V = 0x34, 66 D32_X20V4X8__COV8R8V = 0x34,
67 ZF32_X20V4S8__COV4R4V = 0x35, 67 D32_X20V4S8__COV4R4V = 0x35,
68 ZF32_X20V4S8__COV8R8V = 0x36, 68 D32_X20V4S8__COV8R8V = 0x36,
69 X8Z24_X16V8S8__COV4R12V = 0x37, 69 X8D24_X16V8S8__COV4R12V = 0x37,
70 ZF32_X16V8X8__COV4R12V = 0x38, 70 D32_X16V8X8__COV4R12V = 0x38,
71 ZF32_X16V8S8__COV4R12V = 0x39, 71 D32_X16V8S8__COV4R12V = 0x39,
72 D16 = 0x3a, 72 D16 = 0x3a,
73 V8Z24__COV8R24V = 0x3b, 73 V8D24__COV8R24V = 0x3b,
74 X8Z24_X16V8S8__COV8R24V = 0x3c, 74 X8D24_X16V8S8__COV8R24V = 0x3c,
75 ZF32_X16V8X8__COV8R24V = 0x3d, 75 D32_X16V8X8__COV8R24V = 0x3d,
76 ZF32_X16V8S8__COV8R24V = 0x3e, 76 D32_X16V8S8__COV8R24V = 0x3e,
77 ASTC_2D_4X4 = 0x40, 77 ASTC_2D_4X4 = 0x40,
78 ASTC_2D_5X5 = 0x41, 78 ASTC_2D_5X5 = 0x41,
79 ASTC_2D_6X6 = 0x42, 79 ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
146}; 146};
147 147
148union TextureHandle { 148union TextureHandle {
149 /* implicit */ TextureHandle(u32 raw_) : raw{raw_} {} 149 /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
150 150
151 u32 raw; 151 u32 raw;
152 BitField<0, 20, u32> tic_id; 152 BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157struct TICEntry { 157struct TICEntry {
158 static constexpr u32 DefaultBlockHeight = 16;
159 static constexpr u32 DefaultBlockDepth = 1;
160
161 union {
162 u32 raw;
163 BitField<0, 7, TextureFormat> format;
164 BitField<7, 3, ComponentType> r_type;
165 BitField<10, 3, ComponentType> g_type;
166 BitField<13, 3, ComponentType> b_type;
167 BitField<16, 3, ComponentType> a_type;
168
169 BitField<19, 3, SwizzleSource> x_source;
170 BitField<22, 3, SwizzleSource> y_source;
171 BitField<25, 3, SwizzleSource> z_source;
172 BitField<28, 3, SwizzleSource> w_source;
173 };
174 u32 address_low;
175 union { 158 union {
176 BitField<0, 16, u32> address_high; 159 struct {
177 BitField<21, 3, TICHeaderVersion> header_version; 160 union {
178 }; 161 BitField<0, 7, TextureFormat> format;
179 union { 162 BitField<7, 3, ComponentType> r_type;
180 BitField<0, 3, u32> block_width; 163 BitField<10, 3, ComponentType> g_type;
181 BitField<3, 3, u32> block_height; 164 BitField<13, 3, ComponentType> b_type;
182 BitField<6, 3, u32> block_depth; 165 BitField<16, 3, ComponentType> a_type;
166
167 BitField<19, 3, SwizzleSource> x_source;
168 BitField<22, 3, SwizzleSource> y_source;
169 BitField<25, 3, SwizzleSource> z_source;
170 BitField<28, 3, SwizzleSource> w_source;
171 };
172 u32 address_low;
173 union {
174 BitField<0, 16, u32> address_high;
175 BitField<16, 5, u32> layer_base_3_7;
176 BitField<21, 3, TICHeaderVersion> header_version;
177 BitField<24, 1, u32> load_store_hint;
178 BitField<25, 4, u32> view_coherency_hash;
179 BitField<29, 3, u32> layer_base_8_10;
180 };
181 union {
182 BitField<0, 3, u32> block_width;
183 BitField<3, 3, u32> block_height;
184 BitField<6, 3, u32> block_depth;
183 185
184 BitField<10, 3, u32> tile_width_spacing; 186 BitField<10, 3, u32> tile_width_spacing;
185 187
186 // High 16 bits of the pitch value 188 // High 16 bits of the pitch value
187 BitField<0, 16, u32> pitch_high; 189 BitField<0, 16, u32> pitch_high;
188 BitField<26, 1, u32> use_header_opt_control; 190 BitField<26, 1, u32> use_header_opt_control;
189 BitField<27, 1, u32> depth_texture; 191 BitField<27, 1, u32> depth_texture;
190 BitField<28, 4, u32> max_mip_level; 192 BitField<28, 4, u32> max_mip_level;
191 193
192 BitField<0, 16, u32> buffer_high_width_minus_one; 194 BitField<0, 16, u32> buffer_high_width_minus_one;
193 }; 195 };
194 union { 196 union {
195 BitField<0, 16, u32> width_minus_1; 197 BitField<0, 16, u32> width_minus_one;
196 BitField<22, 1, u32> srgb_conversion; 198 BitField<16, 3, u32> layer_base_0_2;
197 BitField<23, 4, TextureType> texture_type; 199 BitField<22, 1, u32> srgb_conversion;
198 BitField<29, 3, u32> border_size; 200 BitField<23, 4, TextureType> texture_type;
201 BitField<29, 3, u32> border_size;
199 202
200 BitField<0, 16, u32> buffer_low_width_minus_one; 203 BitField<0, 16, u32> buffer_low_width_minus_one;
201 }; 204 };
202 union { 205 union {
203 BitField<0, 16, u32> height_minus_1; 206 BitField<0, 16, u32> height_minus_1;
204 BitField<16, 14, u32> depth_minus_1; 207 BitField<16, 14, u32> depth_minus_1;
205 }; 208 BitField<30, 1, u32> is_sparse;
206 union { 209 BitField<31, 1, u32> normalized_coords;
207 BitField<6, 13, u32> mip_lod_bias; 210 };
208 BitField<27, 3, u32> max_anisotropy; 211 union {
212 BitField<6, 13, u32> mip_lod_bias;
213 BitField<27, 3, u32> max_anisotropy;
214 };
215 union {
216 BitField<0, 4, u32> res_min_mip_level;
217 BitField<4, 4, u32> res_max_mip_level;
218 BitField<8, 4, MsaaMode> msaa_mode;
219 BitField<12, 12, u32> min_lod_clamp;
220 };
221 };
222 std::array<u64, 4> raw;
209 }; 223 };
210 224
211 union { 225 constexpr bool operator==(const TICEntry& rhs) const noexcept {
212 BitField<0, 4, u32> res_min_mip_level; 226 return raw == rhs.raw;
213 BitField<4, 4, u32> res_max_mip_level; 227 }
214 BitField<8, 4, MsaaMode> msaa_mode;
215 BitField<12, 12, u32> min_lod_clamp;
216 };
217 228
218 GPUVAddr Address() const { 229 constexpr bool operator!=(const TICEntry& rhs) const noexcept {
230 return raw != rhs.raw;
231 }
232
233 constexpr GPUVAddr Address() const {
219 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); 234 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
220 } 235 }
221 236
222 u32 Pitch() const { 237 constexpr u32 Pitch() const {
223 ASSERT(header_version == TICHeaderVersion::Pitch || 238 ASSERT(header_version == TICHeaderVersion::Pitch ||
224 header_version == TICHeaderVersion::PitchColorKey); 239 header_version == TICHeaderVersion::PitchColorKey);
225 // The pitch value is 21 bits, and is 32B aligned. 240 // The pitch value is 21 bits, and is 32B aligned.
226 return pitch_high << 5; 241 return pitch_high << 5;
227 } 242 }
228 243
229 u32 Width() const { 244 constexpr u32 Width() const {
230 if (header_version != TICHeaderVersion::OneDBuffer) { 245 if (header_version != TICHeaderVersion::OneDBuffer) {
231 return width_minus_1 + 1; 246 return width_minus_one + 1;
232 } 247 }
233 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; 248 return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
234 } 249 }
235 250
236 u32 Height() const { 251 constexpr u32 Height() const {
237 return height_minus_1 + 1; 252 return height_minus_1 + 1;
238 } 253 }
239 254
240 u32 Depth() const { 255 constexpr u32 Depth() const {
241 return depth_minus_1 + 1; 256 return depth_minus_1 + 1;
242 } 257 }
243 258
244 u32 BlockWidth() const { 259 constexpr u32 BaseLayer() const {
245 ASSERT(IsTiled()); 260 return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
246 return block_width;
247 }
248
249 u32 BlockHeight() const {
250 ASSERT(IsTiled());
251 return block_height;
252 }
253
254 u32 BlockDepth() const {
255 ASSERT(IsTiled());
256 return block_depth;
257 } 261 }
258 262
259 bool IsTiled() const { 263 constexpr bool IsBlockLinear() const {
260 return header_version == TICHeaderVersion::BlockLinear || 264 return header_version == TICHeaderVersion::BlockLinear ||
261 header_version == TICHeaderVersion::BlockLinearColorKey; 265 header_version == TICHeaderVersion::BlockLinearColorKey;
262 } 266 }
263 267
264 bool IsLineal() const { 268 constexpr bool IsPitchLinear() const {
265 return header_version == TICHeaderVersion::Pitch || 269 return header_version == TICHeaderVersion::Pitch ||
266 header_version == TICHeaderVersion::PitchColorKey; 270 header_version == TICHeaderVersion::PitchColorKey;
267 } 271 }
268 272
269 bool IsBuffer() const { 273 constexpr bool IsBuffer() const {
270 return header_version == TICHeaderVersion::OneDBuffer; 274 return header_version == TICHeaderVersion::OneDBuffer;
271 } 275 }
272
273 bool IsSrgbConversionEnabled() const {
274 return srgb_conversion != 0;
275 }
276}; 276};
277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); 277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
278 278
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
309 Linear = 3, 309 Linear = 3,
310}; 310};
311 311
312enum class SamplerReduction : u32 {
313 WeightedAverage = 0,
314 Min = 1,
315 Max = 2,
316};
317
312enum class Anisotropy { 318enum class Anisotropy {
313 Default, 319 Default,
314 Filter2x, 320 Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
333 BitField<0, 2, TextureFilter> mag_filter; 339 BitField<0, 2, TextureFilter> mag_filter;
334 BitField<4, 2, TextureFilter> min_filter; 340 BitField<4, 2, TextureFilter> min_filter;
335 BitField<6, 2, TextureMipmapFilter> mipmap_filter; 341 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
342 BitField<8, 1, u32> cubemap_anisotropy;
336 BitField<9, 1, u32> cubemap_interface_filtering; 343 BitField<9, 1, u32> cubemap_interface_filtering;
344 BitField<10, 2, SamplerReduction> reduction_filter;
337 BitField<12, 13, u32> mip_lod_bias; 345 BitField<12, 13, u32> mip_lod_bias;
346 BitField<25, 1, u32> float_coord_normalization;
347 BitField<26, 5, u32> trilin_opt;
338 }; 348 };
339 union { 349 union {
340 BitField<0, 12, u32> min_lod_clamp; 350 BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
347 }; 357 };
348 std::array<f32, 4> border_color; 358 std::array<f32, 4> border_color;
349 }; 359 };
350 std::array<u8, 0x20> raw; 360 std::array<u64, 4> raw;
351 }; 361 };
352 362
353 std::array<float, 4> GetBorderColor() const noexcept; 363 constexpr bool operator==(const TSCEntry& rhs) const noexcept {
364 return raw == rhs.raw;
365 }
366
367 constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
368 return raw != rhs.raw;
369 }
370
371 std::array<float, 4> BorderColor() const noexcept;
354 372
355 float GetMaxAnisotropy() const noexcept; 373 float MaxAnisotropy() const noexcept;
356 374
357 float GetMinLod() const { 375 float MinLod() const {
358 return static_cast<float>(min_lod_clamp) / 256.0f; 376 return static_cast<float>(min_lod_clamp) / 256.0f;
359 } 377 }
360 378
361 float GetMaxLod() const { 379 float MaxLod() const {
362 return static_cast<float>(max_lod_clamp) / 256.0f; 380 return static_cast<float>(max_lod_clamp) / 256.0f;
363 } 381 }
364 382
365 float GetLodBias() const { 383 float LodBias() const {
366 // Sign extend the 13-bit value. 384 // Sign extend the 13-bit value.
367 constexpr u32 mask = 1U << (13 - 1); 385 static constexpr u32 mask = 1U << (13 - 1);
368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 386 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
369 } 387 }
370}; 388};
371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 389static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
372 390
373struct FullTextureInfo { 391} // namespace Tegra::Texture
374 TICEntry tic; 392
375 TSCEntry tsc; 393template <>
394struct std::hash<Tegra::Texture::TICEntry> {
395 size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
376}; 396};
377 397
378} // namespace Tegra::Texture 398template <>
399struct std::hash<Tegra::Texture::TSCEntry> {
400 size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
401};