summary | refs | log | tree | commit | diff
path: root/src/video_core/textures
diff options
context:
space:
mode:
author: Levi, 2021-01-10 22:09:56 -0700
committer: Levi, 2021-01-10 22:09:56 -0700
commit: 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch)
tree: 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/textures
parent: More forgetting... duh (diff)
parent: Merge pull request #5229 from Morph1984/fullscreen-opt (diff)
download: yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz
          yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz
          yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/textures')
-rw-r--r-- src/video_core/textures/astc.cpp 58
-rw-r--r-- src/video_core/textures/astc.h 5
-rw-r--r-- src/video_core/textures/convert.cpp 93
-rw-r--r-- src/video_core/textures/convert.h 22
-rw-r--r-- src/video_core/textures/decoders.cpp 249
-rw-r--r-- src/video_core/textures/decoders.h 44
-rw-r--r-- src/video_core/textures/texture.cpp 16
-rw-r--r-- src/video_core/textures/texture.h 239
8 files changed, 274 insertions, 452 deletions
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 365bde2f1..acd5bdd78 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -18,6 +18,7 @@
18#include <algorithm> 18#include <algorithm>
19#include <cassert> 19#include <cassert>
20#include <cstring> 20#include <cstring>
21#include <span>
21#include <vector> 22#include <vector>
22 23
23#include <boost/container/static_vector.hpp> 24#include <boost/container/static_vector.hpp>
@@ -600,7 +601,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
600 return params; 601 return params;
601} 602}
602 603
603static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, 604static void FillVoidExtentLDR(InputBitStream& strm, std::span<u32> outBuf, u32 blockWidth,
604 u32 blockHeight) { 605 u32 blockHeight) {
605 // Don't actually care about the void extent, just read the bits... 606 // Don't actually care about the void extent, just read the bits...
606 for (s32 i = 0; i < 4; ++i) { 607 for (s32 i = 0; i < 4; ++i) {
@@ -623,7 +624,7 @@ static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 block
623 } 624 }
624} 625}
625 626
626static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { 627static void FillError(std::span<u32> outBuf, u32 blockWidth, u32 blockHeight) {
627 for (u32 j = 0; j < blockHeight; j++) { 628 for (u32 j = 0; j < blockHeight; j++) {
628 for (u32 i = 0; i < blockWidth; i++) { 629 for (u32 i = 0; i < blockWidth; i++) {
629 outBuf[j * blockWidth + i] = 0xFFFF00FF; 630 outBuf[j * blockWidth + i] = 0xFFFF00FF;
@@ -1438,9 +1439,9 @@ static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues,
1438#undef READ_INT_VALUES 1439#undef READ_INT_VALUES
1439} 1440}
1440 1441
1441static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, 1442static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth,
1442 u32* outBuf) { 1443 const u32 blockHeight, std::span<u32, 12 * 12> outBuf) {
1443 InputBitStream strm(inBuf); 1444 InputBitStream strm(inBuf.data());
1444 TexelWeightParams weightParams = DecodeBlockInfo(strm); 1445 TexelWeightParams weightParams = DecodeBlockInfo(strm);
1445 1446
1446 // Was there an error? 1447 // Was there an error?
@@ -1601,8 +1602,8 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1601 } 1602 }
1602 1603
1603 // Read the texel weight data.. 1604 // Read the texel weight data..
1604 u8 texelWeightData[16]; 1605 std::array<u8, 16> texelWeightData;
1605 memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); 1606 std::ranges::copy(inBuf, texelWeightData.begin());
1606 1607
1607 // Reverse everything 1608 // Reverse everything
1608 for (u32 i = 0; i < 8; i++) { 1609 for (u32 i = 0; i < 8; i++) {
@@ -1618,14 +1619,15 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1618 1619
1619 // Make sure that higher non-texel bits are set to zero 1620 // Make sure that higher non-texel bits are set to zero
1620 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; 1621 const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
1621 texelWeightData[clearByteStart - 1] = 1622 if (clearByteStart > 0) {
1622 texelWeightData[clearByteStart - 1] & 1623 texelWeightData[clearByteStart - 1] &=
1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1624 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1625 }
1626 std::memset(texelWeightData.data() + clearByteStart, 0, std::min(16U - clearByteStart, 16U));
1625 1627
1626 IntegerEncodedVector texelWeightValues; 1628 IntegerEncodedVector texelWeightValues;
1627 1629
1628 InputBitStream weightStream(texelWeightData); 1630 InputBitStream weightStream(texelWeightData.data());
1629 1631
1630 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, 1632 DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight,
1631 weightParams.GetNumWeightValues()); 1633 weightParams.GetNumWeightValues());
@@ -1672,36 +1674,32 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1672 1674
1673namespace Tegra::Texture::ASTC { 1675namespace Tegra::Texture::ASTC {
1674 1676
1675std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, 1677void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
1676 u32 block_height) { 1678 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) {
1677 u32 blockIdx = 0; 1679 u32 block_index = 0;
1678 std::size_t depth_offset = 0; 1680 std::size_t depth_offset = 0;
1679 std::vector<u8> outData(height * width * depth * 4); 1681 for (u32 z = 0; z < depth; z++) {
1680 for (u32 k = 0; k < depth; k++) { 1682 for (u32 y = 0; y < height; y += block_height) {
1681 for (u32 j = 0; j < height; j += block_height) { 1683 for (u32 x = 0; x < width; x += block_width) {
1682 for (u32 i = 0; i < width; i += block_width) { 1684 const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)};
1683
1684 const u8* blockPtr = data + blockIdx * 16;
1685 1685
1686 // Blocks can be at most 12x12 1686 // Blocks can be at most 12x12
1687 u32 uncompData[144]; 1687 std::array<u32, 12 * 12> uncompData;
1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); 1688 ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
1689 1689
1690 u32 decompWidth = std::min(block_width, width - i); 1690 u32 decompWidth = std::min(block_width, width - x);
1691 u32 decompHeight = std::min(block_height, height - j); 1691 u32 decompHeight = std::min(block_height, height - y);
1692 1692
1693 u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; 1693 const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4);
1694 for (u32 jj = 0; jj < decompHeight; jj++) { 1694 for (u32 jj = 0; jj < decompHeight; jj++) {
1695 memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); 1695 std::memcpy(outRow.data() + jj * width * 4,
1696 uncompData.data() + jj * block_width, decompWidth * 4);
1696 } 1697 }
1697 1698 ++block_index;
1698 blockIdx++;
1699 } 1699 }
1700 } 1700 }
1701 depth_offset += height * width * 4; 1701 depth_offset += height * width * 4;
1702 } 1702 }
1703
1704 return outData;
1705} 1703}
1706 1704
1707} // namespace Tegra::Texture::ASTC 1705} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
index 991cdba72..9105119bc 100644
--- a/src/video_core/textures/astc.h
+++ b/src/video_core/textures/astc.h
@@ -5,11 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <cstdint> 7#include <cstdint>
8#include <vector>
9 8
10namespace Tegra::Texture::ASTC { 9namespace Tegra::Texture::ASTC {
11 10
12std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, 11void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
13 uint32_t depth, uint32_t block_width, uint32_t block_height); 12 uint32_t block_width, uint32_t block_height, std::span<uint8_t> output);
14 13
15} // namespace Tegra::Texture::ASTC 14} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp
deleted file mode 100644
index 962921483..000000000
--- a/src/video_core/textures/convert.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstring>
7#include <tuple>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/surface.h"
14#include "video_core/textures/astc.h"
15#include "video_core/textures/convert.h"
16
17namespace Tegra::Texture {
18
19using VideoCore::Surface::PixelFormat;
20
21template <bool reverse>
22void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
23 union S8Z24 {
24 BitField<0, 24, u32> z24;
25 BitField<24, 8, u32> s8;
26 };
27 static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size");
28
29 union Z24S8 {
30 BitField<0, 8, u32> s8;
31 BitField<8, 24, u32> z24;
32 };
33 static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size");
34
35 S8Z24 s8z24_pixel{};
36 Z24S8 z24s8_pixel{};
37 constexpr auto bpp{
38 VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8_UINT_D24_UNORM)};
39 for (std::size_t y = 0; y < height; ++y) {
40 for (std::size_t x = 0; x < width; ++x) {
41 const std::size_t offset{bpp * (y * width + x)};
42 if constexpr (reverse) {
43 std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8));
44 s8z24_pixel.s8.Assign(z24s8_pixel.s8);
45 s8z24_pixel.z24.Assign(z24s8_pixel.z24);
46 std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24));
47 } else {
48 std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24));
49 z24s8_pixel.s8.Assign(s8z24_pixel.s8);
50 z24s8_pixel.z24.Assign(s8z24_pixel.z24);
51 std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8));
52 }
53 }
54 }
55}
56
57static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) {
58 SwapS8Z24ToZ24S8<false>(data, width, height);
59}
60
61static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) {
62 SwapS8Z24ToZ24S8<true>(data, width, height);
63}
64
65void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width,
66 u32 height, u32 depth, bool convert_astc, bool convert_s8z24) {
67 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
68 // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC.
69 u32 block_width{};
70 u32 block_height{};
71 std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format);
72 const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress(
73 in_data, width, height, depth, block_width, block_height);
74 std::copy(rgba8_data.begin(), rgba8_data.end(), out_data);
75
76 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
77 Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height);
78 }
79}
80
81void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth,
82 bool convert_astc, bool convert_s8z24) {
83 if (convert_astc && IsPixelFormatASTC(pixel_format)) {
84 LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented",
85 static_cast<u32>(pixel_format));
86 UNREACHABLE();
87
88 } else if (convert_s8z24 && pixel_format == PixelFormat::S8_UINT_D24_UNORM) {
89 Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height);
90 }
91}
92
93} // namespace Tegra::Texture
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h
deleted file mode 100644
index d5d6c77bb..000000000
--- a/src/video_core/textures/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCore::Surface {
10enum class PixelFormat;
11}
12
13namespace Tegra::Texture {
14
15void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format,
16 u32 width, u32 height, u32 depth, bool convert_astc,
17 bool convert_s8z24);
18
19void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width,
20 u32 height, u32 depth, bool convert_astc, bool convert_s8z24);
21
22} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 16d46a018..9f5181318 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -2,204 +2,111 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <cmath> 6#include <cmath>
6#include <cstring> 7#include <cstring>
8#include <span>
9#include <utility>
10
7#include "common/alignment.h" 11#include "common/alignment.h"
8#include "common/assert.h" 12#include "common/assert.h"
9#include "common/bit_util.h" 13#include "common/bit_util.h"
14#include "common/div_ceil.h"
10#include "video_core/gpu.h" 15#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 16#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 17#include "video_core/textures/texture.h"
13 18
14namespace Tegra::Texture { 19namespace Tegra::Texture {
15namespace {
16 20
21namespace {
17/** 22/**
18 * This table represents the internal swizzle of a gob, 23 * This table represents the internal swizzle of a gob, in format 16 bytes x 2 sector packing.
19 * in format 16 bytes x 2 sector packing.
20 * Calculates the offset of an (x, y) position within a swizzled texture. 24 * Calculates the offset of an (x, y) position within a swizzled texture.
21 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188 25 * Taken from the Tegra X1 Technical Reference Manual. pages 1187-1188
22 */ 26 */
23template <std::size_t N, std::size_t M, u32 Align> 27constexpr SwizzleTable MakeSwizzleTableConst() {
24struct alignas(64) SwizzleTable { 28 SwizzleTable table{};
25 static_assert(M * Align == 64, "Swizzle Table does not align to GOB"); 29 for (u32 y = 0; y < table.size(); ++y) {
26 constexpr SwizzleTable() { 30 for (u32 x = 0; x < table[0].size(); ++x) {
27 for (u32 y = 0; y < N; ++y) { 31 table[y][x] = ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + ((x % 32) / 16) * 32 +
28 for (u32 x = 0; x < M; ++x) { 32 (y % 2) * 16 + (x % 16);
29 const u32 x2 = x * Align;
30 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 +
31 ((x2 % 32) / 16) * 32 + (y % 2) * 16 + (x2 % 16));
32 }
33 } 33 }
34 } 34 }
35 const std::array<u16, M>& operator[](std::size_t index) const { 35 return table;
36 return values[index]; 36}
37 }
38 std::array<std::array<u16, M>, N> values{};
39};
40 37
41constexpr u32 FAST_SWIZZLE_ALIGN = 16; 38constexpr SwizzleTable SWIZZLE_TABLE = MakeSwizzleTableConst();
42 39
43constexpr auto LEGACY_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_X, GOB_SIZE_X, GOB_SIZE_Z>(); 40template <bool TO_LINEAR>
44constexpr auto FAST_SWIZZLE_TABLE = SwizzleTable<GOB_SIZE_Y, 4, FAST_SWIZZLE_ALIGN>(); 41void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
42 u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) {
43 // The origin of the transformation can be configured here, leave it as zero as the current API
44 // doesn't expose it.
45 static constexpr u32 origin_x = 0;
46 static constexpr u32 origin_y = 0;
47 static constexpr u32 origin_z = 0;
45 48
46/** 49 // We can configure here a custom pitch
47 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 50 // As it's not exposed 'width * bpp' will be the expected pitch.
48 * Instead of going gob by gob, we map the coordinates inside a block and manage from 51 const u32 pitch = width * bytes_per_pixel;
49 * those. Block_Width is assumed to be 1. 52 const u32 stride = Common::AlignBits(width, stride_alignment) * bytes_per_pixel;
50 */
51void PreciseProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
52 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
53 const u32 y_end, const u32 z_end, const u32 tile_offset,
54 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
55 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
56 std::array<u8*, 2> data_ptrs;
57 u32 z_address = tile_offset;
58
59 for (u32 z = z_start; z < z_end; z++) {
60 u32 y_address = z_address;
61 u32 pixel_base = layer_z * z + y_start * stride_x;
62 for (u32 y = y_start; y < y_end; y++) {
63 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y];
64 for (u32 x = x_start; x < x_end; x++) {
65 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % GOB_SIZE_X]};
66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
70 }
71 pixel_base += stride_x;
72 if ((y + 1) % GOB_SIZE_Y == 0)
73 y_address += GOB_SIZE;
74 }
75 z_address += xy_block_size;
76 }
77}
78 53
79/** 54 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block. 55 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
81 * Instead of going gob by gob, we map the coordinates inside a block and manage from 56 const u32 slice_size =
82 * those. Block_Width is assumed to be 1. 57 Common::DivCeilLog2(height, block_height + GOB_SIZE_Y_SHIFT) * block_size;
83 */
84void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle,
85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
86 const u32 y_end, const u32 z_end, const u32 tile_offset,
87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
89 std::array<u8*, 2> data_ptrs;
90 u32 z_address = tile_offset;
91 const u32 x_startb = x_start * bytes_per_pixel;
92 const u32 x_endb = x_end * bytes_per_pixel;
93
94 for (u32 z = z_start; z < z_end; z++) {
95 u32 y_address = z_address;
96 u32 pixel_base = layer_z * z + y_start * stride_x;
97 for (u32 y = y_start; y < y_end; y++) {
98 const auto& table = FAST_SWIZZLE_TABLE[y % GOB_SIZE_Y];
99 for (u32 xb = x_startb; xb < x_endb; xb += FAST_SWIZZLE_ALIGN) {
100 const u32 swizzle_offset{y_address + table[(xb / FAST_SWIZZLE_ALIGN) % 4]};
101 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
102 const u32 pixel_index{out_x + pixel_base};
103 data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset;
104 data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index;
105 std::memcpy(data_ptrs[0], data_ptrs[1], FAST_SWIZZLE_ALIGN);
106 }
107 pixel_base += stride_x;
108 if ((y + 1) % GOB_SIZE_Y == 0)
109 y_address += GOB_SIZE;
110 }
111 z_address += xy_block_size;
112 }
113}
114 58
115/** 59 const u32 block_height_mask = (1U << block_height) - 1;
116 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue. 60 const u32 block_depth_mask = (1U << block_depth) - 1;
117 * The body of this function takes care of splitting the swizzled texture into blocks, 61 const u32 x_shift = GOB_SIZE_SHIFT + block_height + block_depth;
118 * and managing the extents of it. Once all the parameters of a single block are obtained, 62
119 * the function calls 'ProcessBlock' to process that particular Block. 63 for (u32 slice = 0; slice < depth; ++slice) {
120 * 64 const u32 z = slice + origin_z;
121 * Documentation for the memory layout and decoding can be found at: 65 const u32 offset_z = (z >> block_depth) * slice_size +
122 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces 66 ((z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height));
123 */ 67 for (u32 line = 0; line < height; ++line) {
124template <bool fast> 68 const u32 y = line + origin_y;
125void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool unswizzle, 69 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
126 const u32 width, const u32 height, const u32 depth, const u32 bytes_per_pixel, 70
127 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth, 71 const u32 block_y = y >> GOB_SIZE_Y_SHIFT;
128 const u32 width_spacing) { 72 const u32 offset_y = (block_y >> block_height) * block_size +
129 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; 73 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
130 const u32 stride_x = width * out_bytes_per_pixel; 74
131 const u32 layer_z = height * stride_x; 75 for (u32 column = 0; column < width; ++column) {
132 const u32 gob_elements_x = GOB_SIZE_X / bytes_per_pixel; 76 const u32 x = (column + origin_x) * bytes_per_pixel;
133 constexpr u32 gob_elements_y = GOB_SIZE_Y; 77 const u32 offset_x = (x >> GOB_SIZE_X_SHIFT) << x_shift;
134 constexpr u32 gob_elements_z = GOB_SIZE_Z; 78
135 const u32 block_x_elements = gob_elements_x; 79 const u32 base_swizzled_offset = offset_z + offset_y + offset_x;
136 const u32 block_y_elements = gob_elements_y * block_height; 80 const u32 swizzled_offset = base_swizzled_offset + table[x % GOB_SIZE_X];
137 const u32 block_z_elements = gob_elements_z * block_depth; 81
138 const u32 aligned_width = Common::AlignUp(width, gob_elements_x * width_spacing); 82 const u32 unswizzled_offset =
139 const u32 blocks_on_x = div_ceil(aligned_width, block_x_elements); 83 slice * pitch * height + line * pitch + column * bytes_per_pixel;
140 const u32 blocks_on_y = div_ceil(height, block_y_elements); 84
141 const u32 blocks_on_z = div_ceil(depth, block_z_elements); 85 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
142 const u32 xy_block_size = GOB_SIZE * block_height; 86 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
143 const u32 block_size = xy_block_size * block_depth; 87 std::memcpy(dst, src, bytes_per_pixel);
144 u32 tile_offset = 0;
145 for (u32 zb = 0; zb < blocks_on_z; zb++) {
146 const u32 z_start = zb * block_z_elements;
147 const u32 z_end = std::min(depth, z_start + block_z_elements);
148 for (u32 yb = 0; yb < blocks_on_y; yb++) {
149 const u32 y_start = yb * block_y_elements;
150 const u32 y_end = std::min(height, y_start + block_y_elements);
151 for (u32 xb = 0; xb < blocks_on_x; xb++) {
152 const u32 x_start = xb * block_x_elements;
153 const u32 x_end = std::min(width, x_start + block_x_elements);
154 if constexpr (fast) {
155 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
156 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
157 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
158 } else {
159 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
160 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
161 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
162 }
163 tile_offset += block_size;
164 } 88 }
165 } 89 }
166 } 90 }
167} 91}
168
169} // Anonymous namespace 92} // Anonymous namespace
170 93
171void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, 94SwizzleTable MakeSwizzleTable() {
172 u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, 95 return SWIZZLE_TABLE;
173 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) {
174 const u32 block_height_size{1U << block_height};
175 const u32 block_depth_size{1U << block_depth};
176 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % FAST_SWIZZLE_ALIGN == 0) {
177 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
178 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
179 block_depth_size, width_spacing);
180 } else {
181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
182 bytes_per_pixel, out_bytes_per_pixel, block_height_size,
183 block_depth_size, width_spacing);
184 }
185} 96}
186 97
187void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 98void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
188 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 99 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
189 u32 block_depth, u32 width_spacing) { 100 u32 stride_alignment) {
190 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 101 Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
191 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 102 stride_alignment);
192 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
193 width_spacing);
194} 103}
195 104
196std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 105void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
197 u32 width, u32 height, u32 depth, u32 block_height, 106 u32 height, u32 depth, u32 block_height, u32 block_depth,
198 u32 block_depth, u32 width_spacing) { 107 u32 stride_alignment) {
199 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 108 Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
200 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 109 stride_alignment);
201 width, height, depth, block_height, block_depth, width_spacing);
202 return unswizzled_data;
203} 110}
204 111
205void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 112void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
@@ -213,7 +120,7 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
213 const u32 gob_address_y = 120 const u32 gob_address_y =
214 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 121 (dst_y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
215 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 122 ((dst_y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
216 const auto& table = LEGACY_SWIZZLE_TABLE[dst_y % GOB_SIZE_Y]; 123 const auto& table = SWIZZLE_TABLE[dst_y % GOB_SIZE_Y];
217 for (u32 x = 0; x < subrect_width; ++x) { 124 for (u32 x = 0; x < subrect_width; ++x) {
218 const u32 dst_x = x + offset_x; 125 const u32 dst_x = x + offset_x;
219 const u32 gob_address = 126 const u32 gob_address =
@@ -235,11 +142,11 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width,
235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); 142 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 143
237 const u32 block_height_mask = (1U << block_height) - 1; 144 const u32 block_height_mask = (1U << block_height) - 1;
238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; 145 const u32 x_shift = GOB_SIZE_SHIFT + block_height;
239 146
240 for (u32 line = 0; line < line_count; ++line) { 147 for (u32 line = 0; line < line_count; ++line) {
241 const u32 src_y = line + origin_y; 148 const u32 src_y = line + origin_y;
242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; 149 const auto& table = SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 150
244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; 151 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 const u32 src_offset_y = (block_y >> block_height) * block_size + 152 const u32 src_offset_y = (block_y >> block_height) * block_size +
@@ -270,7 +177,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; 177 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
271 178
272 for (u32 line = 0; line < line_count; ++line) { 179 for (u32 line = 0; line < line_count; ++line) {
273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 180 const auto& table = SWIZZLE_TABLE[line % GOB_SIZE_Y];
274 const u32 block_y = line / GOB_SIZE_Y; 181 const u32 block_y = line / GOB_SIZE_Y;
275 const u32 dst_offset_y = 182 const u32 dst_offset_y =
276 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE; 183 (block_y >> block_height) * block_size + (block_y & block_height_mask) * GOB_SIZE;
@@ -293,7 +200,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32
293 const std::size_t gob_address_y = 200 const std::size_t gob_address_y =
294 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs + 201 (y / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height * image_width_in_gobs +
295 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 202 ((y % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE;
296 const auto& table = LEGACY_SWIZZLE_TABLE[y % GOB_SIZE_Y]; 203 const auto& table = SWIZZLE_TABLE[y % GOB_SIZE_Y];
297 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { 204 for (std::size_t x = dst_x; x < width && count < copy_size; ++x) {
298 const std::size_t gob_address = 205 const std::size_t gob_address =
299 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height; 206 gob_address_y + (x / GOB_SIZE_X) * GOB_SIZE * block_height;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 01e156bc8..d7cdc81e8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -4,7 +4,8 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vector> 7#include <span>
8
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
@@ -15,28 +16,25 @@ constexpr u32 GOB_SIZE_Y = 8;
15constexpr u32 GOB_SIZE_Z = 1; 16constexpr u32 GOB_SIZE_Z = 1;
16constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z; 17constexpr u32 GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
17 18
18constexpr std::size_t GOB_SIZE_X_SHIFT = 6; 19constexpr u32 GOB_SIZE_X_SHIFT = 6;
19constexpr std::size_t GOB_SIZE_Y_SHIFT = 3; 20constexpr u32 GOB_SIZE_Y_SHIFT = 3;
20constexpr std::size_t GOB_SIZE_Z_SHIFT = 0; 21constexpr u32 GOB_SIZE_Z_SHIFT = 0;
21constexpr std::size_t GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT; 22constexpr u32 GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
22 23
23/// Unswizzles a swizzled texture without changing its format. 24using SwizzleTable = std::array<std::array<u32, GOB_SIZE_X>, GOB_SIZE_Y>;
24void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y, 25
25 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 26/// Returns a z-order swizzle table
26 u32 block_height = TICEntry::DefaultBlockHeight, 27SwizzleTable MakeSwizzleTable();
27 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 28
28 29/// Unswizzles a block linear texture into linear memory.
29/// Unswizzles a swizzled texture without changing its format. 30void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
30std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel, 31 u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
31 u32 width, u32 height, u32 depth, 32 u32 stride_alignment = 1);
32 u32 block_height = TICEntry::DefaultBlockHeight, 33
33 u32 block_depth = TICEntry::DefaultBlockHeight, 34/// Swizzles linear memory into a block linear texture.
34 u32 width_spacing = 0); 35void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
35 36 u32 height, u32 depth, u32 block_height, u32 block_depth,
36/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 37 u32 stride_alignment = 1);
37void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
38 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
39 bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing);
40 38
41/// This function calculates the correct size of a texture depending if it's tiled or not. 39/// This function calculates the correct size of a texture depending if it's tiled or not.
42std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 40std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
index 4171e3ef2..ae5621a7d 100644
--- a/src/video_core/textures/texture.cpp
+++ b/src/video_core/textures/texture.cpp
@@ -5,9 +5,13 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7 7
8#include "common/cityhash.h"
8#include "core/settings.h" 9#include "core/settings.h"
9#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
10 11
12using Tegra::Texture::TICEntry;
13using Tegra::Texture::TSCEntry;
14
11namespace Tegra::Texture { 15namespace Tegra::Texture {
12 16
13namespace { 17namespace {
@@ -65,7 +69,7 @@ unsigned SettingsMinimumAnisotropy() noexcept {
65 69
66} // Anonymous namespace 70} // Anonymous namespace
67 71
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { 72std::array<float, 4> TSCEntry::BorderColor() const noexcept {
69 if (!srgb_conversion) { 73 if (!srgb_conversion) {
70 return border_color; 74 return border_color;
71 } 75 }
@@ -73,8 +77,16 @@ std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; 77 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74} 78}
75 79
76float TSCEntry::GetMaxAnisotropy() const noexcept { 80float TSCEntry::MaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); 81 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78} 82}
79 83
80} // namespace Tegra::Texture 84} // namespace Tegra::Texture
85
86size_t std::hash<TICEntry>::operator()(const TICEntry& tic) const noexcept {
87 return Common::CityHash64(reinterpret_cast<const char*>(&tic), sizeof tic);
88}
89
90size_t std::hash<TSCEntry>::operator()(const TSCEntry& tsc) const noexcept {
91 return Common::CityHash64(reinterpret_cast<const char*>(&tsc), sizeof tsc);
92}
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 0574fef12..c1d14335e 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -53,27 +53,27 @@ enum class TextureFormat : u32 {
53 BC4 = 0x27, 53 BC4 = 0x27,
54 BC5 = 0x28, 54 BC5 = 0x28,
55 S8D24 = 0x29, 55 S8D24 = 0x29,
56 X8Z24 = 0x2a, 56 X8D24 = 0x2a,
57 D24S8 = 0x2b, 57 D24S8 = 0x2b,
58 X4V4Z24__COV4R4V = 0x2c, 58 X4V4D24__COV4R4V = 0x2c,
59 X4V4Z24__COV8R8V = 0x2d, 59 X4V4D24__COV8R8V = 0x2d,
60 V8Z24__COV4R12V = 0x2e, 60 V8D24__COV4R12V = 0x2e,
61 D32 = 0x2f, 61 D32 = 0x2f,
62 D32S8 = 0x30, 62 D32S8 = 0x30,
63 X8Z24_X20V4S8__COV4R4V = 0x31, 63 X8D24_X20V4S8__COV4R4V = 0x31,
64 X8Z24_X20V4S8__COV8R8V = 0x32, 64 X8D24_X20V4S8__COV8R8V = 0x32,
65 ZF32_X20V4X8__COV4R4V = 0x33, 65 D32_X20V4X8__COV4R4V = 0x33,
66 ZF32_X20V4X8__COV8R8V = 0x34, 66 D32_X20V4X8__COV8R8V = 0x34,
67 ZF32_X20V4S8__COV4R4V = 0x35, 67 D32_X20V4S8__COV4R4V = 0x35,
68 ZF32_X20V4S8__COV8R8V = 0x36, 68 D32_X20V4S8__COV8R8V = 0x36,
69 X8Z24_X16V8S8__COV4R12V = 0x37, 69 X8D24_X16V8S8__COV4R12V = 0x37,
70 ZF32_X16V8X8__COV4R12V = 0x38, 70 D32_X16V8X8__COV4R12V = 0x38,
71 ZF32_X16V8S8__COV4R12V = 0x39, 71 D32_X16V8S8__COV4R12V = 0x39,
72 D16 = 0x3a, 72 D16 = 0x3a,
73 V8Z24__COV8R24V = 0x3b, 73 V8D24__COV8R24V = 0x3b,
74 X8Z24_X16V8S8__COV8R24V = 0x3c, 74 X8D24_X16V8S8__COV8R24V = 0x3c,
75 ZF32_X16V8X8__COV8R24V = 0x3d, 75 D32_X16V8X8__COV8R24V = 0x3d,
76 ZF32_X16V8S8__COV8R24V = 0x3e, 76 D32_X16V8S8__COV8R24V = 0x3e,
77 ASTC_2D_4X4 = 0x40, 77 ASTC_2D_4X4 = 0x40,
78 ASTC_2D_5X5 = 0x41, 78 ASTC_2D_5X5 = 0x41,
79 ASTC_2D_6X6 = 0x42, 79 ASTC_2D_6X6 = 0x42,
@@ -146,7 +146,7 @@ enum class MsaaMode : u32 {
146}; 146};
147 147
148union TextureHandle { 148union TextureHandle {
149 TextureHandle(u32 raw) : raw{raw} {} 149 /* implicit */ constexpr TextureHandle(u32 raw_) : raw{raw_} {}
150 150
151 u32 raw; 151 u32 raw;
152 BitField<0, 20, u32> tic_id; 152 BitField<0, 20, u32> tic_id;
@@ -155,124 +155,124 @@ union TextureHandle {
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157struct TICEntry { 157struct TICEntry {
158 static constexpr u32 DefaultBlockHeight = 16;
159 static constexpr u32 DefaultBlockDepth = 1;
160
161 union {
162 u32 raw;
163 BitField<0, 7, TextureFormat> format;
164 BitField<7, 3, ComponentType> r_type;
165 BitField<10, 3, ComponentType> g_type;
166 BitField<13, 3, ComponentType> b_type;
167 BitField<16, 3, ComponentType> a_type;
168
169 BitField<19, 3, SwizzleSource> x_source;
170 BitField<22, 3, SwizzleSource> y_source;
171 BitField<25, 3, SwizzleSource> z_source;
172 BitField<28, 3, SwizzleSource> w_source;
173 };
174 u32 address_low;
175 union { 158 union {
176 BitField<0, 16, u32> address_high; 159 struct {
177 BitField<21, 3, TICHeaderVersion> header_version; 160 union {
178 }; 161 BitField<0, 7, TextureFormat> format;
179 union { 162 BitField<7, 3, ComponentType> r_type;
180 BitField<0, 3, u32> block_width; 163 BitField<10, 3, ComponentType> g_type;
181 BitField<3, 3, u32> block_height; 164 BitField<13, 3, ComponentType> b_type;
182 BitField<6, 3, u32> block_depth; 165 BitField<16, 3, ComponentType> a_type;
166
167 BitField<19, 3, SwizzleSource> x_source;
168 BitField<22, 3, SwizzleSource> y_source;
169 BitField<25, 3, SwizzleSource> z_source;
170 BitField<28, 3, SwizzleSource> w_source;
171 };
172 u32 address_low;
173 union {
174 BitField<0, 16, u32> address_high;
175 BitField<16, 5, u32> layer_base_3_7;
176 BitField<21, 3, TICHeaderVersion> header_version;
177 BitField<24, 1, u32> load_store_hint;
178 BitField<25, 4, u32> view_coherency_hash;
179 BitField<29, 3, u32> layer_base_8_10;
180 };
181 union {
182 BitField<0, 3, u32> block_width;
183 BitField<3, 3, u32> block_height;
184 BitField<6, 3, u32> block_depth;
183 185
184 BitField<10, 3, u32> tile_width_spacing; 186 BitField<10, 3, u32> tile_width_spacing;
185 187
186 // High 16 bits of the pitch value 188 // High 16 bits of the pitch value
187 BitField<0, 16, u32> pitch_high; 189 BitField<0, 16, u32> pitch_high;
188 BitField<26, 1, u32> use_header_opt_control; 190 BitField<26, 1, u32> use_header_opt_control;
189 BitField<27, 1, u32> depth_texture; 191 BitField<27, 1, u32> depth_texture;
190 BitField<28, 4, u32> max_mip_level; 192 BitField<28, 4, u32> max_mip_level;
191 193
192 BitField<0, 16, u32> buffer_high_width_minus_one; 194 BitField<0, 16, u32> buffer_high_width_minus_one;
193 }; 195 };
194 union { 196 union {
195 BitField<0, 16, u32> width_minus_1; 197 BitField<0, 16, u32> width_minus_one;
196 BitField<22, 1, u32> srgb_conversion; 198 BitField<16, 3, u32> layer_base_0_2;
197 BitField<23, 4, TextureType> texture_type; 199 BitField<22, 1, u32> srgb_conversion;
198 BitField<29, 3, u32> border_size; 200 BitField<23, 4, TextureType> texture_type;
201 BitField<29, 3, u32> border_size;
199 202
200 BitField<0, 16, u32> buffer_low_width_minus_one; 203 BitField<0, 16, u32> buffer_low_width_minus_one;
201 }; 204 };
202 union { 205 union {
203 BitField<0, 16, u32> height_minus_1; 206 BitField<0, 16, u32> height_minus_1;
204 BitField<16, 14, u32> depth_minus_1; 207 BitField<16, 14, u32> depth_minus_1;
205 }; 208 BitField<30, 1, u32> is_sparse;
206 union { 209 BitField<31, 1, u32> normalized_coords;
207 BitField<6, 13, u32> mip_lod_bias; 210 };
208 BitField<27, 3, u32> max_anisotropy; 211 union {
212 BitField<6, 13, u32> mip_lod_bias;
213 BitField<27, 3, u32> max_anisotropy;
214 };
215 union {
216 BitField<0, 4, u32> res_min_mip_level;
217 BitField<4, 4, u32> res_max_mip_level;
218 BitField<8, 4, MsaaMode> msaa_mode;
219 BitField<12, 12, u32> min_lod_clamp;
220 };
221 };
222 std::array<u64, 4> raw;
209 }; 223 };
210 224
211 union { 225 constexpr bool operator==(const TICEntry& rhs) const noexcept {
212 BitField<0, 4, u32> res_min_mip_level; 226 return raw == rhs.raw;
213 BitField<4, 4, u32> res_max_mip_level; 227 }
214 BitField<8, 4, MsaaMode> msaa_mode;
215 BitField<12, 12, u32> min_lod_clamp;
216 };
217 228
218 GPUVAddr Address() const { 229 constexpr bool operator!=(const TICEntry& rhs) const noexcept {
230 return raw != rhs.raw;
231 }
232
233 constexpr GPUVAddr Address() const {
219 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); 234 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
220 } 235 }
221 236
222 u32 Pitch() const { 237 constexpr u32 Pitch() const {
223 ASSERT(header_version == TICHeaderVersion::Pitch || 238 ASSERT(header_version == TICHeaderVersion::Pitch ||
224 header_version == TICHeaderVersion::PitchColorKey); 239 header_version == TICHeaderVersion::PitchColorKey);
225 // The pitch value is 21 bits, and is 32B aligned. 240 // The pitch value is 21 bits, and is 32B aligned.
226 return pitch_high << 5; 241 return pitch_high << 5;
227 } 242 }
228 243
229 u32 Width() const { 244 constexpr u32 Width() const {
230 if (header_version != TICHeaderVersion::OneDBuffer) { 245 if (header_version != TICHeaderVersion::OneDBuffer) {
231 return width_minus_1 + 1; 246 return width_minus_one + 1;
232 } 247 }
233 return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1; 248 return (buffer_high_width_minus_one << 16 | buffer_low_width_minus_one) + 1;
234 } 249 }
235 250
236 u32 Height() const { 251 constexpr u32 Height() const {
237 return height_minus_1 + 1; 252 return height_minus_1 + 1;
238 } 253 }
239 254
240 u32 Depth() const { 255 constexpr u32 Depth() const {
241 return depth_minus_1 + 1; 256 return depth_minus_1 + 1;
242 } 257 }
243 258
244 u32 BlockWidth() const { 259 constexpr u32 BaseLayer() const {
245 ASSERT(IsTiled()); 260 return layer_base_0_2 | layer_base_3_7 << 3 | layer_base_8_10 << 8;
246 return block_width;
247 }
248
249 u32 BlockHeight() const {
250 ASSERT(IsTiled());
251 return block_height;
252 }
253
254 u32 BlockDepth() const {
255 ASSERT(IsTiled());
256 return block_depth;
257 } 261 }
258 262
259 bool IsTiled() const { 263 constexpr bool IsBlockLinear() const {
260 return header_version == TICHeaderVersion::BlockLinear || 264 return header_version == TICHeaderVersion::BlockLinear ||
261 header_version == TICHeaderVersion::BlockLinearColorKey; 265 header_version == TICHeaderVersion::BlockLinearColorKey;
262 } 266 }
263 267
264 bool IsLineal() const { 268 constexpr bool IsPitchLinear() const {
265 return header_version == TICHeaderVersion::Pitch || 269 return header_version == TICHeaderVersion::Pitch ||
266 header_version == TICHeaderVersion::PitchColorKey; 270 header_version == TICHeaderVersion::PitchColorKey;
267 } 271 }
268 272
269 bool IsBuffer() const { 273 constexpr bool IsBuffer() const {
270 return header_version == TICHeaderVersion::OneDBuffer; 274 return header_version == TICHeaderVersion::OneDBuffer;
271 } 275 }
272
273 bool IsSrgbConversionEnabled() const {
274 return srgb_conversion != 0;
275 }
276}; 276};
277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); 277static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size");
278 278
@@ -309,6 +309,12 @@ enum class TextureMipmapFilter : u32 {
309 Linear = 3, 309 Linear = 3,
310}; 310};
311 311
312enum class SamplerReduction : u32 {
313 WeightedAverage = 0,
314 Min = 1,
315 Max = 2,
316};
317
312enum class Anisotropy { 318enum class Anisotropy {
313 Default, 319 Default,
314 Filter2x, 320 Filter2x,
@@ -333,8 +339,12 @@ struct TSCEntry {
333 BitField<0, 2, TextureFilter> mag_filter; 339 BitField<0, 2, TextureFilter> mag_filter;
334 BitField<4, 2, TextureFilter> min_filter; 340 BitField<4, 2, TextureFilter> min_filter;
335 BitField<6, 2, TextureMipmapFilter> mipmap_filter; 341 BitField<6, 2, TextureMipmapFilter> mipmap_filter;
342 BitField<8, 1, u32> cubemap_anisotropy;
336 BitField<9, 1, u32> cubemap_interface_filtering; 343 BitField<9, 1, u32> cubemap_interface_filtering;
344 BitField<10, 2, SamplerReduction> reduction_filter;
337 BitField<12, 13, u32> mip_lod_bias; 345 BitField<12, 13, u32> mip_lod_bias;
346 BitField<25, 1, u32> float_coord_normalization;
347 BitField<26, 5, u32> trilin_opt;
338 }; 348 };
339 union { 349 union {
340 BitField<0, 12, u32> min_lod_clamp; 350 BitField<0, 12, u32> min_lod_clamp;
@@ -347,32 +357,45 @@ struct TSCEntry {
347 }; 357 };
348 std::array<f32, 4> border_color; 358 std::array<f32, 4> border_color;
349 }; 359 };
350 std::array<u8, 0x20> raw; 360 std::array<u64, 4> raw;
351 }; 361 };
352 362
353 std::array<float, 4> GetBorderColor() const noexcept; 363 constexpr bool operator==(const TSCEntry& rhs) const noexcept {
364 return raw == rhs.raw;
365 }
366
367 constexpr bool operator!=(const TSCEntry& rhs) const noexcept {
368 return raw != rhs.raw;
369 }
370
371 std::array<float, 4> BorderColor() const noexcept;
354 372
355 float GetMaxAnisotropy() const noexcept; 373 float MaxAnisotropy() const noexcept;
356 374
357 float GetMinLod() const { 375 float MinLod() const {
358 return static_cast<float>(min_lod_clamp) / 256.0f; 376 return static_cast<float>(min_lod_clamp) / 256.0f;
359 } 377 }
360 378
361 float GetMaxLod() const { 379 float MaxLod() const {
362 return static_cast<float>(max_lod_clamp) / 256.0f; 380 return static_cast<float>(max_lod_clamp) / 256.0f;
363 } 381 }
364 382
365 float GetLodBias() const { 383 float LodBias() const {
366 // Sign extend the 13-bit value. 384 // Sign extend the 13-bit value.
367 constexpr u32 mask = 1U << (13 - 1); 385 static constexpr u32 mask = 1U << (13 - 1);
368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 386 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
369 } 387 }
370}; 388};
371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 389static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
372 390
373struct FullTextureInfo { 391} // namespace Tegra::Texture
374 TICEntry tic; 392
375 TSCEntry tsc; 393template <>
394struct std::hash<Tegra::Texture::TICEntry> {
395 size_t operator()(const Tegra::Texture::TICEntry& tic) const noexcept;
376}; 396};
377 397
378} // namespace Tegra::Texture 398template <>
399struct std::hash<Tegra::Texture::TSCEntry> {
400 size_t operator()(const Tegra::Texture::TSCEntry& tsc) const noexcept;
401};