author Levi 2021-01-10 22:09:56 -0700
committer Levi 2021-01-10 22:09:56 -0700
commit 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch)
tree 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/texture_cache
parent More forgetting... duh (diff)
parent Merge pull request #5229 from Morph1984/fullscreen-opt (diff)
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.cpp |   70
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.h   |   45
-rw-r--r-- src/video_core/texture_cache/copy_params.h           |   36
-rw-r--r-- src/video_core/texture_cache/decode_bc4.cpp          |   97
-rw-r--r-- src/video_core/texture_cache/decode_bc4.h            |   16
-rw-r--r-- src/video_core/texture_cache/descriptor_table.h      |   82
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp |  380
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.h   |   42
-rw-r--r-- src/video_core/texture_cache/formatter.cpp           |   95
-rw-r--r-- src/video_core/texture_cache/formatter.h             |  263
-rw-r--r-- src/video_core/texture_cache/image_base.cpp          |  218
-rw-r--r-- src/video_core/texture_cache/image_base.h            |   83
-rw-r--r-- src/video_core/texture_cache/image_info.cpp          |  189
-rw-r--r-- src/video_core/texture_cache/image_info.h            |   38
-rw-r--r-- src/video_core/texture_cache/image_view_base.cpp     |   41
-rw-r--r-- src/video_core/texture_cache/image_view_base.h       |   47
-rw-r--r-- src/video_core/texture_cache/image_view_info.cpp     |   88
-rw-r--r-- src/video_core/texture_cache/image_view_info.h       |   50
-rw-r--r-- src/video_core/texture_cache/render_targets.h        |   51
-rw-r--r-- src/video_core/texture_cache/samples_helper.h        |   55
-rw-r--r-- src/video_core/texture_cache/slot_vector.h           |  156
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp        |  298
-rw-r--r-- src/video_core/texture_cache/surface_base.h          |  333
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp      |  444
-rw-r--r-- src/video_core/texture_cache/surface_params.h        |  294
-rw-r--r-- src/video_core/texture_cache/surface_view.cpp        |   27
-rw-r--r-- src/video_core/texture_cache/surface_view.h          |   68
-rw-r--r-- src/video_core/texture_cache/texture_cache.h         | 2405
-rw-r--r-- src/video_core/texture_cache/types.h                 |  140
-rw-r--r-- src/video_core/texture_cache/util.cpp                | 1233
-rw-r--r-- src/video_core/texture_cache/util.h                  |  109
31 files changed, 4644 insertions, 2849 deletions
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <array>
#include <bit>

#include "common/alignment.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/accelerated_swizzle.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/decoders.h"

namespace VideoCommon::Accelerated {

using Tegra::Texture::GOB_SIZE_SHIFT;
using Tegra::Texture::GOB_SIZE_X;
using Tegra::Texture::GOB_SIZE_X_SHIFT;
using Tegra::Texture::GOB_SIZE_Y_SHIFT;
using VideoCore::Surface::BytesPerBlock;

BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
                                                          const ImageInfo& info) {
    const Extent3D block = swizzle.block;
    const Extent3D num_tiles = swizzle.num_tiles;
    const u32 bytes_per_block = BytesPerBlock(info.format);
    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
    const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
    return BlockLinearSwizzle2DParams{
        .origin{0, 0, 0},
        .destination{0, 0, 0},
        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
        .layer_stride = info.layer_stride,
        .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
        .block_height = block.height,
        .block_height_mask = (1U << block.height) - 1,
    };
}

BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
                                                          const ImageInfo& info) {
    const Extent3D block = swizzle.block;
    const Extent3D num_tiles = swizzle.num_tiles;
    const u32 bytes_per_block = BytesPerBlock(info.format);
    const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
    const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;

    const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
    const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
    const u32 slice_size =
        Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
    return BlockLinearSwizzle3DParams{
        .origin{0, 0, 0},
        .destination{0, 0, 0},
        .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
        .slice_size = slice_size,
        .block_size = block_size,
        .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
        .block_height = block.height,
        .block_height_mask = (1U << block.height) - 1,
        .block_depth = block.depth,
        .block_depth_mask = (1U << block.depth) - 1,
    };
}

} // namespace VideoCommon::Accelerated
\ No newline at end of file
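
For reference, the 2D parameter arithmetic above can be checked by hand. A standalone sketch (not part of the commit) with the standard Tegra GOB geometry hard-coded (64-byte-wide, 8-row, 512-byte GOBs) and hypothetical stride/block inputs:

#include <cstdint>
#include <cstdio>

int main() {
    constexpr uint32_t GOB_SIZE_SHIFT = 9;   // one GOB holds 512 bytes (64 bytes x 8 rows)
    constexpr uint32_t GOB_SIZE_X_SHIFT = 6; // one GOB row holds 64 bytes
    const uint32_t stride = 1024;    // hypothetical: bytes per row of the level
    const uint32_t block_height = 4; // hypothetical: log2 of GOBs per block in Y
    const uint32_t block_depth = 0;  // hypothetical: log2 of GOBs per block in Z
    // Same computation as DivCeilLog2(stride, GOB_SIZE_X_SHIFT) above:
    // number of GOBs needed to cover one row of bytes.
    const uint32_t gobs_in_x = (stride + (1u << GOB_SIZE_X_SHIFT) - 1) >> GOB_SIZE_X_SHIFT;
    // One block spans gobs_in_x GOBs horizontally and 2^block_height x
    // 2^block_depth GOBs in Y/Z, hence the summed shifts.
    const uint32_t block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
    std::printf("gobs_in_x=%u block_size=%u\n", gobs_in_x, block_size); // 16 and 131072
    return 0;
}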
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>

#include "common/common_types.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon::Accelerated {

struct BlockLinearSwizzle2DParams {
    std::array<u32, 3> origin;
    std::array<s32, 3> destination;
    u32 bytes_per_block_log2;
    u32 layer_stride;
    u32 block_size;
    u32 x_shift;
    u32 block_height;
    u32 block_height_mask;
};

struct BlockLinearSwizzle3DParams {
    std::array<u32, 3> origin;
    std::array<s32, 3> destination;
    u32 bytes_per_block_log2;
    u32 slice_size;
    u32 block_size;
    u32 x_shift;
    u32 block_height;
    u32 block_height_mask;
    u32 block_depth;
    u32 block_depth_mask;
};

[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
    const SwizzleParameters& swizzle, const ImageInfo& info);

[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
    const SwizzleParameters& swizzle, const ImageInfo& info);

} // namespace VideoCommon::Accelerated
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 9c21a0649..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/common_types.h"

namespace VideoCommon {

struct CopyParams {
    constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
                         u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
                         u32 depth)
        : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
          dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
          dest_level{dest_level}, width{width}, height{height}, depth{depth} {}

    constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
        : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
          dest_level{level}, width{width}, height{height}, depth{depth} {}

    u32 source_x;
    u32 source_y;
    u32 source_z;
    u32 dest_x;
    u32 dest_y;
    u32 dest_z;
    u32 source_level;
    u32 dest_level;
    u32 width;
    u32 height;
    u32 depth;
};

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <cstring>
#include <span>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/texture_cache/decode_bc4.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon {

// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
    const u32 code_offset = 16 + 3 * (4 * y + x);
    const u32 code = (bits >> code_offset) & 7;
    const u32 red0 = (bits >> 0) & 0xff;
    const u32 red1 = (bits >> 8) & 0xff;
    if (red0 > red1) {
        switch (code) {
        case 0:
            return red0;
        case 1:
            return red1;
        case 2:
            return (6 * red0 + 1 * red1) / 7;
        case 3:
            return (5 * red0 + 2 * red1) / 7;
        case 4:
            return (4 * red0 + 3 * red1) / 7;
        case 5:
            return (3 * red0 + 4 * red1) / 7;
        case 6:
            return (2 * red0 + 5 * red1) / 7;
        case 7:
            return (1 * red0 + 6 * red1) / 7;
        }
    } else {
        switch (code) {
        case 0:
            return red0;
        case 1:
            return red1;
        case 2:
            return (4 * red0 + 1 * red1) / 5;
        case 3:
            return (3 * red0 + 2 * red1) / 5;
        case 4:
            return (2 * red0 + 3 * red1) / 5;
        case 5:
            return (1 * red0 + 4 * red1) / 5;
        case 6:
            return 0;
        case 7:
            return 0xff;
        }
    }
    return 0;
}

void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
    UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
    UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
    static constexpr u32 BLOCK_SIZE = 4;
    size_t input_offset = 0;
    for (u32 slice = 0; slice < extent.depth; ++slice) {
        for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
            for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
                u64 bits;
                std::memcpy(&bits, &input[input_offset], sizeof(bits));
                input_offset += sizeof(bits);

                for (u32 y = 0; y < BLOCK_SIZE; ++y) {
                    for (u32 x = 0; x < BLOCK_SIZE; ++x) {
                        const u32 linear_z = slice;
                        const u32 linear_y = block_y * BLOCK_SIZE + y;
                        const u32 linear_x = block_x * BLOCK_SIZE + x;
                        const u32 offset_z = linear_z * extent.width * extent.height;
                        const u32 offset_y = linear_y * extent.width;
                        const u32 offset_x = linear_x;
                        const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
                        const u32 color = DecompressBlock(bits, x, y);
                        output[output_offset + 0] = static_cast<u8>(color);
                        output[output_offset + 1] = 0;
                        output[output_offset + 2] = 0;
                        output[output_offset + 3] = 0xff;
                    }
                }
            }
        }
    }
}

} // namespace VideoCommon
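
As a quick sanity check, the red0 > red1 arm of DecompressBlock is a linear ramp between the two endpoints. A standalone sketch (not part of the commit) of the same arithmetic in closed form:

#include <cstdint>
#include <cstdio>

// Closed form of the red0 > red1 switch above: codes 0/1 are the endpoints,
// codes 2..7 interpolate with weights (8 - code)/7 and (code - 1)/7.
static uint32_t Interp(uint32_t red0, uint32_t red1, uint32_t code) {
    if (code == 0) {
        return red0;
    }
    if (code == 1) {
        return red1;
    }
    return ((8 - code) * red0 + (code - 1) * red1) / 7;
}

int main() {
    for (uint32_t code = 0; code < 8; ++code) {
        // Prints 200 60 180 160 140 120 100 80: an even ramp from red0 to red1.
        std::printf("%u ", Interp(200, 60, code));
    }
    std::printf("\n");
    return 0;
}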
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <span>

#include "common/common_types.h"
#include "video_core/texture_cache/types.h"

namespace VideoCommon {

void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <algorithm>
#include <utility>
#include <vector>

#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/logging/log.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"

namespace VideoCommon {

template <typename Descriptor>
class DescriptorTable {
public:
    explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}

    [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) {
        [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) {
            return false;
        }
        Refresh(gpu_addr, limit);
        return true;
    }

    void Invalidate() noexcept {
        std::ranges::fill(read_descriptors, 0);
    }

    [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
        DEBUG_ASSERT(index <= current_limit);
        const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
        std::pair<Descriptor, bool> result;
        gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
        if (IsDescriptorRead(index)) {
            result.second = result.first != descriptors[index];
        } else {
            MarkDescriptorAsRead(index);
            result.second = true;
        }
        if (result.second) {
            descriptors[index] = result.first;
        }
        return result;
    }

    [[nodiscard]] u32 Limit() const noexcept {
        return current_limit;
    }

private:
    void Refresh(GPUVAddr gpu_addr, u32 limit) {
        current_gpu_addr = gpu_addr;
        current_limit = limit;

        const size_t num_descriptors = static_cast<size_t>(limit) + 1;
        read_descriptors.clear();
        read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
        descriptors.resize(num_descriptors);
    }

    void MarkDescriptorAsRead(u32 index) noexcept {
        read_descriptors[index / 64] |= 1ULL << (index % 64);
    }

    [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
        return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
    }

    Tegra::MemoryManager& gpu_memory;
    GPUVAddr current_gpu_addr{};
    u32 current_limit{};
    std::vector<u64> read_descriptors;
    std::vector<Descriptor> descriptors;
};

} // namespace VideoCommon
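
The read_descriptors bitmap is what lets Read() report whether a descriptor changed since the last use: the first read of an index is always reported dirty, later reads only when the cached copy differs. A minimal standalone model of that logic (assumed names, not part of the commit):

#include <cstdint>
#include <cstdio>
#include <vector>

struct Table {
    std::vector<uint64_t> read_bits; // one bit per descriptor index
    std::vector<uint32_t> cached;    // last value returned for each index

    explicit Table(size_t n) : read_bits((n + 63) / 64), cached(n) {}

    // Returns true when the caller must treat the descriptor as new.
    bool Read(size_t index, uint32_t fresh_value) {
        const uint64_t mask = 1ULL << (index % 64);
        uint64_t& word = read_bits[index / 64];
        const bool seen = (word & mask) != 0;
        word |= mask;
        const bool dirty = !seen || cached[index] != fresh_value;
        if (dirty) {
            cached[index] = fresh_value;
        }
        return dirty;
    }
};

int main() {
    Table table(128);
    std::printf("%d\n", table.Read(5, 42)); // 1: first read is always dirty
    std::printf("%d\n", table.Read(5, 42)); // 0: unchanged since last read
    std::printf("%d\n", table.Read(5, 43)); // 1: value changed
    return 0;
}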
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7d5a75648..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <array>
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
 constexpr auto SINT = ComponentType::SINT;
 constexpr auto UINT = ComponentType::UINT;
 constexpr auto FLOAT = ComponentType::FLOAT;
-constexpr bool C = false; // Normal color
-constexpr bool S = true; // Srgb
+constexpr bool LINEAR = false;
+constexpr bool SRGB = true;
 
-struct Table {
-    constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component,
-                    ComponentType green_component, ComponentType blue_component,
-                    ComponentType alpha_component, PixelFormat pixel_format)
-        : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component},
-          green_component{green_component}, blue_component{blue_component},
-          alpha_component{alpha_component}, is_srgb{is_srgb} {}
-
-    TextureFormat texture_format;
-    PixelFormat pixel_format;
-    ComponentType red_component;
-    ComponentType green_component;
-    ComponentType blue_component;
-    ComponentType alpha_component;
-    bool is_srgb;
-};
-constexpr std::array<Table, 86> DefinitionTable = {{
-    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
-    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
-    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
-    {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
-    {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
-
-    {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
-
-    {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
-    {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
-
-    {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
-
-    {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
-
-    {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
-    {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
-    {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
-    {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
-
-    {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
-    {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
-    {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
-    {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
-
-    {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
-    {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
-    {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
-    {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
-    {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
-
-    {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
-    {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
-    {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
-    {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
-    {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
-
-    {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
-    {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
-    {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
-    {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
-    {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
-
-    {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
-
-    {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
-    {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
-    {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
-
-    {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
-
-    {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
-    {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
-    {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
-
-    {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
-    {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
-    {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
-
-    {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
-
-    {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
-    {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
-    {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
-    {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
-    {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
-
-    {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
-    {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
-
-    {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
-    {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
-
-    {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
-    {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
-
-    {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
-    {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
-
-    {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
-    {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
-
-    {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
-    {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
-
-    {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
-    {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
-
-    {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
-    {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
-
-    {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
-    {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
-
-    {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
-    {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
-
-    {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
-    {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
-
-    {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
-    {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
-
-    {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
-    {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
-
-    {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
-    {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
-
-    {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
-    {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
-
-    {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
-    {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
-
-    {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
-    {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
+constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
+                   ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
+    u32 hash = is_srgb ? 1 : 0;
+    hash |= static_cast<u32>(red_component) << 1;
+    hash |= static_cast<u32>(green_component) << 4;
+    hash |= static_cast<u32>(blue_component) << 7;
+    hash |= static_cast<u32>(alpha_component) << 10;
+    hash |= static_cast<u32>(format) << 13;
+    return hash;
+}
 
-    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
-    {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
-}};
+constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
+    return Hash(format, component, component, component, component, is_srgb);
+}
 
 } // Anonymous namespace
 
-FormatLookupTable::FormatLookupTable() {
-    table.fill(static_cast<u8>(PixelFormat::Invalid));
-
-    for (const auto& entry : DefinitionTable) {
-        table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component,
-                             entry.green_component, entry.blue_component,
-                             entry.alpha_component)] = static_cast<u8>(entry.pixel_format);
-    }
-}
-
-PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
-                                              ComponentType red_component,
-                                              ComponentType green_component,
-                                              ComponentType blue_component,
-                                              ComponentType alpha_component) const noexcept {
-    const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex(
-        format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
-    // [[likely]]
-    if (pixel_format != PixelFormat::Invalid) {
-        return pixel_format;
+PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
+                                       ComponentType blue, ComponentType alpha,
+                                       bool is_srgb) noexcept {
+    switch (Hash(format, red, green, blue, alpha, is_srgb)) {
+    case Hash(TextureFormat::A8R8G8B8, UNORM):
+        return PixelFormat::A8B8G8R8_UNORM;
+    case Hash(TextureFormat::A8R8G8B8, SNORM):
+        return PixelFormat::A8B8G8R8_SNORM;
+    case Hash(TextureFormat::A8R8G8B8, UINT):
+        return PixelFormat::A8B8G8R8_UINT;
+    case Hash(TextureFormat::A8R8G8B8, SINT):
+        return PixelFormat::A8B8G8R8_SINT;
+    case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+        return PixelFormat::A8B8G8R8_SRGB;
+    case Hash(TextureFormat::B5G6R5, UNORM):
+        return PixelFormat::B5G6R5_UNORM;
+    case Hash(TextureFormat::A2B10G10R10, UNORM):
+        return PixelFormat::A2B10G10R10_UNORM;
+    case Hash(TextureFormat::A2B10G10R10, UINT):
+        return PixelFormat::A2B10G10R10_UINT;
+    case Hash(TextureFormat::A1B5G5R5, UNORM):
+        return PixelFormat::A1B5G5R5_UNORM;
+    case Hash(TextureFormat::A4B4G4R4, UNORM):
+        return PixelFormat::A4B4G4R4_UNORM;
+    case Hash(TextureFormat::R8, UNORM):
+        return PixelFormat::R8_UNORM;
+    case Hash(TextureFormat::R8, SNORM):
+        return PixelFormat::R8_SNORM;
+    case Hash(TextureFormat::R8, UINT):
+        return PixelFormat::R8_UINT;
+    case Hash(TextureFormat::R8, SINT):
+        return PixelFormat::R8_SINT;
+    case Hash(TextureFormat::R8G8, UNORM):
+        return PixelFormat::R8G8_UNORM;
+    case Hash(TextureFormat::R8G8, SNORM):
+        return PixelFormat::R8G8_SNORM;
+    case Hash(TextureFormat::R8G8, UINT):
+        return PixelFormat::R8G8_UINT;
+    case Hash(TextureFormat::R8G8, SINT):
+        return PixelFormat::R8G8_SINT;
+    case Hash(TextureFormat::R16G16B16A16, FLOAT):
+        return PixelFormat::R16G16B16A16_FLOAT;
+    case Hash(TextureFormat::R16G16B16A16, UNORM):
+        return PixelFormat::R16G16B16A16_UNORM;
+    case Hash(TextureFormat::R16G16B16A16, SNORM):
+        return PixelFormat::R16G16B16A16_SNORM;
+    case Hash(TextureFormat::R16G16B16A16, UINT):
+        return PixelFormat::R16G16B16A16_UINT;
+    case Hash(TextureFormat::R16G16B16A16, SINT):
+        return PixelFormat::R16G16B16A16_SINT;
+    case Hash(TextureFormat::R16G16, FLOAT):
+        return PixelFormat::R16G16_FLOAT;
+    case Hash(TextureFormat::R16G16, UNORM):
+        return PixelFormat::R16G16_UNORM;
+    case Hash(TextureFormat::R16G16, SNORM):
+        return PixelFormat::R16G16_SNORM;
+    case Hash(TextureFormat::R16G16, UINT):
+        return PixelFormat::R16G16_UINT;
+    case Hash(TextureFormat::R16G16, SINT):
+        return PixelFormat::R16G16_SINT;
+    case Hash(TextureFormat::R16, FLOAT):
+        return PixelFormat::R16_FLOAT;
+    case Hash(TextureFormat::R16, UNORM):
+        return PixelFormat::R16_UNORM;
+    case Hash(TextureFormat::R16, SNORM):
+        return PixelFormat::R16_SNORM;
+    case Hash(TextureFormat::R16, UINT):
+        return PixelFormat::R16_UINT;
+    case Hash(TextureFormat::R16, SINT):
+        return PixelFormat::R16_SINT;
+    case Hash(TextureFormat::B10G11R11, FLOAT):
+        return PixelFormat::B10G11R11_FLOAT;
+    case Hash(TextureFormat::R32G32B32A32, FLOAT):
+        return PixelFormat::R32G32B32A32_FLOAT;
+    case Hash(TextureFormat::R32G32B32A32, UINT):
+        return PixelFormat::R32G32B32A32_UINT;
+    case Hash(TextureFormat::R32G32B32A32, SINT):
+        return PixelFormat::R32G32B32A32_SINT;
+    case Hash(TextureFormat::R32G32B32, FLOAT):
+        return PixelFormat::R32G32B32_FLOAT;
+    case Hash(TextureFormat::R32G32, FLOAT):
+        return PixelFormat::R32G32_FLOAT;
+    case Hash(TextureFormat::R32G32, UINT):
+        return PixelFormat::R32G32_UINT;
+    case Hash(TextureFormat::R32G32, SINT):
+        return PixelFormat::R32G32_SINT;
+    case Hash(TextureFormat::R32, FLOAT):
+        return PixelFormat::R32_FLOAT;
+    case Hash(TextureFormat::R32, UINT):
+        return PixelFormat::R32_UINT;
+    case Hash(TextureFormat::R32, SINT):
+        return PixelFormat::R32_SINT;
+    case Hash(TextureFormat::E5B9G9R9, FLOAT):
+        return PixelFormat::E5B9G9R9_FLOAT;
+    case Hash(TextureFormat::D32, FLOAT):
+        return PixelFormat::D32_FLOAT;
+    case Hash(TextureFormat::D16, UNORM):
+        return PixelFormat::D16_UNORM;
+    case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
+        return PixelFormat::S8_UINT_D24_UNORM;
+    case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
+        return PixelFormat::S8_UINT_D24_UNORM;
+    case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
+        return PixelFormat::D32_FLOAT_S8_UINT;
+    case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
+        return PixelFormat::BC1_RGBA_UNORM;
+    case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
+        return PixelFormat::BC1_RGBA_SRGB;
+    case Hash(TextureFormat::BC2, UNORM, LINEAR):
+        return PixelFormat::BC2_UNORM;
+    case Hash(TextureFormat::BC2, UNORM, SRGB):
+        return PixelFormat::BC2_SRGB;
+    case Hash(TextureFormat::BC3, UNORM, LINEAR):
+        return PixelFormat::BC3_UNORM;
+    case Hash(TextureFormat::BC3, UNORM, SRGB):
+        return PixelFormat::BC3_SRGB;
+    case Hash(TextureFormat::BC4, UNORM):
+        return PixelFormat::BC4_UNORM;
+    case Hash(TextureFormat::BC4, SNORM):
+        return PixelFormat::BC4_SNORM;
+    case Hash(TextureFormat::BC5, UNORM):
+        return PixelFormat::BC5_UNORM;
+    case Hash(TextureFormat::BC5, SNORM):
+        return PixelFormat::BC5_SNORM;
+    case Hash(TextureFormat::BC7, UNORM, LINEAR):
+        return PixelFormat::BC7_UNORM;
+    case Hash(TextureFormat::BC7, UNORM, SRGB):
+        return PixelFormat::BC7_SRGB;
+    case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
+        return PixelFormat::BC6H_SFLOAT;
+    case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
+        return PixelFormat::BC6H_UFLOAT;
+    case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_4X4_UNORM;
+    case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_4X4_SRGB;
+    case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_5X4_UNORM;
+    case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_5X4_SRGB;
+    case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_5X5_UNORM;
+    case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_5X5_SRGB;
+    case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_8X8_UNORM;
+    case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_8X8_SRGB;
+    case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_8X5_UNORM;
+    case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_8X5_SRGB;
+    case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_10X8_UNORM;
+    case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_10X8_SRGB;
+    case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_6X6_UNORM;
+    case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_6X6_SRGB;
+    case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_10X10_UNORM;
+    case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_10X10_SRGB;
+    case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_12X12_UNORM;
+    case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_12X12_SRGB;
+    case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_8X6_UNORM;
+    case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_8X6_SRGB;
+    case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
+        return PixelFormat::ASTC_2D_6X5_UNORM;
+    case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
+        return PixelFormat::ASTC_2D_6X5_SRGB;
     }
     UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
-                      static_cast<int>(format), is_srgb, static_cast<int>(red_component),
-                      static_cast<int>(green_component), static_cast<int>(blue_component),
-                      static_cast<int>(alpha_component));
+                      static_cast<int>(format), is_srgb, static_cast<int>(red),
+                      static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
     return PixelFormat::A8B8G8R8_UNORM;
 }
 
-void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
-                            ComponentType green_component, ComponentType blue_component,
-                            ComponentType alpha_component, PixelFormat pixel_format) {}
-
-std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
-                                              ComponentType red_component,
-                                              ComponentType green_component,
-                                              ComponentType blue_component,
-                                              ComponentType alpha_component) noexcept {
-    const auto format_index = static_cast<std::size_t>(format);
-    const auto red_index = static_cast<std::size_t>(red_component);
-    const auto green_index = static_cast<std::size_t>(green_component);
-    const auto blue_index = static_cast<std::size_t>(blue_component);
-    const auto alpha_index = static_cast<std::size_t>(alpha_component);
-    const std::size_t srgb_index = is_srgb ? 1 : 0;
-
-    return format_index * PerFormat +
-           srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
-           alpha_index * PerComponent * PerComponent * PerComponent +
-           blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
-}
-
 } // namespace VideoCommon
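
The switch works because Hash() packs each lookup key into disjoint bit fields: bit 0 for sRGB, three bits per component type at bits 1/4/7/10, and the texture format from bit 13 up, so every (format, components, srgb) tuple maps to a distinct constant. A standalone sketch (field widths copied from the function above, enum values replaced by plain integers):

#include <cstdint>
#include <cstdio>

constexpr uint32_t Hash(uint32_t format, uint32_t r, uint32_t g, uint32_t b,
                        uint32_t a, bool srgb) {
    return (srgb ? 1u : 0u) | r << 1 | g << 4 | b << 7 | a << 10 | format << 13;
}

// With every 3-bit component field saturated, the key still stays below the
// format field, so the fields never collide.
static_assert(Hash(0, 7, 7, 7, 7, true) < (1u << 13));

int main() {
    // Distinct inputs yield distinct keys while each component fits in 3 bits.
    std::printf("0x%x 0x%x\n", Hash(2, 1, 1, 1, 1, false), Hash(2, 1, 1, 1, 1, true));
    return 0;
}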
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
 
 #pragma once
 
-#include <array>
-#include <limits>
 #include "video_core/surface.h"
 #include "video_core/textures/texture.h"
 
 namespace VideoCommon {
 
-class FormatLookupTable {
-public:
-    explicit FormatLookupTable();
-
-    VideoCore::Surface::PixelFormat GetPixelFormat(
-        Tegra::Texture::TextureFormat format, bool is_srgb,
-        Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
-        Tegra::Texture::ComponentType blue_component,
-        Tegra::Texture::ComponentType alpha_component) const noexcept;
-
-private:
-    static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
-
-    static constexpr std::size_t NumTextureFormats = 128;
-
-    static constexpr std::size_t PerComponent = 8;
-    static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
-    static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
-    static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
-    static constexpr std::size_t PerFormat = PerComponents4 * 2;
-
-    static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
-                                      Tegra::Texture::ComponentType red_component,
-                                      Tegra::Texture::ComponentType green_component,
-                                      Tegra::Texture::ComponentType blue_component,
-                                      Tegra::Texture::ComponentType alpha_component) noexcept;
-
-    void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
-             Tegra::Texture::ComponentType red_component,
-             Tegra::Texture::ComponentType green_component,
-             Tegra::Texture::ComponentType blue_component,
-             Tegra::Texture::ComponentType alpha_component,
-             VideoCore::Surface::PixelFormat pixel_format);
-
-    std::array<u8, NumTextureFormats * PerFormat> table;
-};
+VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
+    Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
+    Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
+    Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <string>

#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/render_targets.h"

namespace VideoCommon {

std::string Name(const ImageBase& image) {
    const GPUVAddr gpu_addr = image.gpu_addr;
    const ImageInfo& info = image.info;
    const u32 width = info.size.width;
    const u32 height = info.size.height;
    const u32 depth = info.size.depth;
    const u32 num_layers = image.info.resources.layers;
    const u32 num_levels = image.info.resources.levels;
    std::string resource;
    if (num_layers > 1) {
        resource += fmt::format(":L{}", num_layers);
    }
    if (num_levels > 1) {
        resource += fmt::format(":M{}", num_levels);
    }
    switch (image.info.type) {
    case ImageType::e1D:
        return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
    case ImageType::e2D:
        return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
    case ImageType::e3D:
        return fmt::format("Image 3D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
    case ImageType::Linear:
        return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
    case ImageType::Buffer:
        return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
    }
    return "Invalid";
}

std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
    const u32 width = image_view.size.width;
    const u32 height = image_view.size.height;
    const u32 depth = image_view.size.depth;
    const u32 num_levels = image_view.range.extent.levels;
    const u32 num_layers = image_view.range.extent.layers;

    const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
    switch (type.value_or(image_view.type)) {
    case ImageViewType::e1D:
        return fmt::format("ImageView 1D {}{}", width, level);
    case ImageViewType::e2D:
        return fmt::format("ImageView 2D {}x{}{}", width, height, level);
    case ImageViewType::Cube:
        return fmt::format("ImageView Cube {}x{}{}", width, height, level);
    case ImageViewType::e3D:
        return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
    case ImageViewType::e1DArray:
        return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
    case ImageViewType::e2DArray:
        return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
    case ImageViewType::CubeArray:
        return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
    case ImageViewType::Rect:
        return fmt::format("ImageView Rect {}x{}{}", width, height, level);
    case ImageViewType::Buffer:
        return fmt::format("BufferView {}", width);
    }
    return "Invalid";
}

std::string Name(const RenderTargets& render_targets) {
    std::string_view debug_prefix;
    const auto num_color = std::ranges::count_if(
        render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
    if (render_targets.depth_buffer_id) {
        debug_prefix = num_color > 0 ? "R" : "Z";
    } else {
        debug_prefix = num_color > 0 ? "C" : "X";
    }
    const Extent2D size = render_targets.size;
    if (num_color > 0) {
        return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
                           size.height);
    } else {
        return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
    }
}

} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <optional>
#include <string>

#include <fmt/format.h>

#include "video_core/surface.h"
#include "video_core/texture_cache/types.h"

template <>
struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
    template <typename FormatContext>
    auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
        using VideoCore::Surface::PixelFormat;
        const string_view name = [format] {
            switch (format) {
            case PixelFormat::A8B8G8R8_UNORM:
                return "A8B8G8R8_UNORM";
            case PixelFormat::A8B8G8R8_SNORM:
                return "A8B8G8R8_SNORM";
            case PixelFormat::A8B8G8R8_SINT:
                return "A8B8G8R8_SINT";
            case PixelFormat::A8B8G8R8_UINT:
                return "A8B8G8R8_UINT";
            case PixelFormat::R5G6B5_UNORM:
                return "R5G6B5_UNORM";
            case PixelFormat::B5G6R5_UNORM:
                return "B5G6R5_UNORM";
            case PixelFormat::A1R5G5B5_UNORM:
                return "A1R5G5B5_UNORM";
            case PixelFormat::A2B10G10R10_UNORM:
                return "A2B10G10R10_UNORM";
            case PixelFormat::A2B10G10R10_UINT:
                return "A2B10G10R10_UINT";
            case PixelFormat::A1B5G5R5_UNORM:
                return "A1B5G5R5_UNORM";
            case PixelFormat::R8_UNORM:
                return "R8_UNORM";
            case PixelFormat::R8_SNORM:
                return "R8_SNORM";
            case PixelFormat::R8_SINT:
                return "R8_SINT";
            case PixelFormat::R8_UINT:
                return "R8_UINT";
            case PixelFormat::R16G16B16A16_FLOAT:
                return "R16G16B16A16_FLOAT";
            case PixelFormat::R16G16B16A16_UNORM:
                return "R16G16B16A16_UNORM";
            case PixelFormat::R16G16B16A16_SNORM:
                return "R16G16B16A16_SNORM";
            case PixelFormat::R16G16B16A16_SINT:
                return "R16G16B16A16_SINT";
            case PixelFormat::R16G16B16A16_UINT:
                return "R16G16B16A16_UINT";
            case PixelFormat::B10G11R11_FLOAT:
                return "B10G11R11_FLOAT";
            case PixelFormat::R32G32B32A32_UINT:
                return "R32G32B32A32_UINT";
            case PixelFormat::BC1_RGBA_UNORM:
                return "BC1_RGBA_UNORM";
            case PixelFormat::BC2_UNORM:
                return "BC2_UNORM";
            case PixelFormat::BC3_UNORM:
                return "BC3_UNORM";
            case PixelFormat::BC4_UNORM:
                return "BC4_UNORM";
            case PixelFormat::BC4_SNORM:
                return "BC4_SNORM";
            case PixelFormat::BC5_UNORM:
                return "BC5_UNORM";
            case PixelFormat::BC5_SNORM:
                return "BC5_SNORM";
            case PixelFormat::BC7_UNORM:
                return "BC7_UNORM";
            case PixelFormat::BC6H_UFLOAT:
                return "BC6H_UFLOAT";
            case PixelFormat::BC6H_SFLOAT:
                return "BC6H_SFLOAT";
            case PixelFormat::ASTC_2D_4X4_UNORM:
                return "ASTC_2D_4X4_UNORM";
            case PixelFormat::B8G8R8A8_UNORM:
                return "B8G8R8A8_UNORM";
            case PixelFormat::R32G32B32A32_FLOAT:
                return "R32G32B32A32_FLOAT";
            case PixelFormat::R32G32B32A32_SINT:
                return "R32G32B32A32_SINT";
            case PixelFormat::R32G32_FLOAT:
                return "R32G32_FLOAT";
            case PixelFormat::R32G32_SINT:
                return "R32G32_SINT";
            case PixelFormat::R32_FLOAT:
                return "R32_FLOAT";
            case PixelFormat::R16_FLOAT:
                return "R16_FLOAT";
            case PixelFormat::R16_UNORM:
                return "R16_UNORM";
            case PixelFormat::R16_SNORM:
                return "R16_SNORM";
            case PixelFormat::R16_UINT:
                return "R16_UINT";
            case PixelFormat::R16_SINT:
                return "R16_SINT";
            case PixelFormat::R16G16_UNORM:
                return "R16G16_UNORM";
            case PixelFormat::R16G16_FLOAT:
                return "R16G16_FLOAT";
            case PixelFormat::R16G16_UINT:
                return "R16G16_UINT";
            case PixelFormat::R16G16_SINT:
                return "R16G16_SINT";
            case PixelFormat::R16G16_SNORM:
                return "R16G16_SNORM";
            case PixelFormat::R32G32B32_FLOAT:
                return "R32G32B32_FLOAT";
            case PixelFormat::A8B8G8R8_SRGB:
                return "A8B8G8R8_SRGB";
            case PixelFormat::R8G8_UNORM:
                return "R8G8_UNORM";
            case PixelFormat::R8G8_SNORM:
                return "R8G8_SNORM";
            case PixelFormat::R8G8_SINT:
                return "R8G8_SINT";
            case PixelFormat::R8G8_UINT:
                return "R8G8_UINT";
            case PixelFormat::R32G32_UINT:
                return "R32G32_UINT";
            case PixelFormat::R16G16B16X16_FLOAT:
                return "R16G16B16X16_FLOAT";
            case PixelFormat::R32_UINT:
                return "R32_UINT";
            case PixelFormat::R32_SINT:
                return "R32_SINT";
            case PixelFormat::ASTC_2D_8X8_UNORM:
                return "ASTC_2D_8X8_UNORM";
            case PixelFormat::ASTC_2D_8X5_UNORM:
                return "ASTC_2D_8X5_UNORM";
            case PixelFormat::ASTC_2D_5X4_UNORM:
                return "ASTC_2D_5X4_UNORM";
            case PixelFormat::B8G8R8A8_SRGB:
                return "B8G8R8A8_SRGB";
            case PixelFormat::BC1_RGBA_SRGB:
                return "BC1_RGBA_SRGB";
            case PixelFormat::BC2_SRGB:
                return "BC2_SRGB";
            case PixelFormat::BC3_SRGB:
                return "BC3_SRGB";
            case PixelFormat::BC7_SRGB:
                return "BC7_SRGB";
            case PixelFormat::A4B4G4R4_UNORM:
                return "A4B4G4R4_UNORM";
            case PixelFormat::ASTC_2D_4X4_SRGB:
                return "ASTC_2D_4X4_SRGB";
            case PixelFormat::ASTC_2D_8X8_SRGB:
                return "ASTC_2D_8X8_SRGB";
            case PixelFormat::ASTC_2D_8X5_SRGB:
                return "ASTC_2D_8X5_SRGB";
            case PixelFormat::ASTC_2D_5X4_SRGB:
                return "ASTC_2D_5X4_SRGB";
            case PixelFormat::ASTC_2D_5X5_UNORM:
                return "ASTC_2D_5X5_UNORM";
            case PixelFormat::ASTC_2D_5X5_SRGB:
                return "ASTC_2D_5X5_SRGB";
            case PixelFormat::ASTC_2D_10X8_UNORM:
                return "ASTC_2D_10X8_UNORM";
            case PixelFormat::ASTC_2D_10X8_SRGB:
                return "ASTC_2D_10X8_SRGB";
            case PixelFormat::ASTC_2D_6X6_UNORM:
                return "ASTC_2D_6X6_UNORM";
            case PixelFormat::ASTC_2D_6X6_SRGB:
                return "ASTC_2D_6X6_SRGB";
            case PixelFormat::ASTC_2D_10X10_UNORM:
                return "ASTC_2D_10X10_UNORM";
            case PixelFormat::ASTC_2D_10X10_SRGB:
                return "ASTC_2D_10X10_SRGB";
            case PixelFormat::ASTC_2D_12X12_UNORM:
                return "ASTC_2D_12X12_UNORM";
            case PixelFormat::ASTC_2D_12X12_SRGB:
                return "ASTC_2D_12X12_SRGB";
            case PixelFormat::ASTC_2D_8X6_UNORM:
                return "ASTC_2D_8X6_UNORM";
            case PixelFormat::ASTC_2D_8X6_SRGB:
                return "ASTC_2D_8X6_SRGB";
            case PixelFormat::ASTC_2D_6X5_UNORM:
                return "ASTC_2D_6X5_UNORM";
            case PixelFormat::ASTC_2D_6X5_SRGB:
                return "ASTC_2D_6X5_SRGB";
            case PixelFormat::E5B9G9R9_FLOAT:
                return "E5B9G9R9_FLOAT";
            case PixelFormat::D32_FLOAT:
                return "D32_FLOAT";
            case PixelFormat::D16_UNORM:
                return "D16_UNORM";
            case PixelFormat::D24_UNORM_S8_UINT:
                return "D24_UNORM_S8_UINT";
            case PixelFormat::S8_UINT_D24_UNORM:
                return "S8_UINT_D24_UNORM";
            case PixelFormat::D32_FLOAT_S8_UINT:
                return "D32_FLOAT_S8_UINT";
            case PixelFormat::MaxDepthStencilFormat:
            case PixelFormat::Invalid:
                return "Invalid";
            }
            return "Invalid";
        }();
        return formatter<string_view>::format(name, ctx);
    }
};

template <>
struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
    template <typename FormatContext>
    auto format(VideoCommon::ImageType type, FormatContext& ctx) {
        const string_view name = [type] {
            using VideoCommon::ImageType;
            switch (type) {
            case ImageType::e1D:
                return "1D";
            case ImageType::e2D:
                return "2D";
            case ImageType::e3D:
                return "3D";
            case ImageType::Linear:
                return "Linear";
            case ImageType::Buffer:
                return "Buffer";
            }
            return "Invalid";
        }();
        return formatter<string_view>::format(name, ctx);
    }
};

template <>
struct fmt::formatter<VideoCommon::Extent3D> {
    constexpr auto parse(fmt::format_parse_context& ctx) {
        return ctx.begin();
    }

    template <typename FormatContext>
    auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
                              extent.depth);
    }
};

namespace VideoCommon {

struct ImageBase;
struct ImageViewBase;
struct RenderTargets;

[[nodiscard]] std::string Name(const ImageBase& image);

[[nodiscard]] std::string Name(const ImageViewBase& image_view,
                               std::optional<ImageViewType> type = std::nullopt);

[[nodiscard]] std::string Name(const RenderTargets& render_targets);

} // namespace VideoCommon
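
All three specializations follow the same fmt pattern: inherit from formatter<string_view>, translate the value to a name, and delegate. A minimal standalone sketch with a stand-in enum (assuming a fmt release contemporary with this commit, where a non-const format() member is accepted):

#include <fmt/format.h>

enum class Demo { A, B };

template <>
struct fmt::formatter<Demo> : fmt::formatter<fmt::string_view> {
    template <typename FormatContext>
    auto format(Demo value, FormatContext& ctx) {
        // Translate the enum to a name, then reuse the string_view formatter.
        const string_view name = value == Demo::A ? "A" : "B";
        return formatter<string_view>::format(name, ctx);
    }
};

int main() {
    fmt::print("{}\n", Demo::B); // prints "B"
    return 0;
}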
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..959b3f115
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,218 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/formatter.h"
13#include "video_core/texture_cache/image_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/util.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::DefaultBlockHeight;
20using VideoCore::Surface::DefaultBlockWidth;
21
22namespace {
23/// Returns the base layer and mip level offset
24[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
25 if (layer_stride == 0) {
26 return {0, diff};
27 } else {
28 return {diff / layer_stride, diff % layer_stride};
29 }
30}
31
32[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
33 return layers.base_level < info.resources.levels &&
34 layers.base_layer + layers.num_layers <= info.resources.layers;
35}
36
37[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
38 const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
39 const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
40 if (!ValidateLayers(copy.src_subresource, src)) {
41 return false;
42 }
43 if (!ValidateLayers(copy.dst_subresource, dst)) {
44 return false;
45 }
46 if (copy.src_offset.x + copy.extent.width > src_size.width ||
47 copy.src_offset.y + copy.extent.height > src_size.height ||
48 copy.src_offset.z + copy.extent.depth > src_size.depth) {
49 return false;
50 }
51 if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
52 copy.dst_offset.y + copy.extent.height > dst_size.height ||
53 copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
54 return false;
55 }
56 return true;
57}
58} // Anonymous namespace
59
60ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
61 : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
62 unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
63 converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
64 cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
65 mip_level_offsets{CalculateMipLevelOffsets(info)} {
66 if (info.type == ImageType::e3D) {
67 slice_offsets = CalculateSliceOffsets(info);
68 slice_subresources = CalculateSliceSubresources(info);
69 }
70}
71
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base
75 return std::nullopt;
76 }
77 const u32 diff = static_cast<u32>(other_addr - gpu_addr);
78 if (diff > guest_size_bytes) {
79 // This can happen when two CPU addresses are used for different GPU addresses
80 return std::nullopt;
81 }
82 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
86 if (layer > info.resources.layers || it == end) {
87 return std::nullopt;
88 }
89 return SubresourceBase{
90 .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
91 .layer = layer,
92 };
93 } else {
94 // TODO: Consider using binary_search after a threshold
95 const auto it = std::ranges::find(slice_offsets, diff);
96 if (it == slice_offsets.cend()) {
97 return std::nullopt;
98 }
99 return slice_subresources[std::distance(slice_offsets.begin(), it)];
100 }
101}
102
103ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
104 const auto it = std::ranges::find(image_view_infos, view_info);
105 if (it == image_view_infos.end()) {
106 return ImageViewId{};
107 }
108 return image_view_ids[std::distance(image_view_infos.begin(), it)];
109}
110
111void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
112 image_view_infos.push_back(view_info);
113 image_view_ids.push_back(image_view_id);
114}
115
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type);
119 std::optional<SubresourceBase> base;
120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else {
123 // We are passing relaxed formats as an option, having broken views or not won't matter
124 static constexpr bool broken_views = false;
125 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
126 }
127 if (!base) {
128 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
129 return;
130 }
131 const PixelFormat lhs_format = lhs.info.format;
132 const PixelFormat rhs_format = rhs.info.format;
133 const Extent2D lhs_block{
134 .width = DefaultBlockWidth(lhs_format),
135 .height = DefaultBlockHeight(lhs_format),
136 };
137 const Extent2D rhs_block{
138 .width = DefaultBlockWidth(rhs_format),
139 .height = DefaultBlockHeight(rhs_format),
140 };
141 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
142 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
143 if (is_lhs_compressed && is_rhs_compressed) {
144 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
145 return;
146 }
147 const s32 lhs_mips = lhs.info.resources.levels;
148 const s32 rhs_mips = rhs.info.resources.levels;
149 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
150 AliasedImage lhs_alias;
151 AliasedImage rhs_alias;
152 lhs_alias.id = rhs_id;
153 rhs_alias.id = lhs_id;
154 lhs_alias.copies.reserve(num_mips);
155 rhs_alias.copies.reserve(num_mips);
156 for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
157 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
158 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
159 if (is_lhs_compressed) {
160 lhs_size.width /= lhs_block.width;
161 lhs_size.height /= lhs_block.height;
162 }
163 if (is_rhs_compressed) {
164 rhs_size.width /= rhs_block.width;
165 rhs_size.height /= rhs_block.height;
166 }
167 const Extent3D copy_size{
168 .width = std::min(lhs_size.width, rhs_size.width),
169 .height = std::min(lhs_size.height, rhs_size.height),
170 .depth = std::min(lhs_size.depth, rhs_size.depth),
171 };
172 if (copy_size.width == 0 || copy_size.height == 0) {
173 LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
174 continue;
175 }
176 const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
177 const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
178 const Offset3D lhs_offset{0, 0, 0};
179 const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
180 const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
181 const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
182 const s32 num_layers = std::min(lhs_layers, rhs_layers);
183 const SubresourceLayers lhs_subresource{
184 .base_level = mip_level,
185 .base_layer = 0,
186 .num_layers = num_layers,
187 };
188 const SubresourceLayers rhs_subresource{
189 .base_level = base->level + mip_level,
190 .base_layer = is_rhs_3d ? 0 : base->layer,
191 .num_layers = num_layers,
192 };
193 [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
194 .src_subresource = lhs_subresource,
195 .dst_subresource = rhs_subresource,
196 .src_offset = lhs_offset,
197 .dst_offset = rhs_offset,
198 .extent = copy_size,
199 });
200 [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
201 .src_subresource = rhs_subresource,
202 .dst_subresource = lhs_subresource,
203 .src_offset = rhs_offset,
204 .dst_offset = lhs_offset,
205 .extent = copy_size,
206 });
207 ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
208 ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
209 }
210 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
211 if (lhs_alias.copies.empty()) {
212 return;
213 }
214 lhs.aliased_images.push_back(std::move(lhs_alias));
215 rhs.aliased_images.push_back(std::move(rhs_alias));
216}
217
218} // namespace VideoCommon
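
The per-mip copy extents above come from halving each dimension per level and, for compressed formats, dividing by the block footprint before clamping against the other image's size. A minimal standalone sketch of that arithmetic (SketchExtent and MipExtent are illustrative stand-ins for the cache's Extent3D and MipSize helpers, not the committed implementation):

// Sketch only: the mip extent math used when building the aliased copies.
struct SketchExtent {
    unsigned width, height, depth;
};

constexpr unsigned HalveClamped(unsigned value, int level) {
    const unsigned shifted = value >> level;
    return shifted != 0 ? shifted : 1u;
}

constexpr SketchExtent MipExtent(SketchExtent size, int level) {
    return {HalveClamped(size.width, level), HalveClamped(size.height, level),
            HalveClamped(size.depth, level)};
}

static_assert(MipExtent({16, 16, 1}, 2).width == 4, "16x16 at level 2 is 4x4");

After this, a BC-compressed side additionally divides width and height by its 4x4 block, which is why a mip whose extent collapses below one block is skipped with the warning above.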
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/texture_cache/image_info.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19enum class ImageFlagBits : u32 {
20 AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
21 Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
22 CpuModified = 1 << 2, ///< Contents have been modified from the CPU
23 GpuModified = 1 << 3, ///< Contents have been modified from the GPU
24 Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
25    Strong = 1 << 5,            ///< Exists in the image table; the dimensions can be trusted
26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30
31struct ImageViewInfo;
32
33struct AliasedImage {
34 std::vector<ImageCopy> copies;
35 ImageId id;
36};
37
38struct ImageBase {
39 explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
40
41 [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
42
43 [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
44
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 }
51
52 ImageInfo info;
53
54 u32 guest_size_bytes = 0;
55 u32 unswizzled_size_bytes = 0;
56 u32 converted_size_bytes = 0;
57 ImageFlagBits flags = ImageFlagBits::CpuModified;
58
59 GPUVAddr gpu_addr = 0;
60 VAddr cpu_addr = 0;
61 VAddr cpu_addr_end = 0;
62
63 u64 modification_tick = 0;
64 u64 frame_tick = 0;
65
66 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
67
68 std::vector<ImageViewInfo> image_view_infos;
69 std::vector<ImageViewId> image_view_ids;
70
71 std::vector<u32> slice_offsets;
72 std::vector<SubresourceBase> slice_subresources;
73
74 std::vector<AliasedImage> aliased_images;
75};
76
77struct ImageAllocBase {
78 std::vector<ImageId> images;
79};
80
81void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
82
83} // namespace VideoCommon
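
DECLARE_ENUM_FLAG_OPERATORS gives ImageFlagBits the usual bitwise operators (common_funcs.h provides |, &, ~ and their compound forms). A hedged sketch of how caller code can manipulate the flags member; the function names are illustrative, not part of the commit:

// Illustrative only: flag updates enabled by DECLARE_ENUM_FLAG_OPERATORS.
void MarkGpuModifiedSketch(ImageBase& image) {
    image.flags |= ImageFlagBits::GpuModified;   // GPU wrote to the image
    image.flags &= ~ImageFlagBits::CpuModified;  // guest copy is now stale
}

bool IsCpuModifiedSketch(const ImageBase& image) {
    return (image.flags & ImageFlagBits::CpuModified) != ImageFlagBits{};
}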
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/surface.h"
7#include "video_core/texture_cache/format_lookup_table.h"
8#include "video_core/texture_cache/image_info.h"
9#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::TextureType;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20ImageInfo::ImageInfo(const TICEntry& config) noexcept {
21 format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
22 config.a_type, config.srgb_conversion);
23 num_samples = NumSamples(config.msaa_mode);
24 resources.levels = config.max_mip_level + 1;
25 if (config.IsPitchLinear()) {
26 pitch = config.Pitch();
27 } else if (config.IsBlockLinear()) {
28 block = Extent3D{
29 .width = config.block_width,
30 .height = config.block_height,
31 .depth = config.block_depth,
32 };
33 }
34 tile_width_spacing = config.tile_width_spacing;
35 if (config.texture_type != TextureType::Texture2D &&
36 config.texture_type != TextureType::Texture2DNoMipmap) {
37 ASSERT(!config.IsPitchLinear());
38 }
39 switch (config.texture_type) {
40 case TextureType::Texture1D:
41 ASSERT(config.BaseLayer() == 0);
42 type = ImageType::e1D;
43 size.width = config.Width();
44 break;
45 case TextureType::Texture1DArray:
46 UNIMPLEMENTED_IF(config.BaseLayer() != 0);
47 type = ImageType::e1D;
48 size.width = config.Width();
49 resources.layers = config.Depth();
50 break;
51 case TextureType::Texture2D:
52 case TextureType::Texture2DNoMipmap:
53 ASSERT(config.Depth() == 1);
54 type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
55 size.width = config.Width();
56 size.height = config.Height();
57 resources.layers = config.BaseLayer() + 1;
58 break;
59 case TextureType::Texture2DArray:
60 type = ImageType::e2D;
61 size.width = config.Width();
62 size.height = config.Height();
63 resources.layers = config.BaseLayer() + config.Depth();
64 break;
65 case TextureType::TextureCubemap:
66 ASSERT(config.Depth() == 1);
67 type = ImageType::e2D;
68 size.width = config.Width();
69 size.height = config.Height();
70 resources.layers = config.BaseLayer() + 6;
71 break;
72 case TextureType::TextureCubeArray:
73 UNIMPLEMENTED_IF(config.load_store_hint != 0);
74 type = ImageType::e2D;
75 size.width = config.Width();
76 size.height = config.Height();
77 resources.layers = config.BaseLayer() + config.Depth() * 6;
78 break;
79 case TextureType::Texture3D:
80 ASSERT(config.BaseLayer() == 0);
81 type = ImageType::e3D;
82 size.width = config.Width();
83 size.height = config.Height();
84 size.depth = config.Depth();
85 break;
86 case TextureType::Texture1DBuffer:
87 type = ImageType::Buffer;
88 size.width = config.Width();
89 break;
90 default:
91 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
92 break;
93 }
94 if (type != ImageType::Linear) {
95 // FIXME: Call this without passing *this
96 layer_stride = CalculateLayerStride(*this);
97 maybe_unaligned_layer_stride = CalculateLayerSize(*this);
98 }
99}
100
101ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
102 const auto& rt = regs.rt[index];
103 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
104 if (rt.tile_mode.is_pitch_linear) {
105 ASSERT(rt.tile_mode.is_3d == 0);
106 type = ImageType::Linear;
107 pitch = rt.width;
108 size = Extent3D{
109 .width = pitch / BytesPerBlock(format),
110 .height = rt.height,
111 .depth = 1,
112 };
113 return;
114 }
115 size.width = rt.width;
116 size.height = rt.height;
117 layer_stride = rt.layer_stride * 4;
118 maybe_unaligned_layer_stride = layer_stride;
119 num_samples = NumSamples(regs.multisample_mode);
120 block = Extent3D{
121 .width = rt.tile_mode.block_width,
122 .height = rt.tile_mode.block_height,
123 .depth = rt.tile_mode.block_depth,
124 };
125 if (rt.tile_mode.is_3d) {
126 type = ImageType::e3D;
127 size.depth = rt.depth;
128 } else {
129 type = ImageType::e2D;
130 resources.layers = rt.depth;
131 }
132}
133
134ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
135 format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
136 size.width = regs.zeta_width;
137 size.height = regs.zeta_height;
138 resources.levels = 1;
139 layer_stride = regs.zeta.layer_stride * 4;
140 maybe_unaligned_layer_stride = layer_stride;
141 num_samples = NumSamples(regs.multisample_mode);
142 block = Extent3D{
143 .width = regs.zeta.tile_mode.block_width,
144 .height = regs.zeta.tile_mode.block_height,
145 .depth = regs.zeta.tile_mode.block_depth,
146 };
147 if (regs.zeta.tile_mode.is_pitch_linear) {
148 ASSERT(regs.zeta.tile_mode.is_3d == 0);
149 type = ImageType::Linear;
150 pitch = size.width * BytesPerBlock(format);
151 } else if (regs.zeta.tile_mode.is_3d) {
152 ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
153 type = ImageType::e3D;
154 size.depth = regs.zeta_depth;
155 } else {
156 type = ImageType::e2D;
157 resources.layers = regs.zeta_depth;
158 }
159}
160
161ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
162 UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
163 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
164 if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
165 type = ImageType::Linear;
166 size = Extent3D{
167 .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
168 .height = config.height,
169 .depth = 1,
170 };
171 pitch = config.pitch;
172 } else {
173 type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
174 block = Extent3D{
175 .width = config.block_width,
176 .height = config.block_height,
177 .depth = config.block_depth,
178 };
179        // 3D blits with more than one slice are not implemented for now
180 // Render to individual slices
181 size = Extent3D{
182 .width = config.width,
183 .height = config.height,
184 .depth = 1,
185 };
186 }
187}
188
189} // namespace VideoCommon
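
The cubemap paths in the TIC constructor fold the six faces into the layer count. A tiny sketch of that arithmetic (names are illustrative):

// Sketch: the cube layer arithmetic from the constructor above.
constexpr int CubemapLayersSketch(int base_layer) {
    return base_layer + 6; // TextureCubemap: six faces
}

constexpr int CubeArrayLayersSketch(int base_layer, int depth) {
    return base_layer + depth * 6; // TextureCubeArray: six faces per element
}

static_assert(CubeArrayLayersSketch(0, 4) == 24, "four cubes span 24 layers");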
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14using Tegra::Texture::TICEntry;
15using VideoCore::Surface::PixelFormat;
16
17struct ImageInfo {
18 explicit ImageInfo() = default;
19 explicit ImageInfo(const TICEntry& config) noexcept;
20 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
21 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
22 explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
23
24 PixelFormat format = PixelFormat::Invalid;
25 ImageType type = ImageType::e1D;
26 SubresourceExtent resources;
27 Extent3D size{1, 1, 1};
28 union {
29 Extent3D block{0, 0, 0};
30 u32 pitch;
31 };
32 u32 layer_stride = 0;
33 u32 maybe_unaligned_layer_stride = 0;
34 u32 num_samples = 1;
35 u32 tile_width_spacing = 0;
36};
37
38} // namespace VideoCommon
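
block and pitch share storage in the anonymous union; which member is meaningful follows from type: pitch is written for pitch-linear images, the block-linear extents otherwise. A hedged accessor sketch (not in the commit):

// Sketch: picking the active union member by image type.
u32 PitchOrZeroSketch(const ImageInfo& info) {
    // pitch is only written for pitch-linear images and block only for
    // block-linear ones; reading the inactive member would be meaningless.
    return info.type == ImageType::Linear ? info.pitch : 0;
}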
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..18f72e508
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "core/settings.h"
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_info.h"
13#include "video_core/texture_cache/image_view_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
20 ImageId image_id_)
21 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
22 size{
23 .width = std::max(image_info.size.width >> range.base.level, 1u),
24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
31 if (image_info.type == ImageType::Linear && is_async) {
32 flags |= ImageViewFlagBits::PreemtiveDownload;
33 }
34 if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
35 flags |= ImageViewFlagBits::Slice;
36 }
37}
38
39ImageViewBase::ImageViewBase(const NullImageParams&) {}
40
41} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h"
10
11namespace VideoCommon {
12
13using VideoCore::Surface::PixelFormat;
14
15struct ImageViewInfo;
16struct ImageInfo;
17
18struct NullImageParams {};
19
20enum class ImageViewFlagBits : u16 {
21 PreemtiveDownload = 1 << 0,
22 Strong = 1 << 1,
23 Slice = 1 << 2,
24};
25DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
26
27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id);
30 explicit ImageViewBase(const NullImageParams&);
31
32 [[nodiscard]] bool IsBuffer() const noexcept {
33 return type == ImageViewType::Buffer;
34 }
35
36 ImageId image_id{};
37 PixelFormat format{};
38 ImageViewType type{};
39 SubresourceRange range;
40 Extent3D size{0, 0, 0};
41 ImageViewFlagBits flags{};
42
43 u64 invalidation_tick = 0;
44 u64 modification_tick = 0;
45};
46
47} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/assert.h"
8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/textures/texture.h"
12
13namespace VideoCommon {
14
15namespace {
16
17constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
18
19[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
20 const u8 casted = static_cast<u8>(source);
21 ASSERT(static_cast<SwizzleSource>(casted) == source);
22 return casted;
23}
24
25} // Anonymous namespace
26
27ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
28 : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
29 y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
30 w_source{CastSwizzle(config.w_source)} {
31 range.base = SubresourceBase{
32 .level = static_cast<s32>(config.res_min_mip_level),
33 .layer = base_layer,
34 };
35 range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
36
37 switch (config.texture_type) {
38 case TextureType::Texture1D:
39 ASSERT(config.Height() == 1);
40 ASSERT(config.Depth() == 1);
41 type = ImageViewType::e1D;
42 break;
43 case TextureType::Texture2D:
44 case TextureType::Texture2DNoMipmap:
45 ASSERT(config.Depth() == 1);
46 type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
47 break;
48 case TextureType::Texture3D:
49 type = ImageViewType::e3D;
50 break;
51 case TextureType::TextureCubemap:
52 ASSERT(config.Depth() == 1);
53 type = ImageViewType::Cube;
54 range.extent.layers = 6;
55 break;
56 case TextureType::Texture1DArray:
57 type = ImageViewType::e1DArray;
58 range.extent.layers = config.Depth();
59 break;
60 case TextureType::Texture2DArray:
61 type = ImageViewType::e2DArray;
62 range.extent.layers = config.Depth();
63 break;
64 case TextureType::Texture1DBuffer:
65 type = ImageViewType::Buffer;
66 break;
67 case TextureType::TextureCubeArray:
68 type = ImageViewType::CubeArray;
69 range.extent.layers = config.Depth() * 6;
70 break;
71 default:
72 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
73 break;
74 }
75}
76
77ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
78 SubresourceRange range_) noexcept
79 : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
80 y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
81 w_source{RENDER_TARGET_SWIZZLE} {}
82
83bool ImageViewInfo::IsRenderTarget() const noexcept {
84 return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
85 z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
86}
87
88} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9
10#include "video_core/surface.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::SwizzleSource;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20/// Properties used to determine an image view
21struct ImageViewInfo {
22 explicit ImageViewInfo() noexcept = default;
23 explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
24 explicit ImageViewInfo(ImageViewType type, PixelFormat format,
25 SubresourceRange range = {}) noexcept;
26
27 auto operator<=>(const ImageViewInfo&) const noexcept = default;
28
29 [[nodiscard]] bool IsRenderTarget() const noexcept;
30
31 [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
32 return std::array{
33 static_cast<SwizzleSource>(x_source),
34 static_cast<SwizzleSource>(y_source),
35 static_cast<SwizzleSource>(z_source),
36 static_cast<SwizzleSource>(w_source),
37 };
38 }
39
40 ImageViewType type{};
41 PixelFormat format{};
42 SubresourceRange range;
43 u8 x_source = static_cast<u8>(SwizzleSource::R);
44 u8 y_source = static_cast<u8>(SwizzleSource::G);
45 u8 z_source = static_cast<u8>(SwizzleSource::B);
46 u8 w_source = static_cast<u8>(SwizzleSource::A);
47};
48static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
49
50} // namespace VideoCommon
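
The static_assert guarantees ImageViewInfo has no padding bytes, so the defaulted operator<=> observes every bit of state and byte-wise hashing would be sound. A sketch of the latter; the cache itself does a linear std::ranges::find over the stored infos instead:

// Sketch: the padding-free layout makes byte-wise hashing well defined.
#include <cstring>
#include <functional>
#include <string_view>

size_t HashViewInfoSketch(const ImageViewInfo& info) {
    char bytes[sizeof(ImageViewInfo)];
    std::memcpy(bytes, &info, sizeof(info)); // every byte is meaningful state
    return std::hash<std::string_view>{}(std::string_view{bytes, sizeof(bytes)});
}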
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <span>
9#include <utility>
10
11#include "common/bit_cast.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16/// Framebuffer properties used to look up a framebuffer
17struct RenderTargets {
18 constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
19
20 constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
21 const auto contains = [elements](ImageViewId item) {
22 return std::ranges::find(elements, item) != elements.end();
23 };
24 return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
25 }
26
27 std::array<ImageViewId, NUM_RT> color_buffer_ids;
28 ImageViewId depth_buffer_id;
29 std::array<u8, NUM_RT> draw_buffers{};
30 Extent2D size;
31};
32
33} // namespace VideoCommon
34
35namespace std {
36
37template <>
38struct hash<VideoCommon::RenderTargets> {
39 size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
40 using VideoCommon::ImageViewId;
41 size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
42 for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
43 value ^= std::hash<ImageViewId>{}(color_buffer_id);
44 }
45 value ^= Common::BitCast<u64>(rt.draw_buffers);
46 value ^= Common::BitCast<u64>(rt.size);
47 return value;
48 }
49};
50
51} // namespace std
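
The defaulted operator<=> (which also generates operator==) plus the std::hash specialization let RenderTargets key an unordered container directly. A usage sketch; FramebufferSketch is a placeholder, the real framebuffer object is backend specific:

// Sketch: framebuffer lookup keyed by RenderTargets.
#include <unordered_map>

struct FramebufferSketch {};

std::unordered_map<VideoCommon::RenderTargets, FramebufferSketch> framebuffers;

FramebufferSketch& GetFramebufferSketch(const VideoCommon::RenderTargets& key) {
    return framebuffers[key]; // hashes through the specialization above
}

Note that XOR-folding the per-attachment hashes is commutative, so two keys that merely permute their color buffers collide; that is cheap to compute and harmless, since collisions fall back to operator==.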
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/assert.h"
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
15 switch (num_samples) {
16 case 1:
17 return {0, 0};
18 case 2:
19 return {1, 0};
20 case 4:
21 return {1, 1};
22 case 8:
23 return {2, 1};
24 case 16:
25 return {2, 2};
26 }
27 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
28 return {1, 1};
29}
30
31[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
32 using Tegra::Texture::MsaaMode;
33 switch (msaa_mode) {
34 case MsaaMode::Msaa1x1:
35 return 1;
36 case MsaaMode::Msaa2x1:
37 case MsaaMode::Msaa2x1_D3D:
38 return 2;
39 case MsaaMode::Msaa2x2:
40 case MsaaMode::Msaa2x2_VC4:
41 case MsaaMode::Msaa2x2_VC12:
42 return 4;
43 case MsaaMode::Msaa4x2:
44 case MsaaMode::Msaa4x2_D3D:
45 case MsaaMode::Msaa4x2_VC8:
46 case MsaaMode::Msaa4x2_VC24:
47 return 8;
48 case MsaaMode::Msaa4x4:
49 return 16;
50 }
51 UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
52 return 1;
53}
54
55} // namespace VideoCommon
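
SamplesLog2 splits the sample count into per-axis shifts (for example, 8 samples is a 4x2 pattern, hence {2, 1}). A sketch of applying them; whether the shift scales up or down depends on whether the caller converts to or from sample space:

// Sketch: applying the per-axis shifts to convert a size into sample space.
std::pair<int, int> ToSampleShiftedSizeSketch(int width, int height,
                                              int num_samples) {
    const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(num_samples);
    return {width >> samples_x, height >> samples_y};
}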
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <concepts>
9#include <numeric>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16
17namespace VideoCommon {
18
19struct SlotId {
20 static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
21
22 constexpr auto operator<=>(const SlotId&) const noexcept = default;
23
24 constexpr explicit operator bool() const noexcept {
25 return index != INVALID_INDEX;
26 }
27
28 u32 index = INVALID_INDEX;
29};
30
31template <class T>
32requires std::is_nothrow_move_assignable_v<T>&&
33 std::is_nothrow_move_constructible_v<T> class SlotVector {
34public:
35 ~SlotVector() noexcept {
36 size_t index = 0;
37 for (u64 bits : stored_bitset) {
38 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
39 if ((bits & 1) != 0) {
40 values[index + bit].object.~T();
41 }
42 }
43 index += 64;
44 }
45 delete[] values;
46 }
47
48 [[nodiscard]] T& operator[](SlotId id) noexcept {
49 ValidateIndex(id);
50 return values[id.index].object;
51 }
52
53 [[nodiscard]] const T& operator[](SlotId id) const noexcept {
54 ValidateIndex(id);
55 return values[id.index].object;
56 }
57
58 template <typename... Args>
59 [[nodiscard]] SlotId insert(Args&&... args) noexcept {
60 const u32 index = FreeValueIndex();
61 new (&values[index].object) T(std::forward<Args>(args)...);
62 SetStorageBit(index);
63
64 return SlotId{index};
65 }
66
67 void erase(SlotId id) noexcept {
68 values[id.index].object.~T();
69 free_list.push_back(id.index);
70 ResetStorageBit(id.index);
71 }
72
73private:
74 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {}
76 };
77
78 union Entry {
79 Entry() noexcept : dummy{} {}
80 ~Entry() noexcept {}
81
82 NonTrivialDummy dummy;
83 T object;
84 };
85
86 void SetStorageBit(u32 index) noexcept {
87 stored_bitset[index / 64] |= u64(1) << (index % 64);
88 }
89
90 void ResetStorageBit(u32 index) noexcept {
91 stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
92 }
93
94 bool ReadStorageBit(u32 index) noexcept {
95 return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
96 }
97
98 void ValidateIndex(SlotId id) const noexcept {
99 DEBUG_ASSERT(id);
100 DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
101 DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
102 }
103
104 [[nodiscard]] u32 FreeValueIndex() noexcept {
105 if (free_list.empty()) {
106 Reserve(values_capacity ? (values_capacity << 1) : 1);
107 }
108 const u32 free_index = free_list.back();
109 free_list.pop_back();
110 return free_index;
111 }
112
113 void Reserve(size_t new_capacity) noexcept {
114 Entry* const new_values = new Entry[new_capacity];
115 size_t index = 0;
116 for (u64 bits : stored_bitset) {
117 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
118 const size_t i = index + bit;
119 if ((bits & 1) == 0) {
120 continue;
121 }
122 T& old_value = values[i].object;
123 new (&new_values[i].object) T(std::move(old_value));
124 old_value.~T();
125 }
126 index += 64;
127 }
128
129 stored_bitset.resize((new_capacity + 63) / 64);
130
131 const size_t old_free_size = free_list.size();
132 free_list.resize(old_free_size + (new_capacity - values_capacity));
133 std::iota(free_list.begin() + old_free_size, free_list.end(),
134 static_cast<u32>(values_capacity));
135
136 delete[] values;
137 values = new_values;
138 values_capacity = new_capacity;
139 }
140
141 Entry* values = nullptr;
142 size_t values_capacity = 0;
143 size_t values_size = 0;
144
145 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list;
147};
148
149} // namespace VideoCommon
150
151template <>
152struct std::hash<VideoCommon::SlotId> {
153 size_t operator()(const VideoCommon::SlotId& id) const noexcept {
154 return std::hash<u32>{}(id.index);
155 }
156};
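
A hedged usage sketch of SlotVector; ImageSketch is a placeholder type that satisfies the nothrow-move constraints:

// Sketch: the typical SlotVector lifetime of one object.
struct ImageSketch {
    explicit ImageSketch(int width_) noexcept : width{width_} {}
    ImageSketch(ImageSketch&&) noexcept = default;
    ImageSketch& operator=(ImageSketch&&) noexcept = default;
    int width;
};

void SlotVectorExample() {
    VideoCommon::SlotVector<ImageSketch> images;
    const VideoCommon::SlotId id = images.insert(1024); // constructs in place
    images[id].width = 2048;                            // O(1) access by id
    images.erase(id); // destroys the object; the index joins the free list
}

Erased indices are recycled before the backing array grows, so SlotId values stay dense and the stored bitset tracks which entries hold live objects.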
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index b44c09d71..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,298 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/algorithm.h"
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/memory_manager.h"
10#include "video_core/texture_cache/surface_base.h"
11#include "video_core/texture_cache/surface_params.h"
12#include "video_core/textures/convert.h"
13
14namespace VideoCommon {
15
16MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
17MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
18
19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
23
24StagingCache::StagingCache() = default;
25
26StagingCache::~StagingCache() = default;
27
28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
29 bool is_astc_supported)
30 : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
35 std::size_t offset = 0;
36 for (u32 level = 0; level < params.num_levels; ++level) {
37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
38 mipmap_sizes[level] = mipmap_size;
39 mipmap_offsets[level] = offset;
40 offset += mipmap_size;
41 }
42 layer_size = offset;
43 if (params.is_layered) {
44 if (params.is_tiled) {
45 layer_size =
46 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
47 }
48 guest_memory_size = layer_size * params.depth;
49 } else {
50 guest_memory_size = layer_size;
51 }
52}
53
54MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
55 const u32 src_bpp{params.GetBytesPerPixel()};
56 const u32 dst_bpp{rhs.GetBytesPerPixel()};
57 const bool ib1 = params.IsBuffer();
58 const bool ib2 = rhs.IsBuffer();
59 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
60 const bool cb1 = params.IsCompressed();
61 const bool cb2 = rhs.IsCompressed();
62 if (cb1 == cb2) {
63 return MatchTopologyResult::FullMatch;
64 }
65 return MatchTopologyResult::CompressUnmatch;
66 }
67 return MatchTopologyResult::None;
68}
69
70MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
71    // Buffer surface check
72 if (params.IsBuffer()) {
73 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
74 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
75 if (wd1 == wd2) {
76 return MatchStructureResult::FullMatch;
77 }
78 return MatchStructureResult::None;
79 }
80
81 // Linear Surface check
82 if (!params.is_tiled) {
83 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
84 if (params.width == rhs.width) {
85 return MatchStructureResult::FullMatch;
86 } else {
87 return MatchStructureResult::SemiMatch;
88 }
89 }
90 return MatchStructureResult::None;
91 }
92
93 // Tiled Surface check
94 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
95 params.tile_width_spacing, params.num_levels) ==
96 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
97 rhs.tile_width_spacing, rhs.num_levels)) {
98 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
99 return MatchStructureResult::FullMatch;
100 }
101 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
102 rhs.pixel_format);
103 const u32 hs =
104 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
105 const u32 w1 = params.GetBlockAlignedWidth();
106 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
107 return MatchStructureResult::SemiMatch;
108 }
109 }
110 return MatchStructureResult::None;
111}
112
113std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
114 const GPUVAddr candidate_gpu_addr) const {
115 if (gpu_addr == candidate_gpu_addr) {
116 return {{0, 0}};
117 }
118
119 if (candidate_gpu_addr < gpu_addr) {
120 return std::nullopt;
121 }
122
123 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
124 const auto layer{static_cast<u32>(relative_address / layer_size)};
125 if (layer >= params.depth) {
126 return std::nullopt;
127 }
128
129 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
130 const auto mipmap_it =
131 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
132 if (mipmap_it == mipmap_offsets.end()) {
133 return std::nullopt;
134 }
135
136 const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
137 return std::make_pair(layer, level);
138}
139
140std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
141 const u32 layers{params.depth};
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
145
146 for (u32 layer = 0; layer < layers; layer++) {
147 for (u32 level = 0; level < mipmaps; level++) {
148 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
149 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
150 result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
151 }
152 }
153 return result;
154}
155
156std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
157 const u32 mipmaps{params.num_levels};
158 std::vector<CopyParams> result;
159 result.reserve(mipmaps);
160
161 for (u32 level = 0; level < mipmaps; level++) {
162 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
163 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
164 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
165 result.emplace_back(width, height, depth, level);
166 }
167 return result;
168}
169
170void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
171 u8* buffer, u32 level) {
172 const u32 width{params.GetMipWidth(level)};
173 const u32 height{params.GetMipHeight(level)};
174 const u32 block_height{params.GetMipBlockHeight(level)};
175 const u32 block_depth{params.GetMipBlockDepth(level)};
176
177 std::size_t guest_offset{mipmap_offsets[level]};
178 if (params.is_layered) {
179 std::size_t host_offset = 0;
180 const std::size_t guest_stride = layer_size;
181 const std::size_t host_stride = params.GetHostLayerSize(level);
182 for (u32 layer = 0; layer < params.depth; ++layer) {
183 MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
184 params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
185 guest_offset += guest_stride;
186 host_offset += host_stride;
187 }
188 } else {
189 MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
190 params.GetMipDepth(level), params.tile_width_spacing, buffer,
191 memory + guest_offset);
192 }
193}
194
195void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
196 StagingCache& staging_cache) {
197 MICROPROFILE_SCOPE(GPU_Load_Texture);
198 auto& staging_buffer = staging_cache.GetBuffer(0);
199 u8* host_ptr;
200    // Use an extra temporary buffer
201 auto& tmp_buffer = staging_cache.GetBuffer(1);
202 tmp_buffer.resize(guest_memory_size);
203 host_ptr = tmp_buffer.data();
204 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
205
206 if (params.is_tiled) {
207 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
208 params.block_width, static_cast<u32>(params.target));
209 for (u32 level = 0; level < params.num_levels; ++level) {
210 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
211 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
212 staging_buffer.data() + host_offset, level);
213 }
214 } else {
215 ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
216 const u32 bpp{params.GetBytesPerPixel()};
217 const u32 block_width{params.GetDefaultBlockWidth()};
218 const u32 block_height{params.GetDefaultBlockHeight()};
219 const u32 width{(params.width + block_width - 1) / block_width};
220 const u32 height{(params.height + block_height - 1) / block_height};
221 const u32 copy_size{width * bpp};
222 if (params.pitch == copy_size) {
223 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
224 } else {
225 const u8* start{host_ptr};
226 u8* write_to{staging_buffer.data()};
227 for (u32 h = height; h > 0; --h) {
228 std::memcpy(write_to, start, copy_size);
229 start += params.pitch;
230 write_to += copy_size;
231 }
232 }
233 }
234
235 if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
236 return;
237 }
238
239 for (u32 level = params.num_levels; level--;) {
240 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
241 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
242 u8* const in_buffer = staging_buffer.data() + in_host_offset;
243 u8* const out_buffer = staging_buffer.data() + out_host_offset;
244 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
245 params.GetMipWidth(level), params.GetMipHeight(level),
246 params.GetMipDepth(level), true, true);
247 }
248}
249
250void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
251 StagingCache& staging_cache) {
252 MICROPROFILE_SCOPE(GPU_Flush_Texture);
253 auto& staging_buffer = staging_cache.GetBuffer(0);
254 u8* host_ptr;
255
256    // Use an extra temporary buffer
257 auto& tmp_buffer = staging_cache.GetBuffer(1);
258 tmp_buffer.resize(guest_memory_size);
259 host_ptr = tmp_buffer.data();
260
261 if (params.target == SurfaceTarget::Texture3D) {
262 // Special case for 3D texture segments
263 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
264 }
265
266 if (params.is_tiled) {
267 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
268 for (u32 level = 0; level < params.num_levels; ++level) {
269 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
270 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
271 staging_buffer.data() + host_offset, level);
272 }
273 } else if (params.IsBuffer()) {
274 // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
275 // memory.
276 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
277 } else {
278 ASSERT(params.target == SurfaceTarget::Texture2D);
279 ASSERT(params.num_levels == 1);
280
281 const u32 bpp{params.GetBytesPerPixel()};
282 const u32 copy_size{params.width * bpp};
283 if (params.pitch == copy_size) {
284 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
285 } else {
286 u8* start{host_ptr};
287 const u8* read_to{staging_buffer.data()};
288 for (u32 h = params.height; h > 0; --h) {
289 std::memcpy(start, read_to, copy_size);
290 start += params.pitch;
291 read_to += copy_size;
292 }
293 }
294 }
295 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
296}
297
298} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index 173f2edba..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <tuple>
9#include <unordered_map>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/gpu.h"
14#include "video_core/morton.h"
15#include "video_core/texture_cache/copy_params.h"
16#include "video_core/texture_cache/surface_params.h"
17#include "video_core/texture_cache/surface_view.h"
18
19namespace Tegra {
20class MemoryManager;
21}
22
23namespace VideoCommon {
24
25using VideoCore::MortonSwizzleMode;
26using VideoCore::Surface::SurfaceTarget;
27
28enum class MatchStructureResult : u32 {
29 FullMatch = 0,
30 SemiMatch = 1,
31 None = 2,
32};
33
34enum class MatchTopologyResult : u32 {
35 FullMatch = 0,
36 CompressUnmatch = 1,
37 None = 2,
38};
39
40class StagingCache {
41public:
42 explicit StagingCache();
43 ~StagingCache();
44
45 std::vector<u8>& GetBuffer(std::size_t index) {
46 return staging_buffer[index];
47 }
48
49 const std::vector<u8>& GetBuffer(std::size_t index) const {
50 return staging_buffer[index];
51 }
52
53 void SetSize(std::size_t size) {
54 staging_buffer.resize(size);
55 }
56
57private:
58 std::vector<std::vector<u8>> staging_buffer;
59};
60
61class SurfaceBaseImpl {
62public:
63 void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
64
65 void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
66
67 GPUVAddr GetGpuAddr() const {
68 return gpu_addr;
69 }
70
71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cpu_addr < end) && (cpu_addr_end > start);
73 }
74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
76 const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
77 return gpu_addr <= other_start && other_end <= gpu_addr_end;
78 }
79
80 // Use only when recycling a surface
81 void SetGpuAddr(const GPUVAddr new_addr) {
82 gpu_addr = new_addr;
83 }
84
85 VAddr GetCpuAddr() const {
86 return cpu_addr;
87 }
88
89 VAddr GetCpuAddrEnd() const {
90 return cpu_addr_end;
91 }
92
93 void SetCpuAddr(const VAddr new_addr) {
94 cpu_addr = new_addr;
95 cpu_addr_end = new_addr + guest_memory_size;
96 }
97
98 const SurfaceParams& GetSurfaceParams() const {
99 return params;
100 }
101
102 std::size_t GetSizeInBytes() const {
103 return guest_memory_size;
104 }
105
106 std::size_t GetHostSizeInBytes() const {
107 return host_memory_size;
108 }
109
110 std::size_t GetMipmapSize(const u32 level) const {
111 return mipmap_sizes[level];
112 }
113
114 bool IsLinear() const {
115 return !params.is_tiled;
116 }
117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
123 return params.pixel_format == pixel_format;
124 }
125
126 VideoCore::Surface::PixelFormat GetFormat() const {
127 return params.pixel_format;
128 }
129
130 bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
131 return params.target == target;
132 }
133
134 MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
135
136 MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
137
138 bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
139 return std::tie(gpu_addr, params.target, params.num_levels) ==
140 std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
141 params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
142 }
143
144 std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
145
146 std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
147 return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
148 }
149
150protected:
151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
152 bool is_astc_supported);
153 ~SurfaceBaseImpl() = default;
154
155 virtual void DecorateSurfaceName() = 0;
156
157 const SurfaceParams params;
158 std::size_t layer_size;
159 std::size_t guest_memory_size;
160 std::size_t host_memory_size;
161 GPUVAddr gpu_addr{};
162 VAddr cpu_addr{};
163 VAddr cpu_addr_end{};
164 bool is_converted{};
165
166 std::vector<std::size_t> mipmap_sizes;
167 std::vector<std::size_t> mipmap_offsets;
168
169private:
170 void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
171 u32 level);
172
173 std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
174
175 std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
176};
177
178template <typename TView>
179class SurfaceBase : public SurfaceBaseImpl {
180public:
181 virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
182
183 virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
184
185 void MarkAsModified(bool is_modified_, u64 tick) {
186 is_modified = is_modified_ || is_target;
187 modification_tick = tick;
188 }
189
190 void MarkAsRenderTarget(bool is_target_, u32 index_) {
191 is_target = is_target_;
192 index = index_;
193 }
194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
211 void MarkAsPicked(bool is_picked_) {
212 is_picked = is_picked_;
213 }
214
215 bool IsModified() const {
216 return is_modified;
217 }
218
219 bool IsProtected() const {
220 // Only 3D slices are to be protected
221 return is_target && params.target == SurfaceTarget::Texture3D;
222 }
223
224 bool IsRenderTarget() const {
225 return is_target;
226 }
227
228 u32 GetRenderTarget() const {
229 return index;
230 }
231
232 bool IsRegistered() const {
233 return is_registered;
234 }
235
236 bool IsPicked() const {
237 return is_picked;
238 }
239
240 void MarkAsRegistered(bool is_reg) {
241 is_registered = is_reg;
242 }
243
244 u64 GetModificationTick() const {
245 return modification_tick;
246 }
247
248 TView EmplaceOverview(const SurfaceParams& overview_params) {
249 const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 }
252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
259 const GPUVAddr view_addr,
260 const std::size_t candidate_size, const u32 mipmap,
261 const u32 layer) {
262 const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
263 if (!layer_mipmap) {
264 return {};
265 }
266 const auto [end_layer, end_mipmap] = *layer_mipmap;
267 if (layer != end_layer) {
268 if (mipmap == 0 && end_mipmap == 0) {
269 return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
270 }
271 return {};
272 } else {
273 return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
274 }
275 }
276
277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
278 const std::size_t candidate_size) {
279 if (params.target == SurfaceTarget::Texture3D ||
280 view_params.target == SurfaceTarget::Texture3D ||
281 (params.num_levels == 1 && !params.is_layered)) {
282 return {};
283 }
284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
285 if (!layer_mipmap) {
286 return {};
287 }
288 const auto [layer, mipmap] = *layer_mipmap;
289 if (GetMipmapSize(mipmap) != candidate_size) {
290 return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
291 }
292 return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
293 }
294
295 TView GetMainView() const {
296 return main_view;
297 }
298
299protected:
300 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
301 bool is_astc_supported)
302 : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
303
304 ~SurfaceBase() = default;
305
306 virtual TView CreateView(const ViewParams& view_key) = 0;
307
308 TView main_view;
309 std::unordered_map<ViewParams, TView> views;
310
311private:
312 TView GetView(const ViewParams& key) {
313 const auto [entry, is_cache_miss] = views.try_emplace(key);
314 auto& view{entry->second};
315 if (is_cache_miss) {
316 view = CreateView(key);
317 }
318 return view;
319 }
320
321 static constexpr u32 NO_RT = 0xFFFFFFFF;
322
323 bool is_modified{};
324 bool is_target{};
325 bool is_registered{};
326 bool is_picked{};
327 bool is_memory_marked{};
328 bool is_sync_pending{};
329 u32 index{NO_RT};
330 u64 modification_tick{};
331};
332
333} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index e8515321b..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,444 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "core/core.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/format_lookup_table.h"
15#include "video_core/texture_cache/surface_params.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::SurfaceTarget;
23using VideoCore::Surface::SurfaceTargetFromTextureType;
24using VideoCore::Surface::SurfaceType;
25
26namespace {
27
28SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
29 switch (type) {
30 case Tegra::Shader::TextureType::Texture1D:
31 return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
32 case Tegra::Shader::TextureType::Texture2D:
33 return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
34 case Tegra::Shader::TextureType::Texture3D:
35 ASSERT(!is_array);
36 return SurfaceTarget::Texture3D;
37 case Tegra::Shader::TextureType::TextureCube:
38 return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
39 default:
40 UNREACHABLE();
41 return SurfaceTarget::Texture2D;
42 }
43}
44
45SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
46 switch (type) {
47 case Tegra::Shader::ImageType::Texture1D:
48 return SurfaceTarget::Texture1D;
49 case Tegra::Shader::ImageType::TextureBuffer:
50 return SurfaceTarget::TextureBuffer;
51 case Tegra::Shader::ImageType::Texture1DArray:
52 return SurfaceTarget::Texture1DArray;
53 case Tegra::Shader::ImageType::Texture2D:
54 return SurfaceTarget::Texture2D;
55 case Tegra::Shader::ImageType::Texture2DArray:
56 return SurfaceTarget::Texture2DArray;
57 case Tegra::Shader::ImageType::Texture3D:
58 return SurfaceTarget::Texture3D;
59 default:
60 UNREACHABLE();
61 return SurfaceTarget::Texture2D;
62 }
63}
64
65constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
66 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
67}
68
69} // Anonymous namespace
70
71SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
72 const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format = lookup_table.GetPixelFormat(
82 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16_UNORM:
87 case PixelFormat::R16_FLOAT:
88 params.pixel_format = PixelFormat::D16_UNORM;
89 break;
90 case PixelFormat::R32_FLOAT:
91 params.pixel_format = PixelFormat::D32_FLOAT;
92 break;
93 default:
94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
95 static_cast<u32>(params.pixel_format));
96 }
97 params.type = GetFormatType(params.pixel_format);
98 }
99 // TODO: on 1DBuffer we should use the tic info.
100 if (tic.IsBuffer()) {
101 params.target = SurfaceTarget::TextureBuffer;
102 params.width = tic.Width();
103 params.pitch = params.width * params.GetBytesPerPixel();
104 params.height = 1;
105 params.depth = 1;
106 params.num_levels = 1;
107 params.emulated_levels = 1;
108 params.is_layered = false;
109 } else {
110 params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
111 params.width = tic.Width();
112 params.height = tic.Height();
113 params.depth = tic.Depth();
114 params.pitch = params.is_tiled ? 0 : tic.Pitch();
115 if (params.target == SurfaceTarget::TextureCubemap ||
116 params.target == SurfaceTarget::TextureCubeArray) {
117 params.depth *= 6;
118 }
119 params.num_levels = tic.max_mip_level + 1;
120 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
121 params.is_layered = params.IsLayered();
122 }
123 return params;
124}
125
126SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
127 const Tegra::Texture::TICEntry& tic,
128 const VideoCommon::Shader::Image& entry) {
129 SurfaceParams params;
130 params.is_tiled = tic.IsTiled();
131 params.srgb_conversion = tic.IsSrgbConversionEnabled();
132 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
133 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
134 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
135 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
136 params.pixel_format = lookup_table.GetPixelFormat(
137 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
138 params.type = GetFormatType(params.pixel_format);
139 params.target = ImageTypeToSurfaceTarget(entry.type);
140 // TODO: on 1DBuffer we should use the tic info.
141 if (tic.IsBuffer()) {
142 params.target = SurfaceTarget::TextureBuffer;
143 params.width = tic.Width();
144 params.pitch = params.width * params.GetBytesPerPixel();
145 params.height = 1;
146 params.depth = 1;
147 params.num_levels = 1;
148 params.emulated_levels = 1;
149 params.is_layered = false;
150 } else {
151 params.width = tic.Width();
152 params.height = tic.Height();
153 params.depth = tic.Depth();
154 params.pitch = params.is_tiled ? 0 : tic.Pitch();
155 if (params.target == SurfaceTarget::TextureCubemap ||
156 params.target == SurfaceTarget::TextureCubeArray) {
157 params.depth *= 6;
158 }
159 params.num_levels = tic.max_mip_level + 1;
160 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
161 params.is_layered = params.IsLayered();
162 }
163 return params;
164}
165
166SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
167 const auto& regs = maxwell3d.regs;
168 const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
169 const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
170 const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
171 return {
172 .is_tiled = regs.zeta.memory_layout.type ==
173 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
174 .srgb_conversion = false,
175 .is_layered = is_layered,
176 .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
177 .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
178 .block_depth = block_depth,
179 .tile_width_spacing = 1,
180 .width = regs.zeta_width,
181 .height = regs.zeta_height,
182 .depth = is_layered ? regs.zeta_layers.Value() : 1U,
183 .pitch = 0,
184 .num_levels = 1,
185 .emulated_levels = 1,
186 .pixel_format = pixel_format,
187 .type = GetFormatType(pixel_format),
188 .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
189 };
190}
191
192SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
193 std::size_t index) {
194 const auto& config{maxwell3d.regs.rt[index]};
195 SurfaceParams params;
196 params.is_tiled =
197 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
198 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
199 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
200 params.block_width = config.memory_layout.block_width;
201 params.block_height = config.memory_layout.block_height;
202 params.block_depth = config.memory_layout.block_depth;
203 params.tile_width_spacing = 1;
204 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
205 params.type = GetFormatType(params.pixel_format);
206 if (params.is_tiled) {
207 params.pitch = 0;
208 params.width = config.width;
209 } else {
210 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
211 params.pitch = config.width;
212 params.width = params.pitch / bpp;
213 }
214 params.height = config.height;
215 params.num_levels = 1;
216 params.emulated_levels = 1;
217
218 if (config.memory_layout.is_3d != 0) {
219 params.depth = config.layers.Value();
220 params.is_layered = false;
221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
231 return params;
232}
233
234SurfaceParams SurfaceParams::CreateForFermiCopySurface(
235 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
236 const bool is_tiled = !config.linear;
237 const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
238
239 SurfaceParams params{
240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
244 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
245 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
246 .tile_width_spacing = 1,
247 .width = config.width,
248 .height = config.height,
249 .depth = 1,
250 .pitch = config.pitch,
251 .num_levels = 1,
252 .emulated_levels = 1,
253 .pixel_format = pixel_format,
254 .type = GetFormatType(pixel_format),
255 // TODO(Rodrigo): Try to guess texture arrays from parameters
256 .target = SurfaceTarget::Texture2D,
257 };
258
259 params.is_layered = params.IsLayered();
260 return params;
261}
262
263VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
264 const VideoCommon::Shader::Sampler& entry) {
265 return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
266}
267
268VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
269 const VideoCommon::Shader::Image& entry) {
270 return ImageTypeToSurfaceTarget(entry.type);
271}
272
273bool SurfaceParams::IsLayered() const {
274 switch (target) {
275 case SurfaceTarget::Texture1DArray:
276 case SurfaceTarget::Texture2DArray:
277 case SurfaceTarget::TextureCubemap:
278 case SurfaceTarget::TextureCubeArray:
279 return true;
280 default:
281 return false;
282 }
283}
284
285// Auto block resizing algorithm from:
286// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
287u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
288 if (level == 0) {
289 return this->block_height;
290 }
291
292 const u32 height_new{GetMipHeight(level)};
293 const u32 default_block_height{GetDefaultBlockHeight()};
294 const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
295 const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
296 return std::clamp(block_height_new, 3U, 7U) - 3U;
297}
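A minimal standalone sketch of the mip block height computation above, assuming Common::Log2Ceil32 behaves like ceil(log2(x)); the Log2Ceil32 stand-in below is hypothetical:

    #include <algorithm>
    #include <cstdint>

    // Hypothetical stand-in for Common::Log2Ceil32.
    std::uint32_t Log2Ceil32(std::uint32_t value) {
        std::uint32_t log = 0;
        while ((std::uint64_t{1} << log) < value) {
            ++log;
        }
        return log;
    }

    // Mirrors GetMipBlockHeight for level > 0: shrink the block height as the
    // mip's row count shrinks, keeping the result in the hardware range [0, 4].
    std::uint32_t MipBlockHeight(std::uint32_t mip_height, std::uint32_t default_block_height) {
        const std::uint32_t blocks_in_y =
            (mip_height + default_block_height - 1) / default_block_height;
        return std::clamp<std::uint32_t>(Log2Ceil32(blocks_in_y), 3, 7) - 3;
    }

For example, a 512-row uncompressed texture at level 3 has a 64-row mip, so blocks_in_y == 64 and the function returns 3 (eight GOBs per block).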
298
299u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
300 if (level == 0) {
301 return this->block_depth;
302 }
303 if (is_layered) {
304 return 0;
305 }
306
307 const u32 depth_new{GetMipDepth(level)};
308 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
309 if (block_depth_new > 4) {
310 return 5 - (GetMipBlockHeight(level) >= 2);
311 }
312 return block_depth_new;
313}
314
315std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
316 std::size_t offset = 0;
317 for (u32 i = 0; i < level; i++) {
318 offset += GetInnerMipmapMemorySize(i, false, false);
319 }
320 return offset;
321}
322
323std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
324 std::size_t offset = 0;
325 if (is_converted) {
326 for (u32 i = 0; i < level; ++i) {
327 offset += GetConvertedMipmapSize(i) * GetNumLayers();
328 }
329 } else {
330 for (u32 i = 0; i < level; ++i) {
331 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
332 }
333 }
334 return offset;
335}
336
337std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
338 constexpr std::size_t rgba8_bpp = 4ULL;
339 const std::size_t mip_width = GetMipWidth(level);
340 const std::size_t mip_height = GetMipHeight(level);
341 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
342 return mip_width * mip_height * mip_depth * rgba8_bpp;
343}
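Since converted textures (the software ASTC path mentioned elsewhere in this header) are stored as RGBA8, the size above is plain width x height x depth x 4; a worked example:

    // A 100x60 non-layered mip level converts to
    //   100 * 60 * 1 * 4 = 24000 bytes per layer,
    // regardless of the original ASTC block footprint.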
344
345std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
346 std::size_t size = 0;
347 for (u32 level = 0; level < num_levels; ++level) {
348 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
349 }
350 if (is_tiled && is_layered) {
351 return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
352 }
353 return size;
354}
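The tiled-and-layered alignment above rounds each layer up to a whole block; a worked example assuming the 512-byte Tegra GOB (GOB_SIZE_SHIFT == 9):

    // block_height == 4, block_depth == 0
    //   => AlignBits(size, 9 + 4 + 0): layers align to 1 << 13 == 8192 bytes,
    //      i.e. one full 16-GOB block.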
355
356std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
357 bool uncompressed) const {
358 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
359 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
360 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
361 if (is_tiled) {
362 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
363 depth, GetMipBlockHeight(level),
364 GetMipBlockDepth(level));
365 } else if (as_host_size || IsBuffer()) {
366 return GetBytesPerPixel() * width * height * depth;
367 } else {
368 // Linear Texture Case
369 return pitch * height * depth;
370 }
371}
372
373bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
374 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
375 height, depth, pitch, num_levels, pixel_format, type, target) ==
376 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
377 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
378 rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
379}
380
381std::string SurfaceParams::TargetName() const {
382 switch (target) {
383 case SurfaceTarget::Texture1D:
384 return "1D";
385 case SurfaceTarget::TextureBuffer:
386 return "TexBuffer";
387 case SurfaceTarget::Texture2D:
388 return "2D";
389 case SurfaceTarget::Texture3D:
390 return "3D";
391 case SurfaceTarget::Texture1DArray:
392 return "1DArray";
393 case SurfaceTarget::Texture2DArray:
394 return "2DArray";
395 case SurfaceTarget::TextureCubemap:
396 return "Cube";
397 case SurfaceTarget::TextureCubeArray:
398 return "CubeArray";
399 default:
400 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
401 UNREACHABLE();
402 return fmt::format("TUK({})", static_cast<u32>(target));
403 }
404}
405
406u32 SurfaceParams::GetBlockSize() const {
407 const u32 x = 64U << block_width;
408 const u32 y = 8U << block_height;
409 const u32 z = 1U << block_depth;
410 return x * y * z;
411}
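A worked example of the block size arithmetic above, using hypothetical block parameters:

    // block_width == 0, block_height == 4, block_depth == 0:
    //   x = 64 << 0 = 64 bytes, y = 8 << 4 = 128 rows, z = 1 << 0 = 1 slice
    //   => GetBlockSize() == 64 * 128 * 1 == 8192 bytes == 16 GOBs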
412
413std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
414 const u32 x_pixels = 64U / GetBytesPerPixel();
415 const u32 x = x_pixels << block_width;
416 const u32 y = 8U << block_height;
417 return {x, y};
418}
419
420std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
421 const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
422 const u32 block_size = GetBlockSize();
423 const u32 block_index = offset / block_size;
424 const u32 gob_offset = offset % block_size;
425 const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
426 const u32 x_gob_pixels = 64U / GetBytesPerPixel();
427 const u32 x_block_pixels = x_gob_pixels << block_width;
428 const u32 y_block_pixels = 8U << block_height;
429 const u32 z_block_pixels = 1U << block_depth;
430 const u32 x_blocks = div_ceil(width, x_block_pixels);
431 const u32 y_blocks = div_ceil(height, y_block_pixels);
432 const u32 z_blocks = div_ceil(depth, z_block_pixels);
433 const u32 base_x = block_index % x_blocks;
434 const u32 base_y = (block_index / x_blocks) % y_blocks;
435 const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
436 u32 x = base_x * x_block_pixels;
437 u32 y = base_y * y_block_pixels;
438 u32 z = base_z * z_block_pixels;
439 z += gob_index >> block_height;
440 y += (gob_index * 8U) % y_block_pixels;
441 return {x, y, z};
442}
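Continuing that example, a worked pass through GetBlockOffsetXYZ for a hypothetical 256x256 RGBA8 surface with the same block parameters:

    // GetBytesPerPixel() == 4 => x_gob_pixels == 16, x_block_pixels == 16,
    // y_block_pixels == 128, x_blocks == ceil(256 / 16) == 16.
    // GetBlockOffsetXYZ(8192): block_index == 1, gob_offset == 0
    //   => (x, y, z) == (16, 0, 0), the second block along the X axis.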
443
444} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21class FormatLookupTable;
22
23class SurfaceParams {
24public:
25 /// Creates SurfaceParams from a texture configuration.
26 static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
27 const Tegra::Texture::TICEntry& tic,
28 const VideoCommon::Shader::Sampler& entry);
29
30 /// Creates SurfaceParams from an image configuration.
31 static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
32 const Tegra::Texture::TICEntry& tic,
33 const VideoCommon::Shader::Image& entry);
34
35 /// Creates SurfaceParams for a depth buffer configuration.
36 static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
37
38 /// Creates SurfaceParams from a framebuffer configuration.
39 static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
40 std::size_t index);
41
42 /// Creates SurfaceParams from a Fermi2D surface configuration.
43 static SurfaceParams CreateForFermiCopySurface(
44 const Tegra::Engines::Fermi2D::Regs::Surface& config);
45
46 /// Obtains the texture target from a shader's sampler entry.
47 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
48 const VideoCommon::Shader::Sampler& entry);
49
50 /// Obtains the texture target from a shader's image entry.
51 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
52 const VideoCommon::Shader::Image& entry);
53
54 std::size_t Hash() const {
55 return static_cast<std::size_t>(
56 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
57 }
58
59 bool operator==(const SurfaceParams& rhs) const;
60
61 bool operator!=(const SurfaceParams& rhs) const {
62 return !operator==(rhs);
63 }
64
65 std::size_t GetGuestSizeInBytes() const {
66 return GetInnerMemorySize(false, false, false);
67 }
68
69 std::size_t GetHostSizeInBytes(bool is_converted) const {
70 if (!is_converted) {
71 return GetInnerMemorySize(true, false, false);
72 }
73 // ASTC is decompressed in software and emulated as RGBA8
74 std::size_t host_size_in_bytes = 0;
75 for (u32 level = 0; level < num_levels; ++level) {
76 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
77 }
78 return host_size_in_bytes;
79 }
80
81 u32 GetBlockAlignedWidth() const {
82 return Common::AlignUp(width, 64 / GetBytesPerPixel());
83 }
84
85 /// Returns the width of a given mipmap level.
86 u32 GetMipWidth(u32 level) const {
87 return std::max(1U, width >> level);
88 }
89
90 /// Returns the height of a given mipmap level.
91 u32 GetMipHeight(u32 level) const {
92 return std::max(1U, height >> level);
93 }
94
95 /// Returns the depth of a given mipmap level.
96 u32 GetMipDepth(u32 level) const {
97 return is_layered ? depth : std::max(1U, depth >> level);
98 }
99
100 /// Returns the block height of a given mipmap level.
101 u32 GetMipBlockHeight(u32 level) const;
102
103 /// Returns the block depth of a given mipmap level.
104 u32 GetMipBlockDepth(u32 level) const;
105
106 /// Returns the best possible row/pitch alignment for the surface.
107 u32 GetRowAlignment(u32 level, bool is_converted) const {
108 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
109 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
110 }
111
112 /// Returns the offset in bytes in guest memory of a given mipmap level.
113 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
114
115 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
116 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
117
118 /// Returns the size in bytes in guest memory of a given mipmap level.
119 std::size_t GetGuestMipmapSize(u32 level) const {
120 return GetInnerMipmapMemorySize(level, false, false);
121 }
122
123 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
124 std::size_t GetHostMipmapSize(u32 level) const {
125 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
126 }
127
128 std::size_t GetConvertedMipmapSize(u32 level) const;
129
130 /// Gets this texture's Tegra block size in the guest memory layout
131 u32 GetBlockSize() const;
132
133 /// Gets the maximum X and Y sizes of a single block.
134 std::pair<u32, u32> GetBlockXY() const;
135
136 /// Gets the x, y, z coordinates corresponding to a memory offset
137 std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
138
139 /// Returns the size of a layer in bytes in guest memory.
140 std::size_t GetGuestLayerSize() const {
141 return GetLayerSize(false, false);
142 }
143
144 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
145 std::size_t GetHostLayerSize(u32 level) const {
146 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
147 return GetInnerMipmapMemorySize(level, true, false);
148 }
149
150 /// Returns the maximum number of mipmap levels the texture can have on the host GPU
151 u32 MaxPossibleMipmap() const {
152 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
153 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
154 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
155 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
156 return max_mipmap;
157 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
158 }
159
160 /// Returns true if the guest surface is a compressed surface.
161 bool IsCompressed() const {
162 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
163 }
164
165 /// Returns the default block width.
166 u32 GetDefaultBlockWidth() const {
167 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
168 }
169
170 /// Returns the default block height.
171 u32 GetDefaultBlockHeight() const {
172 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
173 }
174
175 /// Returns the bits per pixel.
176 u32 GetBitsPerPixel() const {
177 return VideoCore::Surface::GetFormatBpp(pixel_format);
178 }
179
180 /// Returns the bytes per pixel.
181 u32 GetBytesPerPixel() const {
182 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
183 }
184
185 /// Returns true if the pixel format is a depth and/or stencil format.
186 bool IsPixelFormatZeta() const {
187 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
188 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
189 }
190
191 /// Returns true if the surface is a TextureBuffer type of surface.
192 bool IsBuffer() const {
193 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
194 }
195
196 /// Returns the number of layers in the surface.
197 std::size_t GetNumLayers() const {
198 return is_layered ? depth : 1;
199 }
200
201 /// Returns the debug name of the texture for use in graphic debuggers.
202 std::string TargetName() const;
203
204 // Helper used for out-of-class size calculations
205 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
206 const u32 block_depth) {
207 return Common::AlignBits(out_size,
208 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
209 }
210
211 /// Converts a width from one surface format to another. This helps represent the
212 /// equivalent value between compressed and non-compressed textures.
213 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
214 VideoCore::Surface::PixelFormat pixel_format_to) {
215 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
216 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
217 return (width * bw2 + bw1 - 1) / bw1;
218 }
219
220 /// Converts a height from one surface format to another. This helps represent the
221 /// equivalent value between compressed and non-compressed textures.
222 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
223 VideoCore::Surface::PixelFormat pixel_format_to) {
224 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
225 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
226 return (height * bh2 + bh1 - 1) / bh1;
227 }
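A compile-time sketch of the round-up division used by the two helpers above, with hypothetical block widths (4 for a BC-style compressed format, 1 for an uncompressed one):

    #include <cstdint>

    constexpr std::uint32_t Convert(std::uint32_t extent, std::uint32_t block_from,
                                    std::uint32_t block_to) {
        // Same rounding-up division as ConvertWidth/ConvertHeight.
        return (extent * block_to + block_from - 1) / block_from;
    }
    static_assert(Convert(130, 4, 1) == 33);  // 130 compressed texels -> 33 blocks
    static_assert(Convert(33, 1, 4) == 132);  // 33 blocks cover 132 texels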
228
229 // Finds the maximum possible width between two 2D layers of different formats
230 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
231 const u32 src_level, const u32 dst_level) {
232 const u32 bw1 = src_params.GetDefaultBlockWidth();
233 const u32 bw2 = dst_params.GetDefaultBlockWidth();
234 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
235 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
236 return std::min(t_src_width, t_dst_width);
237 }
238
239 // Finds the maximum possible height between two 2D layers of different formats
240 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
241 const u32 src_level, const u32 dst_level) {
242 const u32 bh1 = src_params.GetDefaultBlockHeight();
243 const u32 bh2 = dst_params.GetDefaultBlockHeight();
244 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
245 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
246 return std::min(t_src_height, t_dst_height);
247 }
248
249 bool is_tiled;
250 bool srgb_conversion;
251 bool is_layered;
252 u32 block_width;
253 u32 block_height;
254 u32 block_depth;
255 u32 tile_width_spacing;
256 u32 width;
257 u32 height;
258 u32 depth;
259 u32 pitch;
260 u32 num_levels;
261 u32 emulated_levels;
262 VideoCore::Surface::PixelFormat pixel_format;
263 VideoCore::Surface::SurfaceType type;
264 VideoCore::Surface::SurfaceTarget target;
265
266private:
267 /// Returns the size of a given mipmap level inside a layer.
268 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
269
270 /// Returns the size of all mipmap levels and aligns as needed.
271 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
272 return GetLayerSize(as_host_size, uncompressed) *
273 (layer_only ? 1U : (is_layered ? depth : 1U));
274 }
275
276 /// Returns the size of a layer
277 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
278
279 /// Returns true if these parameters are from a layered surface.
280 bool IsLayered() const;
281};
282
283} // namespace VideoCommon
284
285namespace std {
286
287template <>
288struct hash<VideoCommon::SurfaceParams> {
289 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
290 return k.Hash();
291 }
292};
293
294} // namespace std
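The std::hash specialization above is what lets SurfaceParams key standard unordered containers directly; a hedged usage sketch (the reserve map below is hypothetical):

    // std::unordered_map<VideoCommon::SurfaceParams, std::vector<TSurface>> reserve;
    // reserve[params].push_back(std::move(surface));  // dispatches to SurfaceParams::Hash()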
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23bool ViewParams::operator!=(const ViewParams& rhs) const {
24 return !operator==(rhs);
25}
26
27} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 90a8bb0ae..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer,
17 u32 num_layers, u32 base_level, u32 num_levels)
18 : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
19 num_levels{num_levels} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24 bool operator!=(const ViewParams& rhs) const;
25
26 bool IsLayered() const {
27 switch (target) {
28 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
29 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
30 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
31 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
32 return true;
33 default:
34 return false;
35 }
36 }
37
38 VideoCore::Surface::SurfaceTarget target{};
39 u32 base_layer{};
40 u32 num_layers{};
41 u32 base_level{};
42 u32 num_levels{};
43};
44
45class ViewBase {
46public:
47 constexpr explicit ViewBase(const ViewParams& params) : params{params} {}
48
49 constexpr const ViewParams& GetViewParams() const {
50 return params;
51 }
52
53protected:
54 ViewParams params;
55};
56
57} // namespace VideoCommon
58
59namespace std {
60
61template <>
62struct hash<VideoCommon::ViewParams> {
63 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
64 return k.Hash();
65 }
66};
67
68} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ea835c59f..d1080300f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1299 +6,1454 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list> 9#include <bit>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <set> 12#include <optional>
13#include <tuple> 13#include <span>
14#include <type_traits>
14#include <unordered_map> 15#include <unordered_map>
16#include <utility>
15#include <vector> 17#include <vector>
16 18
17#include <boost/container/small_vector.hpp> 19#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_map.hpp>
19#include <boost/range/iterator_range.hpp>
20 20
21#include "common/assert.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h"
22#include "common/common_types.h" 23#include "common/common_types.h"
23#include "common/math_util.h" 24#include "common/logging/log.h"
24#include "core/core.h"
25#include "core/memory.h"
26#include "core/settings.h"
27#include "video_core/compatible_formats.h" 25#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h"
28#include "video_core/dirty_flags.h" 27#include "video_core/dirty_flags.h"
29#include "video_core/engines/fermi_2d.h" 28#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/kepler_compute.h"
30#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
31#include "video_core/gpu.h"
32#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 32#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h" 33#include "video_core/surface.h"
35#include "video_core/texture_cache/copy_params.h" 34#include "video_core/texture_cache/descriptor_table.h"
36#include "video_core/texture_cache/format_lookup_table.h" 35#include "video_core/texture_cache/format_lookup_table.h"
37#include "video_core/texture_cache/surface_base.h" 36#include "video_core/texture_cache/formatter.h"
38#include "video_core/texture_cache/surface_params.h" 37#include "video_core/texture_cache/image_base.h"
39#include "video_core/texture_cache/surface_view.h" 38#include "video_core/texture_cache/image_info.h"
40 39#include "video_core/texture_cache/image_view_base.h"
41namespace Tegra::Texture { 40#include "video_core/texture_cache/image_view_info.h"
42struct FullTextureInfo; 41#include "video_core/texture_cache/render_targets.h"
43} 42#include "video_core/texture_cache/samples_helper.h"
44 43#include "video_core/texture_cache/slot_vector.h"
45namespace VideoCore { 44#include "video_core/texture_cache/types.h"
46class RasterizerInterface; 45#include "video_core/texture_cache/util.h"
47} 46#include "video_core/textures/texture.h"
48 47
49namespace VideoCommon { 48namespace VideoCommon {
50 49
51using VideoCore::Surface::FormatCompatibility; 50using Tegra::Texture::SwizzleSource;
51using Tegra::Texture::TextureType;
52using Tegra::Texture::TICEntry;
53using Tegra::Texture::TSCEntry;
54using VideoCore::Surface::GetFormatType;
55using VideoCore::Surface::IsCopyCompatible;
52using VideoCore::Surface::PixelFormat; 56using VideoCore::Surface::PixelFormat;
53using VideoCore::Surface::SurfaceTarget; 57using VideoCore::Surface::PixelFormatFromDepthFormat;
54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 58using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
59using VideoCore::Surface::SurfaceType;
55 60
56template <typename TSurface, typename TView> 61template <class P>
57class TextureCache { 62class TextureCache {
58 using VectorSurface = boost::container::small_vector<TSurface, 1>; 63 /// Address shift for caching images into a hash table
64 static constexpr u64 PAGE_BITS = 20;
65
 66 /// Enables debugging features in the texture cache
67 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
68 /// Implement blits as copies between framebuffers
69 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
70 /// True when some copies have to be emulated
71 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
72
73 /// Image view ID for null descriptors
74 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
 75 /// Sampler ID for invalid sampler indices
76 static constexpr SamplerId NULL_SAMPLER_ID{0};
77
78 using Runtime = typename P::Runtime;
79 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc;
81 using ImageView = typename P::ImageView;
82 using Sampler = typename P::Sampler;
83 using Framebuffer = typename P::Framebuffer;
84
85 struct BlitImages {
86 ImageId dst_id;
87 ImageId src_id;
88 PixelFormat dst_format;
89 PixelFormat src_format;
90 };
91
92 template <typename T>
93 struct IdentityHash {
94 [[nodiscard]] size_t operator()(T value) const noexcept {
95 return static_cast<size_t>(value);
96 }
97 };
59 98
60public: 99public:
61 void InvalidateRegion(VAddr addr, std::size_t size) { 100 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
62 std::lock_guard lock{mutex}; 101 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
63 102
64 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 103 /// Notify the cache that a new frame has been queued
65 Unregister(surface); 104 void TickFrame();
66 }
67 }
68 105
69 void OnCPUWrite(VAddr addr, std::size_t size) { 106 /// Return an unique mutually exclusive lock for the cache
70 std::lock_guard lock{mutex}; 107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
71 108
72 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 109 /// Return a constant reference to the given image view id
73 if (surface->IsMemoryMarked()) { 110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
74 UnmarkMemory(surface);
75 surface->SetSyncPending(true);
76 marked_for_unregister.emplace_back(surface);
77 }
78 }
79 }
80 111
81 void SyncGuestHost() { 112 /// Return a reference to the given image view id
82 std::lock_guard lock{mutex}; 113 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
83 114
84 for (const auto& surface : marked_for_unregister) { 115 /// Fill image_view_ids with the graphics images in indices
85 if (surface->IsRegistered()) { 116 void FillGraphicsImageViews(std::span<const u32> indices,
86 surface->SetSyncPending(false); 117 std::span<ImageViewId> image_view_ids);
87 Unregister(surface);
88 }
89 }
90 marked_for_unregister.clear();
91 }
92 118
93 /** 119 /// Fill image_view_ids with the compute images in indices
94 * Guarantees that rendertargets don't unregister themselves if the 120 void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
95 * collide. Protection is currently only done on 3D slices.
96 */
97 void GuardRenderTargets(bool new_guard) {
98 guard_render_targets = new_guard;
99 }
100 121
101 void GuardSamplers(bool new_guard) { 122 /// Get the sampler from the graphics descriptor table in the specified index
102 guard_samplers = new_guard; 123 Sampler* GetGraphicsSampler(u32 index);
103 }
104 124
105 void FlushRegion(VAddr addr, std::size_t size) { 125 /// Get the sampler from the compute descriptor table in the specified index
106 std::lock_guard lock{mutex}; 126 Sampler* GetComputeSampler(u32 index);
107 127
108 auto surfaces = GetSurfacesInRegion(addr, size); 128 /// Refresh the state for graphics image view and sampler descriptors
109 if (surfaces.empty()) { 129 void SynchronizeGraphicsDescriptors();
110 return;
111 }
112 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
113 return a->GetModificationTick() < b->GetModificationTick();
114 });
115 for (const auto& surface : surfaces) {
116 mutex.unlock();
117 FlushSurface(surface);
118 mutex.lock();
119 }
120 }
121 130
122 bool MustFlushRegion(VAddr addr, std::size_t size) { 131 /// Refresh the state for compute image view and sampler descriptors
123 std::lock_guard lock{mutex}; 132 void SynchronizeComputeDescriptors();
124 133
125 const auto surfaces = GetSurfacesInRegion(addr, size); 134 /// Update bound render targets and upload memory if necessary
126 return std::any_of(surfaces.cbegin(), surfaces.cend(), 135 /// @param is_clear True when the render targets are being used for clears
127 [](const TSurface& surface) { return surface->IsModified(); }); 136 void UpdateRenderTargets(bool is_clear);
128 }
129 137
130 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 138 /// Find a framebuffer with the currently bound render targets
131 const VideoCommon::Shader::Sampler& entry) { 139 /// UpdateRenderTargets should be called before this
132 std::lock_guard lock{mutex}; 140 Framebuffer* GetFramebuffer();
133 const auto gpu_addr{tic.Address()};
134 if (!gpu_addr) {
135 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
136 }
137 141
138 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 142 /// Mark images in a range as modified from the CPU
139 if (!cpu_addr) { 143 void WriteMemory(VAddr cpu_addr, size_t size);
140 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
141 }
142 144
143 if (!IsTypeCompatible(tic.texture_type, entry)) { 145 /// Download contents of host images to guest memory in a region
144 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 146 void DownloadMemory(VAddr cpu_addr, size_t size);
145 }
146 147
147 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 148 /// Remove images in a region
148 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 149 void UnmapMemory(VAddr cpu_addr, size_t size);
149 if (guard_samplers) {
150 sampled_textures.push_back(surface);
151 }
152 return view;
153 }
154 150
155 TView GetImageSurface(const Tegra::Texture::TICEntry& tic, 151 /// Blit an image with the given parameters
156 const VideoCommon::Shader::Image& entry) { 152 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
157 std::lock_guard lock{mutex}; 153 const Tegra::Engines::Fermi2D::Surface& src,
158 const auto gpu_addr{tic.Address()}; 154 const Tegra::Engines::Fermi2D::Config& copy);
159 if (!gpu_addr) {
160 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
161 }
162 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
163 if (!cpu_addr) {
164 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
165 }
166 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
167 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
168 if (guard_samplers) {
169 sampled_textures.push_back(surface);
170 }
171 return view;
172 }
173 155
174 bool TextureBarrier() { 156 /// Invalidate the contents of the color buffer index
175 const bool any_rt = 157 /// These contents become unspecified, the cache can assume aggressive optimizations.
176 std::any_of(sampled_textures.begin(), sampled_textures.end(), 158 void InvalidateColorBuffer(size_t index);
177 [](const auto& surface) { return surface->IsRenderTarget(); });
178 sampled_textures.clear();
179 return any_rt;
180 }
181 159
182 TView GetDepthBufferSurface(bool preserve_contents) { 160 /// Invalidate the contents of the depth buffer
183 std::lock_guard lock{mutex}; 161 /// These contents become unspecified, the cache can assume aggressive optimizations.
184 auto& dirty = maxwell3d.dirty; 162 void InvalidateDepthBuffer();
185 if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
186 return depth_buffer.view;
187 }
188 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
189 163
190 const auto& regs{maxwell3d.regs}; 164 /// Try to find a cached image view in the given CPU address
191 const auto gpu_addr{regs.zeta.Address()}; 165 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
192 if (!gpu_addr || !regs.zeta_enable) {
193 SetEmptyDepthBuffer();
194 return {};
195 }
196 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
197 if (!cpu_addr) {
198 SetEmptyDepthBuffer();
199 return {};
200 }
201 const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
202 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
203 if (depth_buffer.target)
204 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
205 depth_buffer.target = surface_view.first;
206 depth_buffer.view = surface_view.second;
207 if (depth_buffer.target)
208 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
209 return surface_view.second;
210 }
211
212 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
213 std::lock_guard lock{mutex};
214 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
215 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
216 return render_targets[index].view;
217 }
218 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
219 166
220 const auto& regs{maxwell3d.regs}; 167 /// Return true when there are uncommitted images to be downloaded
221 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 168 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
222 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
223 SetEmptyColorBuffer(index);
224 return {};
225 }
226 169
227 const auto& config{regs.rt[index]}; 170 /// Return true when the caller should wait for async downloads
228 const auto gpu_addr{config.Address()}; 171 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
229 if (!gpu_addr) {
230 SetEmptyColorBuffer(index);
231 return {};
232 }
233 172
234 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 173 /// Commit asynchronous downloads
235 if (!cpu_addr) { 174 void CommitAsyncFlushes();
236 SetEmptyColorBuffer(index); 175
237 return {}; 176 /// Pop asynchronous downloads
238 } 177 void PopAsyncFlushes();
178
179 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
239 181
240 auto surface_view = 182private:
241 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), 183 /// Iterate over all page indices in a range
242 preserve_contents, true); 184 template <typename Func>
243 if (render_targets[index].target) { 185 static void ForEachPage(VAddr addr, size_t size, Func&& func) {
244 auto& surface = render_targets[index].target; 186 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
245 surface->MarkAsRenderTarget(false, NO_RT); 187 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
246 const auto& cr_params = surface->GetSurfaceParams(); 188 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
247 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 189 if constexpr (RETURNS_BOOL) {
248 AsyncFlushSurface(surface); 190 if (func(page)) {
191 break;
192 }
193 } else {
194 func(page);
249 } 195 }
250 } 196 }
251 render_targets[index].target = surface_view.first;
252 render_targets[index].view = surface_view.second;
253 if (render_targets[index].target)
254 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
255 return surface_view.second;
256 } 197 }
257 198
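A standalone sketch of the page-walking helper added above (right-hand column), assuming the 1 MiB pages implied by PAGE_BITS == 20:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t PAGE_BITS = 20;  // 1 MiB pages, as in the cache above

    template <typename Func>
    void ForEachPage(std::uint64_t addr, std::size_t size, Func&& func) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            func(page);
        }
    }

    int main() {
        // An 8 KiB write at 0x100FFF00 straddles a page boundary, so the
        // callback sees pages 0x100 and 0x101.
        ForEachPage(0x100FFF00, 0x2000, [](std::uint64_t page) {
            std::printf("page 0x%llx\n", static_cast<unsigned long long>(page));
        });
        return 0;
    }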
258 void MarkColorBufferInUse(std::size_t index) { 199 /// Fills image_view_ids in the image views in indices
259 if (auto& render_target = render_targets[index].target) { 200 void FillImageViews(DescriptorTable<TICEntry>& table,
260 render_target->MarkAsModified(true, Tick()); 201 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
261 } 202 std::span<ImageViewId> image_view_ids);
262 }
263 203
264 void MarkDepthBufferInUse() { 204 /// Find or create an image view in the guest descriptor table
265 if (depth_buffer.target) { 205 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
266 depth_buffer.target->MarkAsModified(true, Tick()); 206 std::span<ImageViewId> cached_image_view_ids, u32 index);
267 }
268 }
269 207
270 void SetEmptyDepthBuffer() { 208 /// Find or create a framebuffer with the given render target parameters
271 if (depth_buffer.target == nullptr) { 209 FramebufferId GetFramebufferId(const RenderTargets& key);
272 return;
273 }
274 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
275 depth_buffer.target = nullptr;
276 depth_buffer.view = nullptr;
277 }
278 210
279 void SetEmptyColorBuffer(std::size_t index) { 211 /// Refresh the contents (pixel data) of an image
280 if (render_targets[index].target == nullptr) { 212 void RefreshContents(Image& image);
281 return;
282 }
283 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
284 render_targets[index].target = nullptr;
285 render_targets[index].view = nullptr;
286 }
287
288 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
289 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
290 const Tegra::Engines::Fermi2D::Config& copy_config) {
291 std::lock_guard lock{mutex};
292 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
293 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
294 const GPUVAddr src_gpu_addr = src_config.Address();
295 const GPUVAddr dst_gpu_addr = dst_config.Address();
296 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
297
298 const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
299 const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
300 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
301 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
302 ImageBlit(src_surface, dst_surface.second, copy_config);
303 dst_surface.first->MarkAsModified(true, Tick());
304 }
305
306 TSurface TryFindFramebufferSurface(VAddr addr) const {
307 if (!addr) {
308 return nullptr;
309 }
310 const VAddr page = addr >> registry_page_bits;
311 const auto it = registry.find(page);
312 if (it == registry.end()) {
313 return nullptr;
314 }
315 const auto& list = it->second;
316 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
317 return surface->GetCpuAddr() == addr;
318 });
319 return found != list.end() ? *found : nullptr;
320 }
321 213
322 u64 Tick() { 214 /// Upload data from guest to an image
323 return ++ticks; 215 template <typename MapBuffer>
324 } 216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
325 217
326 void CommitAsyncFlushes() { 218 /// Find or create an image view from a guest descriptor
327 committed_flushes.push_back(uncommitted_flushes); 219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
328 uncommitted_flushes.reset();
329 }
330 220
331 bool HasUncommittedFlushes() const { 221 /// Create a new image view from a guest descriptor
332 return uncommitted_flushes != nullptr; 222 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
333 }
334 223
335 bool ShouldWaitAsyncFlushes() const { 224 /// Find or create an image from the given parameters
336 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 225 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
337 } 226 RelaxedOptions options = RelaxedOptions{});
338 227
339 void PopAsyncFlushes() { 228 /// Find an image from the given parameters
340 if (committed_flushes.empty()) { 229 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
341 return; 230 RelaxedOptions options);
342 }
343 auto& flush_list = committed_flushes.front();
344 if (!flush_list) {
345 committed_flushes.pop_front();
346 return;
347 }
348 for (TSurface& surface : *flush_list) {
349 FlushSurface(surface);
350 }
351 committed_flushes.pop_front();
352 }
353 231
354protected: 232 /// Create an image from the given parameters
355 explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, 233 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
356 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 234 RelaxedOptions options);
357 bool is_astc_supported_)
358 : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
359 gpu_memory{gpu_memory_} {
360 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
361 SetEmptyColorBuffer(i);
362 }
363 235
364 SetEmptyDepthBuffer(); 236 /// Create a new image and join perfectly matching existing images
365 staging_cache.SetSize(2); 237 /// Remove joined images from the cache
238 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
366 239
367 const auto make_siblings = [this](PixelFormat a, PixelFormat b) { 240 /// Return a blit image pair from the given guest blit parameters
368 siblings_table[static_cast<std::size_t>(a)] = b; 241 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
369 siblings_table[static_cast<std::size_t>(b)] = a; 242 const Tegra::Engines::Fermi2D::Surface& src);
370 };
371 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
372 make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
373 make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
374 make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
375 243
376 sampled_textures.reserve(64); 244 /// Find or create a sampler from a guest descriptor sampler
377 } 245 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
378 246
379 ~TextureCache() = default; 247 /// Find or create an image view for the given color buffer index
248 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
380 249
381 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; 250 /// Find or create an image view for the depth buffer
251 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
382 252
383 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, 253 /// Find or create a view for a render target with the given image parameters
384 const CopyParams& copy_params) = 0; 254 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
255 bool is_clear);
385 256
386 virtual void ImageBlit(TView& src_view, TView& dst_view, 257 /// Iterates over all the images in a region calling func
387 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 258 template <typename Func>
259 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
388 260
389 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 261 /// Find or create an image view in the given image with the passed parameters
390 // and reading it from a separate buffer. 262 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
391 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
392 263
393 void ManageRenderTargetUnregister(TSurface& surface) { 264 /// Register image in the page table
394 auto& dirty = maxwell3d.dirty; 265 void RegisterImage(ImageId image);
395 const u32 index = surface->GetRenderTarget(); 266
396 if (index == DEPTH_RT) { 267 /// Unregister image from the page table
397 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; 268 void UnregisterImage(ImageId image);
398 } else { 269
399 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; 270 /// Track CPU reads and writes for image
400 } 271 void TrackImage(ImageBase& image);
401 dirty.flags[VideoCommon::Dirty::RenderTargets] = true; 272
273 /// Stop tracking CPU reads and writes for image
274 void UntrackImage(ImageBase& image);
275
276 /// Delete image from the cache
277 void DeleteImage(ImageId image);
278
279 /// Remove image views references from the cache
280 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
281
282 /// Remove framebuffers using the given image views from the cache
283 void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
284
285 /// Mark an image as modified from the GPU
286 void MarkModification(ImageBase& image) noexcept;
287
288 /// Synchronize image aliases, copying data if needed
289 void SynchronizeAliases(ImageId image_id);
290
291 /// Prepare an image to be used
292 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
293
294 /// Prepare an image view to be used
295 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
296
297 /// Execute copies from one image to the other, even if they are incompatible
298 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
299
 300 /// Bind an image view as render target, downloading resources preemptively if needed
301 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
302
303 /// Create a render target from a given image and image view parameters
304 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
305 ImageId, const ImageViewInfo& view_info);
306
307 /// Returns true if the current clear parameters clear the whole image of a given image view
308 [[nodiscard]] bool IsFullClear(ImageViewId id);
309
310 Runtime& runtime;
311 VideoCore::RasterizerInterface& rasterizer;
312 Tegra::Engines::Maxwell3D& maxwell3d;
313 Tegra::Engines::KeplerCompute& kepler_compute;
314 Tegra::MemoryManager& gpu_memory;
315
316 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
317 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
318 std::vector<SamplerId> graphics_sampler_ids;
319 std::vector<ImageViewId> graphics_image_view_ids;
320
321 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
322 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
323 std::vector<SamplerId> compute_sampler_ids;
324 std::vector<ImageViewId> compute_image_view_ids;
325
326 RenderTargets render_targets;
327
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
333
334 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
335
336 bool has_deleted_images = false;
337
338 SlotVector<Image> slot_images;
339 SlotVector<ImageView> slot_image_views;
340 SlotVector<ImageAlloc> slot_image_allocs;
341 SlotVector<Sampler> slot_samplers;
342 SlotVector<Framebuffer> slot_framebuffers;
343
344 // TODO: This data structure is not optimal and it should be reworked
345 std::vector<ImageId> uncommitted_downloads;
346 std::queue<std::vector<ImageId>> committed_downloads;
347
348 static constexpr size_t TICKS_TO_DESTROY = 6;
349 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
350 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
351 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
352
353 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
354
355 u64 modification_tick = 0;
356 u64 frame_tick = 0;
357};
358
359template <class P>
360TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
361 Tegra::Engines::Maxwell3D& maxwell3d_,
362 Tegra::Engines::KeplerCompute& kepler_compute_,
363 Tegra::MemoryManager& gpu_memory_)
364 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
365 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
366 // Configure null sampler
367 TSCEntry sampler_descriptor{};
368 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
369 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
370 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
371 sampler_descriptor.cubemap_anisotropy.Assign(1);
372
373 // Make sure the first index is reserved for the null resources
 374 // This way the null resource becomes a compile-time constant
375 void(slot_image_views.insert(runtime, NullImageParams{}));
376 void(slot_samplers.insert(runtime, sampler_descriptor));
377}
378
379template <class P>
380void TextureCache<P>::TickFrame() {
381 // Tick sentenced resources in this order to ensure they are destroyed in the right order
382 sentenced_images.Tick();
383 sentenced_framebuffers.Tick();
384 sentenced_image_view.Tick();
385 ++frame_tick;
386}
387
388template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id];
396}
397
398template <class P>
399typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
400 return slot_image_views[id];
401}
402
403template <class P>
404void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
405 std::span<ImageViewId> image_view_ids) {
406 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
407}
408
409template <class P>
410void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
411 std::span<ImageViewId> image_view_ids) {
412 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
413}
414
415template <class P>
416typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
417 [[unlikely]] if (index > graphics_sampler_table.Limit()) {
418 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
419 return &slot_samplers[NULL_SAMPLER_ID];
420 }
421 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
422 SamplerId& id = graphics_sampler_ids[index];
423 [[unlikely]] if (is_new) {
424 id = FindSampler(descriptor);
425     }
426     return &slot_samplers[id];
427}
402     }
403
404     void Register(TSurface surface) {
405         const GPUVAddr gpu_addr = surface->GetGpuAddr();
406         const std::size_t size = surface->GetSizeInBytes();
407         const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
408         if (!cpu_addr) {
409             LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
410                          gpu_addr);
411             return;
412         }
413         surface->SetCpuAddr(*cpu_addr);
414         RegisterInnerCache(surface);
415         surface->MarkAsRegistered(true);
416         surface->SetMemoryMarked(true);
417         rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
418     }
419
420     void UnmarkMemory(TSurface surface) {
421         if (!surface->IsMemoryMarked()) {
422             return;
423         }
424         const std::size_t size = surface->GetSizeInBytes();
425         const VAddr cpu_addr = surface->GetCpuAddr();
426         rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
427         surface->SetMemoryMarked(false);
428     }
429
430     void Unregister(TSurface surface) {
431         if (guard_render_targets && surface->IsProtected()) {
432             return;
433         }
434         if (!guard_render_targets && surface->IsRenderTarget()) {
435             ManageRenderTargetUnregister(surface);
436         }
437         UnmarkMemory(surface);
438         if (surface->IsSyncPending()) {
439             marked_for_unregister.remove(surface);
440             surface->SetSyncPending(false);
441         }
442         UnregisterInnerCache(surface);
443         surface->MarkAsRegistered(false);
444         ReserveSurface(surface->GetSurfaceParams(), surface);
445     }
446
447     TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
448         if (const auto surface = TryGetReservedSurface(params); surface) {
449             surface->SetGpuAddr(gpu_addr);
450             return surface;
451         }
452         // No reserved surface available, create a new one and reserve it
453         auto new_surface{CreateSurface(gpu_addr, params)};
454         return new_surface;
455     }
428
429template <class P>
430typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
431    [[unlikely]] if (index > compute_sampler_table.Limit()) {
432        LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
433        return &slot_samplers[NULL_SAMPLER_ID];
434    }
435    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
436    SamplerId& id = compute_sampler_ids[index];
437    [[unlikely]] if (is_new) {
438        id = FindSampler(descriptor);
439    }
440    return &slot_samplers[id];
441}
442
443template <class P>
444void TextureCache<P>::SynchronizeGraphicsDescriptors() {
445    using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
446    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
447    const u32 tic_limit = maxwell3d.regs.tic.limit;
448    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
449    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
450        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
451    }
452    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
453        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
454    }
455}
456
457template <class P>
458void TextureCache<P>::SynchronizeComputeDescriptors() {
459    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
460    const u32 tic_limit = kepler_compute.regs.tic.limit;
461    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
462    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
463    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
464        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
465    }
466    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
467        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
468    }
469}
470
471template <class P>
472void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
473    using namespace VideoCommon::Dirty;
474    auto& flags = maxwell3d.dirty.flags;
475    if (!flags[Dirty::RenderTargets]) {
476        return;
477    }
478 flags[Dirty::RenderTargets] = false;
479
480    // Render target control is used on all render targets, so force look ups when this one is up
481    const bool force = flags[Dirty::RenderTargetControl];
482    flags[Dirty::RenderTargetControl] = false;
483
484    for (size_t index = 0; index < NUM_RT; ++index) {
485        ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
486        if (flags[Dirty::ColorBuffer0 + index] || force) {
487            flags[Dirty::ColorBuffer0 + index] = false;
488            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
489        }
490        PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
491    }
492    if (flags[Dirty::ZetaBuffer] || force) {
493        flags[Dirty::ZetaBuffer] = false;
494        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
495    }
496    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
497    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
498
499    for (size_t index = 0; index < NUM_RT; ++index) {
500        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
501    }
502    render_targets.size = Extent2D{
503        maxwell3d.regs.render_area.width,
504        maxwell3d.regs.render_area.height,
505    };
506}
507
508template <class P>
509typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
510    return &slot_framebuffers[GetFramebufferId(render_targets)];
511}
456
457     const bool is_astc_supported;
458
459private:
460     enum class RecycleStrategy : u32 {
461         Ignore = 0,
462         Flush = 1,
463         BufferCopy = 3,
464     };
465
466     enum class DeductionType : u32 {
467         DeductionComplete,
468         DeductionIncomplete,
469         DeductionFailed,
470     };
471
472     struct Deduction {
473         DeductionType type{DeductionType::DeductionFailed};
474         TSurface surface{};
475
476         bool Failed() const {
477             return type == DeductionType::DeductionFailed;
478         }
512
513template <class P>
514void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
515                                     std::span<ImageViewId> cached_image_view_ids,
516                                     std::span<const u32> indices,
517                                     std::span<ImageViewId> image_view_ids) {
518 ASSERT(indices.size() <= image_view_ids.size());
519 do {
520 has_deleted_images = false;
521 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
522 return VisitImageView(table, cached_image_view_ids, index);
523 });
524 } while (has_deleted_images);
525}
479
480         bool Incomplete() const {
481             return type == DeductionType::DeductionIncomplete;
482         }
526
527template <class P>
528ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
529                                            std::span<ImageViewId> cached_image_view_ids,
530 u32 index) {
531 if (index > table.Limit()) {
532 LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
533 return NULL_IMAGE_VIEW_ID;
534 }
535 const auto [descriptor, is_new] = table.Read(index);
536 ImageViewId& image_view_id = cached_image_view_ids[index];
537 if (is_new) {
538 image_view_id = FindImageView(descriptor);
539 }
540 if (image_view_id != NULL_IMAGE_VIEW_ID) {
541 PrepareImageView(image_view_id, false, false);
542 }
543 return image_view_id;
544}
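The two routines above form a read-through cache over the guest's descriptor table: table.Read(index) reports whether an entry changed since the last pass, and FillImageViews restarts the whole walk whenever resolving one descriptor deleted images that another cached id still points at. A minimal standalone sketch of the same retry idiom, with hypothetical names (Resolve, has_deleted):

#include <cstddef>
#include <vector>

struct DescriptorWalk {
    std::vector<int> descriptors; // stand-in for the guest descriptor table
    std::vector<int> cached_ids;  // stand-in for cached_image_view_ids
    bool has_deleted = false;     // set when resolving invalidates other entries

    int Resolve(int descriptor) {
        return descriptor + 1; // placeholder for FindImageView(descriptor)
    }

    void Fill(const std::vector<std::size_t>& indices, std::vector<int>& out) {
        do {
            has_deleted = false; // restart cleanly if a resolve deleted images
            for (std::size_t i = 0; i < indices.size(); ++i) {
                int& id = cached_ids[indices[i]];
                id = Resolve(descriptors[indices[i]]);
                out[i] = id;
            }
        } while (has_deleted);
    }
};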
483
484         bool IsDepth() const {
485             return surface->GetSurfaceParams().IsPixelFormatZeta();
486         }
487     };
545
546template <class P>
547FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
548    const auto [pair, is_new] = framebuffers.try_emplace(key);
549    FramebufferId& framebuffer_id = pair->second;
550 if (!is_new) {
551 return framebuffer_id;
552 }
553 std::array<ImageView*, NUM_RT> color_buffers;
554 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
555 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
556 ImageView* const depth_buffer =
557 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
558 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
559 return framebuffer_id;
560}
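GetFramebufferId memoizes framebuffer construction on the full RenderTargets key; try_emplace probes and inserts in a single map operation, so the expensive slot insertion only runs for keys never seen before. A minimal sketch of the pattern, assuming an integral key and value for brevity:

#include <cstdint>
#include <unordered_map>

std::unordered_map<std::uint64_t, int> framebuffer_cache;

int GetOrCreateFramebuffer(std::uint64_t key) {
    const auto [it, is_new] = framebuffer_cache.try_emplace(key);
    if (is_new) {
        // Placeholder for the real slot_framebuffers.insert(runtime, ...)
        it->second = static_cast<int>(framebuffer_cache.size());
    }
    return it->second;
}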
561
562template <class P>
563void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
564    ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
565        if (True(image.flags & ImageFlagBits::CpuModified)) {
566            return;
488
489     /**
490      * Takes care of selecting a proper strategy to deal with a texture recycle.
491      *
492      * @param overlaps The overlapping surfaces registered in the cache.
493      * @param params The parameters on the new surface.
494      * @param gpu_addr The starting address of the new surface.
495      * @param untopological Indicates to the recycler that the texture has no way
496      *                      to match the overlaps due to topological reasons.
497      **/
498 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
499 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
500 if (Settings::IsGPULevelExtreme()) {
501 return RecycleStrategy::Flush;
502 }
503 // 3D Textures decision
504 if (params.target == SurfaceTarget::Texture3D) {
505 return RecycleStrategy::Flush;
506 }
507 for (const auto& s : overlaps) {
508 const auto& s_params = s->GetSurfaceParams();
509 if (s_params.target == SurfaceTarget::Texture3D) {
510 return RecycleStrategy::Flush;
511 }
512 }
513 // Untopological decision
514 if (untopological == MatchTopologyResult::CompressUnmatch) {
515 return RecycleStrategy::Flush;
516 }
517 if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
518 return RecycleStrategy::Flush;
519 }
520 return RecycleStrategy::Ignore;
521 }
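Condensed, PickStrategy only ever selects between Ignore and Flush: anything involving accuracy-critical settings, 3D textures, or topologies that cannot be matched in place is flushed first. A hedged summary of the branch structure above (inputs are stand-ins for the real queries):

enum class RecycleStrategy { Ignore, Flush };

RecycleStrategy PickStrategySketch(bool gpu_level_extreme, bool any_texture_is_3d,
                                   bool compress_unmatch, bool linear_full_match) {
    if (gpu_level_extreme || any_texture_is_3d) {
        return RecycleStrategy::Flush; // accuracy or 3D slices demand real data
    }
    if (compress_unmatch || linear_full_match) {
        return RecycleStrategy::Flush; // topology can't be matched in place
    }
    return RecycleStrategy::Ignore; // safe to drop the old contents
}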
522
523 /**
524 * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
525 * strategies: Ignore and Flush.
526 *
527 * - Ignore: Just unregisters all the overlaps and loads the new texture.
528 * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
529 *
530 * @param overlaps The overlapping surfaces registered in the cache.
531 * @param params The parameters for the new surface.
532 * @param gpu_addr The starting address of the new surface.
533 * @param preserve_contents Indicates that the new surface should be loaded from memory or left
534 * blank.
535 * @param untopological Indicates to the recycler that the texture has no way to match the
536 * overlaps due to topological reasons.
537 **/
538 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
539 const GPUVAddr gpu_addr, const bool preserve_contents,
540 const MatchTopologyResult untopological) {
541 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
542 for (auto& surface : overlaps) {
543 Unregister(surface);
544 }
545 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
546 case RecycleStrategy::Ignore: {
547 return InitializeSurface(gpu_addr, params, do_load);
548 }
549 case RecycleStrategy::Flush: {
550 std::sort(overlaps.begin(), overlaps.end(),
551 [](const TSurface& a, const TSurface& b) -> bool {
552 return a->GetModificationTick() < b->GetModificationTick();
553 });
554 for (auto& surface : overlaps) {
555 FlushSurface(surface);
556 }
557 return InitializeSurface(gpu_addr, params, preserve_contents);
558         }
559         case RecycleStrategy::BufferCopy: {
560             auto new_surface = GetUncachedSurface(gpu_addr, params);
561             BufferCopy(overlaps[0], new_surface);
562             return {new_surface, new_surface->GetMainView()};
567        }
568        image.flags |= ImageFlagBits::CpuModified;
569        UntrackImage(image);
570    });
571}
572
573template <class P>
574void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
575 std::vector<ImageId> images;
576 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
577 // Skip images that were not modified from the GPU
578 if (False(image.flags & ImageFlagBits::GpuModified)) {
579 return;
580        }
581        // Skip images that .are. modified from the CPU
582        // We don't want to write sensitive data from the guest
583        if (True(image.flags & ImageFlagBits::CpuModified)) {
584            return;
585        }
586        if (image.info.num_samples > 1) {
587            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
588            return;
589        }
590        image.flags &= ~ImageFlagBits::GpuModified;
591        images.push_back(image_id);
592    });
593    if (images.empty()) {
594        return;
595    }
596    std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
597        return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
598    });
599    for (const ImageId image_id : images) {
600        Image& image = slot_images[image_id];
601        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
602        const auto copies = FullDownloadCopies(image.info);
603        image.DownloadMemory(map, 0, copies);
604        runtime.Finish();
605        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
606    }
607}
608
563         }
564         default: {
565             UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
566             return InitializeSurface(gpu_addr, params, do_load);
567         }
568         }
569     }
570
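DownloadMemory above flushes GPU-modified images back to guest memory oldest-first, so when two images overlap the most recent write wins. A minimal sketch of that ordering guarantee, with hypothetical types:

#include <algorithm>
#include <cstdint>
#include <vector>

struct FlushCandidate {
    std::uint64_t modification_tick;
    int image_id;
};

void SortOldestFirst(std::vector<FlushCandidate>& candidates) {
    std::sort(candidates.begin(), candidates.end(),
              [](const FlushCandidate& lhs, const FlushCandidate& rhs) {
                  return lhs.modification_tick < rhs.modification_tick;
              });
    // Each candidate is then downloaded and swizzled back in this order,
    // so later writes overwrite earlier ones in overlapping regions.
}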
609template <class P>
610void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
611    std::vector<ImageId> deleted_images;
612    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
613    for (const ImageId id : deleted_images) {
614        Image& image = slot_images[id];
615        if (True(image.flags & ImageFlagBits::Tracked)) {
616            UntrackImage(image);
617        }
618        UnregisterImage(id);
619        DeleteImage(id);
620    }
621}
571     /**
572      * Takes a single surface and recreates into another that may differ in
573      * format, target or width alignment.
574      *
575      * @param current_surface The registered surface in the cache which we want to convert.
576      * @param params The new surface params which we'll use to recreate the surface.
577      * @param is_render Whether or not the surface is a render target.
578      **/
579     std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
580                                               bool is_render) {
581         const auto gpu_addr = current_surface->GetGpuAddr();
582         const auto& cr_params = current_surface->GetSurfaceParams();
583         TSurface new_surface;
584 if (cr_params.pixel_format != params.pixel_format && !is_render &&
585 GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
586 SurfaceParams new_params = params;
587 new_params.pixel_format = cr_params.pixel_format;
588 new_params.type = cr_params.type;
589 new_surface = GetUncachedSurface(gpu_addr, new_params);
590 } else {
591 new_surface = GetUncachedSurface(gpu_addr, params);
592 }
593 const SurfaceParams& final_params = new_surface->GetSurfaceParams();
594 if (cr_params.type != final_params.type) {
595 if (Settings::IsGPULevelExtreme()) {
596 BufferCopy(current_surface, new_surface);
597 }
598 } else {
599 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
600 for (auto& brick : bricks) {
601 TryCopyImage(current_surface, new_surface, brick);
602 }
603 }
604 Unregister(current_surface);
605 Register(new_surface);
606 new_surface->MarkAsModified(current_surface->IsModified(), Tick());
607 return {new_surface, new_surface->GetMainView()};
608 }
609
610 /**
611 * Takes a single surface and checks with the new surface's params if it's an exact
612 * match, we return the main view of the registered surface. If its formats don't
613 * match, we rebuild the surface. We call this last method a `Mirage`. If formats
614 * match but the targets don't, we create an overview View of the registered surface.
615 *
616 * @param current_surface The registered surface in the cache which we want to convert.
617 * @param params The new surface params which we want to check.
618 * @param is_render Whether or not the surface is a render target.
619 **/
620 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
621 const SurfaceParams& params, bool is_render) {
622 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
623 const bool matches_target = current_surface->MatchTarget(params.target);
624 const auto match_check = [&]() -> std::pair<TSurface, TView> {
625 if (matches_target) {
626 return {current_surface, current_surface->GetMainView()};
627 }
628 return {current_surface, current_surface->EmplaceOverview(params)};
629 };
630 if (!is_mirage) {
631 return match_check();
632 }
633 if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
634 return match_check();
635 }
636 return RebuildSurface(current_surface, params, is_render);
637 }
638
639 /**
640 * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
641 * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
642      * of the new surface; if they all match, we end up recreating a surface for them,
643 * else we return nothing.
644 *
645 * @param overlaps The overlapping surfaces registered in the cache.
646 * @param params The parameters on the new surface.
647 * @param gpu_addr The starting address of the new surface.
648 **/
649 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
650 const SurfaceParams& params,
651 GPUVAddr gpu_addr) {
652 if (params.target == SurfaceTarget::Texture3D) {
653 return std::nullopt;
654 }
655 const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
656 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
657
658         if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
659             LoadSurface(new_surface);
660             for (const auto& surface : overlaps) {
661                 Unregister(surface);
662             }
663             Register(new_surface);
664             return {{new_surface, new_surface->GetMainView()}};
665         }
622
623template <class P>
624void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
625                                const Tegra::Engines::Fermi2D::Surface& src,
626                                const Tegra::Engines::Fermi2D::Config& copy) {
627    const BlitImages images = GetBlitImages(dst, src);
628    const ImageId dst_id = images.dst_id;
629    const ImageId src_id = images.src_id;
630    PrepareImage(src_id, false, false);
631 PrepareImage(dst_id, true, false);
632
633 ImageBase& dst_image = slot_images[dst_id];
634 const ImageBase& src_image = slot_images[src_id];
635
636 // TODO: Deduplicate
637 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
638 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
639 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
640 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
641 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
642 const std::array src_region{
643 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
644 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
645 };
646
647    const std::optional src_base = src_image.TryFindBase(src.Address());
648    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
649    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
650    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
651    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
652    const std::array dst_region{
653        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
654        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
655    };
666
667         std::size_t passed_tests = 0;
668         for (auto& surface : overlaps) {
669             const SurfaceParams& src_params = surface->GetSurfaceParams();
670             const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
671             if (!mipmap_layer) {
672                 continue;
673             }
674             const auto [base_layer, base_mipmap] = *mipmap_layer;
675             if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
676                 continue;
677             }
678 ++passed_tests;
679
680 // Copy all mipmaps and layers
681 const u32 block_width = params.GetDefaultBlockWidth();
682 const u32 block_height = params.GetDefaultBlockHeight();
683 for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
684 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
685 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
686 if (width < block_width || height < block_height) {
687 // Current APIs forbid copying small compressed textures, avoid errors
688 break;
689 }
690 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
691 src_params.depth);
692 TryCopyImage(surface, new_surface, copy_params);
693 }
694 }
695 if (passed_tests == 0) {
696 return std::nullopt;
697 }
698 if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
699 // In Accurate GPU all tests should pass, else we recycle
700 return std::nullopt;
701 }
702
703         const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
704         for (const auto& surface : overlaps) {
705             Unregister(surface);
706         }
656
657    // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
658    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
659    if constexpr (FRAMEBUFFER_BLITS) {
660        // OpenGL blits from framebuffers, not images
661 Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
662 runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
663 copy.filter, copy.operation);
664 } else {
665 // Vulkan can blit images, but it lacks format reinterpretations
666 // Provide a framebuffer in case it's necessary
667 ImageView& dst_view = slot_image_views[dst_view_id];
668 ImageView& src_view = slot_image_views[src_view_id];
669 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
670 copy.operation);
671 }
672}
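BlitImage converts blit rectangles into sample space by shifting each axis down by log2 of the sample count on that axis; that is what the src_samples_x/y and dst_samples_x/y shifts above do. A sketch of the per-axis factors, assuming the 1x/2x/4x/8x sample layouts handled by SamplesLog2 in samples_helper.h (the exact table is an assumption here, not the confirmed one):

#include <utility>

std::pair<int, int> SamplesLog2Sketch(int num_samples) {
    switch (num_samples) {
    case 1:
        return {0, 0};
    case 2:
        return {1, 0}; // two samples laid out along X
    case 4:
        return {1, 1};
    case 8:
        return {2, 1};
    default:
        return {0, 0};
    }
}

// A coordinate is moved into sample space with a simple shift:
int ToSampleSpace(int coordinate, int log2_samples) {
    return coordinate >> log2_samples;
}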
673
674template <class P>
675void TextureCache<P>::InvalidateColorBuffer(size_t index) {
676    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
677    color_buffer_id = FindColorBuffer(index, false);
678    if (!color_buffer_id) {
679        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
680        return;
681    }
682    // When invalidating a color buffer, the old contents are no longer relevant
683    ImageView& color_buffer = slot_image_views[color_buffer_id];
684    Image& image = slot_images[color_buffer.image_id];
685    image.flags &= ~ImageFlagBits::CpuModified;
686    image.flags &= ~ImageFlagBits::GpuModified;
707
708         new_surface->MarkAsModified(modified, Tick());
709         Register(new_surface);
710         return {{new_surface, new_surface->GetMainView()}};
711     }
712
713     /**
714      * Takes care of managing 3D textures and their slices. Uses HLE methods to reconstruct the 3D
715      * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
716      * the HLE methods.
717      *
718      * @param overlaps The overlapping surfaces registered in the cache.
719      * @param params The parameters on the new surface.
720      * @param gpu_addr The starting address of the new surface.
721      * @param cpu_addr The starting address of the new surface on physical memory.
722      * @param preserve_contents Indicates that the new surface should be loaded from memory or
723      *                          left blank.
724      */
725 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
726 const SurfaceParams& params,
727 GPUVAddr gpu_addr, VAddr cpu_addr,
728 bool preserve_contents) {
729 if (params.target != SurfaceTarget::Texture3D) {
730 for (const auto& surface : overlaps) {
731 if (!surface->MatchTarget(params.target)) {
732 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
733 if (Settings::IsGPULevelExtreme()) {
734 return std::nullopt;
735 }
736 Unregister(surface);
737 return InitializeSurface(gpu_addr, params, preserve_contents);
738 }
739 return std::nullopt;
740 }
741 if (surface->GetCpuAddr() != cpu_addr) {
742 continue;
743 }
744 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
745 return std::make_pair(surface, surface->GetMainView());
746 }
747 }
748 return InitializeSurface(gpu_addr, params, preserve_contents);
749 }
750
751         if (params.num_levels > 1) {
752             // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
753             return std::nullopt;
754         }
755
756         if (overlaps.size() == 1) {
757             const auto& surface = overlaps[0];
758             const SurfaceParams& overlap_params = surface->GetSurfaceParams();
759             // Don't attempt to render to textures with more than one level for now
760             // The texture has to be at or to the right of the sample address if we want to render to it
761             if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
762                 const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
763                 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
764                 if (slice < overlap_params.depth) {
765                     auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
766                     return std::make_pair(std::move(surface), std::move(view));
767                 }
768             }
769         }
770
771         TSurface new_surface = GetUncachedSurface(gpu_addr, params);
772         bool modified = false;
773
774         for (auto& surface : overlaps) {
775             const SurfaceParams& src_params = surface->GetSurfaceParams();
776             if (src_params.target != SurfaceTarget::Texture2D ||
777                 src_params.height != params.height ||
778                 src_params.block_depth != params.block_depth ||
779                 src_params.block_height != params.block_height) {
780                 return std::nullopt;
781             }
782             modified |= surface->IsModified();
783
784             const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
785             const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
786             const u32 width = params.width;
787             const u32 height = params.height;
788             const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
789             TryCopyImage(surface, new_surface, copy_params);
790         }
791         for (const auto& surface : overlaps) {
792             Unregister(surface);
793         }
794         new_surface->MarkAsModified(modified, Tick());
795         Register(new_surface);
796
797         TView view = new_surface->GetMainView();
798         return std::make_pair(std::move(new_surface), std::move(view));
799     }
687
688    runtime.InvalidateColorBuffer(color_buffer, index);
689}
690
691template <class P>
692void TextureCache<P>::InvalidateDepthBuffer() {
693    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
694    depth_buffer_id = FindDepthBuffer(false);
695    if (!depth_buffer_id) {
696        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
697        return;
698    }
699    // When invalidating the depth buffer, the old contents are no longer relevant
700    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
701    image.flags &= ~ImageFlagBits::CpuModified;
702    image.flags &= ~ImageFlagBits::GpuModified;
703
704    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
705    runtime.InvalidateDepthBuffer(depth_buffer);
706}
707
708template <class P>
709typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
710    // TODO: Properly implement this
711    const auto it = page_table.find(cpu_addr >> PAGE_BITS);
712    if (it == page_table.end()) {
713        return nullptr;
714    }
715    const auto& image_ids = it->second;
716    for (const ImageId image_id : image_ids) {
717        const ImageBase& image = slot_images[image_id];
718        if (image.cpu_addr != cpu_addr) {
719            continue;
720        }
721        if (image.image_view_ids.empty()) {
722            continue;
723        }
724        return &slot_image_views[image.image_view_ids.at(0)];
725    }
726    return nullptr;
727}
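The 3D-surface handling above deduces which depth slice a 2D overlap represents from its byte distance to the start of the 3D texture, via GetBlockOffsetXYZ. Under a plain linear layout the idea reduces to a division; the real code additionally accounts for block-linear swizzling, so this is an illustration only:

#include <cstdint>

std::uint32_t DeduceSliceLinear(std::uint32_t offset_bytes, std::uint32_t bytes_per_slice) {
    // Which z-slice starts at this offset? (assumes bytes_per_slice != 0)
    return offset_bytes / bytes_per_slice;
}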
800
801 /**
802 * Gets the starting address and parameters of a candidate surface and tries
803 * to find a matching surface within the cache. This is done in 3 big steps:
804 *
805 * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
806 *
807      * 2. Check if there are any overlaps at all; if there are none, we just load the texture from
808      *    memory, else we move to step 3.
809 *
810 * 3. Consists of figuring out the relationship between the candidate texture and the
811      *    there are many, we just try to reconstruct a new surface out of them based on the
812 * there's many, we just try to reconstruct a new surface out of them based on the
813 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
814 * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
815 * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
816 * a new surface.
817 *
818 * @param gpu_addr The starting address of the candidate surface.
819 * @param params The parameters on the candidate surface.
820 * @param preserve_contents Indicates that the new surface should be loaded from memory or
821 * left blank.
822 * @param is_render Whether or not the surface is a render target.
823 **/
824 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
825 const SurfaceParams& params, bool preserve_contents,
826 bool is_render) {
827 // Step 1
828 // Check Level 1 Cache for a fast structural match. If candidate surface
829 // matches at certain level we are pretty much done.
830 if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
831 TSurface& current_surface = iter->second;
832 const auto topological_result = current_surface->MatchesTopology(params);
833 if (topological_result != MatchTopologyResult::FullMatch) {
834 VectorSurface overlaps{current_surface};
835 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
836 topological_result);
837 }
728
729template <class P>
730bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
731    return !uncommitted_downloads.empty();
732}
838
839         const auto struct_result = current_surface->MatchesStructure(params);
840         if (struct_result != MatchStructureResult::None) {
841             const auto& old_params = current_surface->GetSurfaceParams();
842             const bool not_3d = params.target != SurfaceTarget::Texture3D &&
843 old_params.target != SurfaceTarget::Texture3D;
844 if (not_3d || current_surface->MatchTarget(params.target)) {
845 if (struct_result == MatchStructureResult::FullMatch) {
846 return ManageStructuralMatch(current_surface, params, is_render);
847 } else {
848 return RebuildSurface(current_surface, params, is_render);
849 }
850 }
851 }
852 }
853
854         // Step 2
855         // Obtain all possible overlaps in the memory region
856         const std::size_t candidate_size = params.GetGuestSizeInBytes();
857         auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
858
859         // If none are found, we are done; we just load the surface and create it.
860         if (overlaps.empty()) {
861             return InitializeSurface(gpu_addr, params, preserve_contents);
862         }
863
864         // Step 3
865         // Now we need to figure out the relationship between the texture and its overlaps;
866         // we do a topological test to ensure we can find some relationship. If it fails,
867         // immediately recycle the texture.
868         for (const auto& surface : overlaps) {
869             const auto topological_result = surface->MatchesTopology(params);
870             if (topological_result != MatchTopologyResult::FullMatch) {
871                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
872                                       topological_result);
873             }
874         }
733
734template <class P>
735bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
736    return !committed_downloads.empty() && !committed_downloads.front().empty();
737}
738
739template <class P>
740void TextureCache<P>::CommitAsyncFlushes() {
741    // This is intentionally passing the value by copy
742    committed_downloads.push(uncommitted_downloads);
743    uncommitted_downloads.clear();
744}
745
746template <class P>
747void TextureCache<P>::PopAsyncFlushes() {
748    if (committed_downloads.empty()) {
749        return;
750    }
751    const std::span<const ImageId> download_ids = committed_downloads.front();
752    if (download_ids.empty()) {
753        committed_downloads.pop();
754        return;
755    }
756    size_t total_size_bytes = 0;
757 for (const ImageId image_id : download_ids) {
758 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
759 }
760 auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
761 size_t buffer_offset = 0;
762 for (const ImageId image_id : download_ids) {
763 Image& image = slot_images[image_id];
764 const auto copies = FullDownloadCopies(image.info);
765 image.DownloadMemory(download_map, buffer_offset, copies);
766 buffer_offset += image.unswizzled_size_bytes;
767 }
768 // Wait for downloads to finish
769 runtime.Finish();
770
771 buffer_offset = 0;
772 const std::span<u8> download_span = download_map.Span();
773 for (const ImageId image_id : download_ids) {
774 const ImageBase& image = slot_images[image_id];
775 const auto copies = FullDownloadCopies(image.info);
776 const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
777 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
778 buffer_offset += image.unswizzled_size_bytes;
779 }
780 committed_downloads.pop();
781}
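PopAsyncFlushes packs every pending download into one staging buffer with two passes over the same id list: the first pass accumulates sizes, the second walks identical offsets to read the results back. A sketch of the offset bookkeeping:

#include <cstddef>
#include <vector>

std::vector<std::size_t> PackDownloadOffsets(const std::vector<std::size_t>& sizes,
                                             std::size_t& total_size_bytes) {
    total_size_bytes = 0;
    std::vector<std::size_t> offsets;
    offsets.reserve(sizes.size());
    for (const std::size_t size : sizes) {
        offsets.push_back(total_size_bytes); // each image owns [offset, offset + size)
        total_size_bytes += size;
    }
    return offsets;
}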
782
783template <class P>
784bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
785    bool is_modified = false;
786    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
787        if (False(image.flags & ImageFlagBits::GpuModified)) {
788            return false;
789        }
790        is_modified = true;
791        return true;
792    });
793    return is_modified;
794}
795
796template <class P>
797void TextureCache<P>::RefreshContents(Image& image) {
798    if (False(image.flags & ImageFlagBits::CpuModified)) {
799        // Only upload modified images
800        return;
801    }
802    image.flags &= ~ImageFlagBits::CpuModified;
803    TrackImage(image);
875
876         // Manage 3D textures
877         if (params.block_depth > 0) {
878             auto surface =
879                 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
880             if (surface) {
881                 return *surface;
882             }
883         }
884
885         // Split cases between 1 overlap or many.
886         if (overlaps.size() == 1) {
887             TSurface current_surface = overlaps[0];
888             // First check if the surface is within the overlap. If not, it means
889             // two things: either the candidate surface is a supertexture of the overlap
890             // or they don't match in any known way.
891             if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
892                 const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
893 if (view) {
894 return *view;
895 }
896 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
897 MatchTopologyResult::FullMatch);
898 }
899 // Now we check if the candidate is a mipmap/layer of the overlap
900 std::optional<TView> view =
901 current_surface->EmplaceView(params, gpu_addr, candidate_size);
902 if (view) {
903 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
904 if (is_mirage) {
905 // On a mirage view, we need to recreate the surface under this new view
906 // and then obtain a view again.
907 SurfaceParams new_params = current_surface->GetSurfaceParams();
908 const u32 wh = SurfaceParams::ConvertWidth(
909 new_params.width, new_params.pixel_format, params.pixel_format);
910 const u32 hh = SurfaceParams::ConvertHeight(
911 new_params.height, new_params.pixel_format, params.pixel_format);
912 new_params.width = wh;
913 new_params.height = hh;
914 new_params.pixel_format = params.pixel_format;
915 std::pair<TSurface, TView> pair =
916 RebuildSurface(current_surface, new_params, is_render);
917 std::optional<TView> mirage_view =
918 pair.first->EmplaceView(params, gpu_addr, candidate_size);
919 if (mirage_view)
920 return {pair.first, *mirage_view};
921 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
922 MatchTopologyResult::FullMatch);
923 }
924 return {current_surface, *view};
925 }
926 } else {
927 // If there are many overlaps, odds are they are subtextures of the candidate
928 // surface. We try to construct a new surface based on the candidate parameters,
929 // using the overlaps. If a single overlap fails, this will fail.
930 std::optional<std::pair<TSurface, TView>> view =
931 TryReconstructSurface(overlaps, params, gpu_addr);
932 if (view) {
933 return *view;
934 }
935 }
936 // We failed all the tests, recycle the overlaps into a new texture.
937 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
938 MatchTopologyResult::FullMatch);
939 }
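Compressed into one decision function, the three documented steps of GetSurface look roughly like this (booleans stand in for the real match queries):

#include <cstddef>

enum class SurfaceOutcome { StructuralMatch, FreshSurface, Reconstructed, Recycled };

SurfaceOutcome LookupSketch(bool l1_hit, bool topology_matches, std::size_t num_overlaps,
                            bool reconstruction_succeeded) {
    if (l1_hit && topology_matches) {
        return SurfaceOutcome::StructuralMatch; // step 1: fast path
    }
    if (num_overlaps == 0) {
        return SurfaceOutcome::FreshSurface; // step 2: nothing to reuse
    }
    // step 3: rebuild from the overlaps, or recycle them on failure
    return reconstruction_succeeded ? SurfaceOutcome::Reconstructed : SurfaceOutcome::Recycled;
}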
940
941 /**
942 * Gets the starting address and parameters of a candidate surface and tries to find a
943 * matching surface within the cache that's similar to it. If there are many textures
944      * or the texture found is entirely incompatible, it will fail. If no texture is found, the
945 * blit will be unsuccessful.
946 *
947 * @param gpu_addr The starting address of the candidate surface.
948 * @param params The parameters on the candidate surface.
949 **/
950 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
951 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
952
953 if (!cpu_addr) {
954 Deduction result{};
955 result.type = DeductionType::DeductionFailed;
956 return result;
957 }
804
805    if (image.info.num_samples > 1) {
806        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
807        return;
808    }
809    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
810    UploadImageContents(image, map, 0);
811    runtime.InsertUploadMemoryBarrier();
812}
958
959         if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
960             TSurface& current_surface = iter->second;
961             const auto topological_result = current_surface->MatchesTopology(params);
962             if (topological_result != MatchTopologyResult::FullMatch) {
963                 Deduction result{};
964                 result.type = DeductionType::DeductionFailed;
965                 return result;
966             }
967             const auto struct_result = current_surface->MatchesStructure(params);
968 if (struct_result != MatchStructureResult::None &&
969 current_surface->MatchTarget(params.target)) {
970 Deduction result{};
971 result.type = DeductionType::DeductionComplete;
972 result.surface = current_surface;
973 return result;
974 }
975 }
976
977         const std::size_t candidate_size = params.GetGuestSizeInBytes();
978         auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
813
814template <class P>
815template <typename MapBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
818 const GPUVAddr gpu_addr = image.gpu_addr;
819
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
821 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
822 const auto uploads = FullUploadSwizzles(image.info);
823 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
824 } else if (True(image.flags & ImageFlagBits::Converted)) {
825 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
826 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
827 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
828 image.UploadMemory(map, buffer_offset, copies);
829 } else if (image.info.type == ImageType::Buffer) {
830 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
831 image.UploadMemory(map, buffer_offset, copies);
832 } else {
833 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
834 image.UploadMemory(map, buffer_offset, copies);
835 }
836}
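UploadImageContents dispatches on image flags from most to least specialized, and the priority order matters. A compact restatement of the selection:

enum class UploadPath { Accelerated, Converted, Buffer, Swizzled };

UploadPath SelectUploadPath(bool accelerated_upload, bool converted, bool is_buffer) {
    if (accelerated_upload) {
        return UploadPath::Accelerated; // swizzle on the GPU from raw guest memory
    }
    if (converted) {
        return UploadPath::Converted; // CPU deswizzle plus pixel format conversion
    }
    if (is_buffer) {
        return UploadPath::Buffer; // raw copy, no image layout involved
    }
    return UploadPath::Swizzled; // plain CPU deswizzle
}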
979
980         if (overlaps.empty()) {
981             Deduction result{};
982             result.type = DeductionType::DeductionIncomplete;
983             return result;
984         }
837
838template <class P>
839ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
840    if (!IsValidAddress(gpu_memory, config)) {
841        return NULL_IMAGE_VIEW_ID;
842    }
843 const auto [pair, is_new] = image_views.try_emplace(config);
844 ImageViewId& image_view_id = pair->second;
845 if (is_new) {
846 image_view_id = CreateImageView(config);
847 }
848 return image_view_id;
849}
985
986         if (overlaps.size() > 1) {
987             Deduction result{};
988             result.type = DeductionType::DeductionFailed;
989             return result;
990         } else {
991             Deduction result{};
992             result.type = DeductionType::DeductionComplete;
993             result.surface = overlaps[0];
994             return result;
995         }
996     }
850
851template <class P>
852ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
853    const ImageInfo info(config);
854    const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
855    const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
856    if (!image_id) {
857        return NULL_IMAGE_VIEW_ID;
858    }
859 ImageBase& image = slot_images[image_id];
860 const SubresourceBase base = image.TryFindBase(config.Address()).value();
861 ASSERT(base.level == 0);
862 const ImageViewInfo view_info(config, base.layer);
863 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
864 ImageViewBase& image_view = slot_image_views[image_view_id];
865 image_view.flags |= ImageViewFlagBits::Strong;
866 image.flags |= ImageFlagBits::Strong;
867 return image_view_id;
868}
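CreateImageView rebases a TIC entry that points at layer N of an array texture back to layer 0 before looking up the image, which is why image_gpu_addr subtracts BaseLayer() * layer_stride. The arithmetic in isolation:

#include <cstdint>

std::uint64_t RebaseToLayerZero(std::uint64_t tic_address, std::uint32_t base_layer,
                                std::uint64_t layer_stride) {
    // The descriptor addresses layer `base_layer`; the cache wants the whole image.
    return tic_address - static_cast<std::uint64_t>(base_layer) * layer_stride;
}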
869
870template <class P>
871ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
872                                           RelaxedOptions options) {
873    if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
874        return image_id;
875    }
876    return InsertImage(info, gpu_addr, options);
877}
878
879template <class P>
880ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
881                                   RelaxedOptions options) {
882    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
883    if (!cpu_addr) {
884        return ImageId{};
885    }
886    const bool broken_views = runtime.HasBrokenTextureViewFormats();
887    ImageId image_id;
888    const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
889        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
890            const bool strict_size = False(options & RelaxedOptions::Size) &&
891                                     True(existing_image.flags & ImageFlagBits::Strong);
892            const ImageInfo& existing = existing_image.info;
893            if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
894                existing.pitch == info.pitch &&
895                IsPitchLinearSameSize(existing, info, strict_size) &&
896                IsViewCompatible(existing.format, info.format, broken_views)) {
897                image_id = existing_image_id;
898                return true;
899            }
900        } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
901            image_id = existing_image_id;
902            return true;
997
998     /**
999      * Gets a null surface based on a target texture.
1000      * @param target The target of the null surface.
1001      */
1002     TView GetNullSurface(SurfaceTarget target) {
1003         const u32 i_target = static_cast<u32>(target);
1004         if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
1005             return it->second->GetMainView();
1006         }
1007         SurfaceParams params{};
1008         params.target = target;
1009         params.is_tiled = false;
1010         params.srgb_conversion = false;
1011         params.is_layered =
1012             target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
1013             target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
1014         params.block_width = 0;
1015         params.block_height = 0;
1016         params.block_depth = 0;
1017         params.tile_width_spacing = 1;
1018         params.width = 1;
1019         params.height = 1;
1020         params.depth = 1;
1021         if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
1022             params.depth = 6;
1023         }
1024         params.pitch = 4;
1025         params.num_levels = 1;
1026         params.emulated_levels = 1;
1027         params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
1028         params.type = VideoCore::Surface::SurfaceType::ColorTexture;
1029         auto surface = CreateSurface(0ULL, params);
1030         invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
1031         surface->UploadTexture(invalid_memory);
1032         surface->MarkAsModified(false, Tick());
1033         invalid_cache.emplace(i_target, surface);
1034         return surface->GetMainView();
1035     }
1036
1037 /**
1038      * Gets a source and destination starting address and parameters,
1039 * and tries to deduce if they are supposed to be depth textures. If so, their
1040      * parameters are modified and fixed accordingly.
1041 *
1042 * @param src_params The parameters of the candidate surface.
1043 * @param dst_params The parameters of the destination surface.
1044 * @param src_gpu_addr The starting address of the candidate surface.
1045 * @param dst_gpu_addr The starting address of the destination surface.
1046 **/
1047 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1048 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1049 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1050 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1051 if (deduced_src.Failed() || deduced_dst.Failed()) {
1052 return;
1053         }
903        }
904 return false;
905 };
906 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
907 return image_id;
908}
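FindImage's matching can be loosened per call site through RelaxedOptions bits: render-target clears may ignore sample counts, blits may ignore formats, and so on. A sketch of such a flag set together with the True/False helpers used throughout this file (the values and names here are assumptions for illustration, not the real definitions):

#include <cstdint>

enum class RelaxedOptionsSketch : std::uint32_t {
    Size = 1 << 0,
    Format = 1 << 1,
    Samples = 1 << 2,
};

constexpr RelaxedOptionsSketch operator|(RelaxedOptionsSketch lhs, RelaxedOptionsSketch rhs) {
    return static_cast<RelaxedOptionsSketch>(static_cast<std::uint32_t>(lhs) |
                                             static_cast<std::uint32_t>(rhs));
}

constexpr RelaxedOptionsSketch operator&(RelaxedOptionsSketch lhs, RelaxedOptionsSketch rhs) {
    return static_cast<RelaxedOptionsSketch>(static_cast<std::uint32_t>(lhs) &
                                             static_cast<std::uint32_t>(rhs));
}

constexpr bool True(RelaxedOptionsSketch flags) {
    return static_cast<std::uint32_t>(flags) != 0;
}

constexpr bool False(RelaxedOptionsSketch flags) {
    return static_cast<std::uint32_t>(flags) == 0;
}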
1054
1055         const bool incomplete_src = deduced_src.Incomplete();
1056         const bool incomplete_dst = deduced_dst.Incomplete();
909
910template <class P>
911ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
912 RelaxedOptions options) {
913 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
914 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
915 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
916 const Image& image = slot_images[image_id];
917 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
918 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
919 if (is_new) {
920 it->second = slot_image_allocs.insert();
921 }
922 slot_image_allocs[it->second].images.push_back(image_id);
923 return image_id;
924}
1057
1058         if (incomplete_src && incomplete_dst) {
925
926template <class P>
927ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
928 ImageInfo new_info = info;
929 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
930 const bool broken_views = runtime.HasBrokenTextureViewFormats();
931 std::vector<ImageId> overlap_ids;
932 std::vector<ImageId> left_aliased_ids;
933 std::vector<ImageId> right_aliased_ids;
934 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
935 if (info.type != overlap.info.type) {
936            return;
937        }
938        if (info.type == ImageType::Linear) {
939            if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
940                // Alias linear images with the same pitch
941                left_aliased_ids.push_back(overlap_id);
1059             return;
1060         }
1061
1062         const bool any_incomplete = incomplete_src || incomplete_dst;
1063
1064         if (!any_incomplete) {
1065 if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
1066 return;
1067 }
1068 } else {
1069 if (incomplete_src && !(deduced_dst.IsDepth())) {
1070 return;
1071 }
1072
1073 if (incomplete_dst && !(deduced_src.IsDepth())) {
1074 return;
942            }
943            return;
944        }
945        static constexpr bool strict_size = true;
946        const std::optional<OverlapResult> solution =
947            ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
948        if (solution) {
949            gpu_addr = solution->gpu_addr;
950            cpu_addr = solution->cpu_addr;
951            new_info.resources = solution->resources;
952            overlap_ids.push_back(overlap_id);
953            return;
1075             }
1076         }
1077
1078         const auto inherit_format = [](SurfaceParams& to, TSurface from) {
1079             const SurfaceParams& params = from->GetSurfaceParams();
1080             to.pixel_format = params.pixel_format;
1081             to.type = params.type;
1082         };
1083         // Now handle the cases where one or both are depth and the other is not known
1084         if (!incomplete_src) {
1085             inherit_format(src_params, deduced_src.surface);
1086 } else {
1087 inherit_format(src_params, deduced_dst.surface);
1088         }
1089         if (!incomplete_dst) {
1090             inherit_format(dst_params, deduced_dst.surface);
954        }
955        static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
956        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
957 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
958 left_aliased_ids.push_back(overlap_id);
959 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
960 broken_views)) {
961 right_aliased_ids.push_back(overlap_id);
962 }
963 });
964 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
965 Image& new_image = slot_images[new_image_id];
966
967 // TODO: Only upload what we need
968 RefreshContents(new_image);
969
970 for (const ImageId overlap_id : overlap_ids) {
971 Image& overlap = slot_images[overlap_id];
972 if (overlap.info.num_samples != new_image.info.num_samples) {
973 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
974        } else {
975            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
976            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
977            runtime.CopyImage(new_image, overlap, copies);
978        }
1091         } else {
1092             inherit_format(dst_params, deduced_src.surface);
1093         }
979 if (True(overlap.flags & ImageFlagBits::Tracked)) {
980 UntrackImage(overlap);
981 }
982 UnregisterImage(overlap_id);
983 DeleteImage(overlap_id);
984 }
985 ImageBase& new_image_base = new_image;
986 for (const ImageId aliased_id : right_aliased_ids) {
987 ImageBase& aliased = slot_images[aliased_id];
988 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
989 }
990 for (const ImageId aliased_id : left_aliased_ids) {
991 ImageBase& aliased = slot_images[aliased_id];
992 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
993    }
994    RegisterImage(new_image_id);
995    return new_image_id;
996}
1094     }
1095
997
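JoinImages above triages every overlapping image into one of three outcomes: absorbed into the new image (copied, then deleted), kept as an alias in one direction or the other, or ignored when the types are incompatible. A condensed restatement of that decision:

enum class OverlapAction { Ignore, Absorb, AliasNewInsideOld, AliasOldInsideNew };

OverlapAction TriageOverlap(bool same_type, bool resolves_into_new, bool new_inside_old,
                            bool old_inside_new) {
    if (!same_type) {
        return OverlapAction::Ignore;
    }
    if (resolves_into_new) {
        return OverlapAction::Absorb; // contents copied over, old image deleted
    }
    if (new_inside_old) {
        return OverlapAction::AliasNewInsideOld; // left_aliased_ids above
    }
    if (old_inside_new) {
        return OverlapAction::AliasOldInsideNew; // right_aliased_ids above
    }
    return OverlapAction::Ignore;
}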
1096     std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
1097                                                  bool preserve_contents) {
1098         auto new_surface{GetUncachedSurface(gpu_addr, params)};
1099         Register(new_surface);
1100         if (preserve_contents) {
1101             LoadSurface(new_surface);
1102         }
1103         return {new_surface, new_surface->GetMainView()};
998template <class P>
999typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1000    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
1001    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
1002    const GPUVAddr dst_addr = dst.Address();
1003    const GPUVAddr src_addr = src.Address();
1004    ImageInfo dst_info(dst);
1005    ImageInfo src_info(src);
1006 ImageId dst_id;
1007 ImageId src_id;
1008 do {
1009 has_deleted_images = false;
1010 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
1011 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
1012 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
1013 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1014 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
1015 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
1016 continue;
1017 }
1018 if (!dst_id) {
1019 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
1020 }
1021 if (!src_id) {
1022 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
1023 }
1024 } while (has_deleted_images);
1025 return BlitImages{
1026 .dst_id = dst_id,
1027 .src_id = src_id,
1028 .dst_format = dst_info.format,
1029 .src_format = src_info.format,
1030 };
1031}
1032
1033template <class P>
1034SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1035 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
1036 return NULL_SAMPLER_ID;
1037 }
1038 const auto [pair, is_new] = samplers.try_emplace(config);
1039 if (is_new) {
1040 pair->second = slot_samplers.insert(runtime, config);
1041    }
1042    return pair->second;
1043}
1044
1104     }
1105
1106     void LoadSurface(const TSurface& surface) {
1107         staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
1108         surface->LoadBuffer(gpu_memory, staging_cache);
1109         surface->UploadTexture(staging_cache.GetBuffer(0));
1110         surface->MarkAsModified(false, Tick());
1045template <class P>
1046ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1047    const auto& regs = maxwell3d.regs;
1048    if (index >= regs.rt_control.count) {
1049        return ImageViewId{};
1050 }
1051 const auto& rt = regs.rt[index];
1052 const GPUVAddr gpu_addr = rt.Address();
1053 if (gpu_addr == 0) {
1054 return ImageViewId{};
1055 }
1056 if (rt.format == Tegra::RenderTargetFormat::NONE) {
1057 return ImageViewId{};
1058    }
1111     }
1059 const ImageInfo info(regs, index);
1060 return FindRenderTargetView(info, gpu_addr, is_clear);
1061}
1062
1063template <class P>
1064ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1065    const auto& regs = maxwell3d.regs;
1066    if (!regs.zeta_enable) {
1067        return ImageViewId{};
1068    }
1069    const GPUVAddr gpu_addr = regs.zeta.Address();
1070    if (gpu_addr == 0) {
1071        return ImageViewId{};
1112
1113     void FlushSurface(const TSurface& surface) {
1114         if (!surface->IsModified()) {
1115             return;
1116         }
1117         staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
1118         surface->DownloadTexture(staging_cache.GetBuffer(0));
1119         surface->FlushBuffer(gpu_memory, staging_cache);
1120         surface->MarkAsModified(false, Tick());
1121     }
1122
1123 void RegisterInnerCache(TSurface& surface) {
1124 const VAddr cpu_addr = surface->GetCpuAddr();
1125 VAddr start = cpu_addr >> registry_page_bits;
1126 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1127 l1_cache[cpu_addr] = surface;
1128 while (start <= end) {
1129 registry[start].push_back(surface);
1130 start++;
1131 }
1132     }
1133
1134     void UnregisterInnerCache(TSurface& surface) {
1135         const VAddr cpu_addr = surface->GetCpuAddr();
1136         VAddr start = cpu_addr >> registry_page_bits;
1137         const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1138         l1_cache.erase(cpu_addr);
1139         while (start <= end) {
1140             auto& reg{registry[start]};
1141             reg.erase(std::find(reg.begin(), reg.end(), surface));
1142             start++;
1143         }
1144     }
1145
1072    }
1073    const ImageInfo info(regs);
1074    return FindRenderTargetView(info, gpu_addr, is_clear);
1075}
1076
1077template <class P>
1078ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
1079                                                  bool is_clear) {
1080    const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1081    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
1082    if (!image_id) {
1083        return NULL_IMAGE_VIEW_ID;
1084    }
1085    Image& image = slot_images[image_id];
1086    const ImageViewType view_type = RenderTargetImageViewType(info);
1087    SubresourceBase base;
1088    if (image.info.type == ImageType::Linear) {
1089        base = SubresourceBase{.level = 0, .layer = 0};
1090    } else {
1091        base = image.TryFindBase(gpu_addr).value();
1092    }
1093    const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
1094    const SubresourceRange range{
1095        .base = base,
1096        .extent = {.levels = 1, .layers = layers},
1097    };
1098    return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
1099}
1100
1101template <class P>
1102template <typename Func>
1103void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
1104    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1105    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1106    boost::container::small_vector<ImageId, 32> images;
1107    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
1108        const auto it = page_table.find(page);
1109        if (it == page_table.end()) {
1110            if constexpr (BOOL_BREAK) {
1111                return false;
1112            } else {
1113                return;
1114            }
1115        }
1116        for (const ImageId image_id : it->second) {
1117            Image& image = slot_images[image_id];
1118            if (True(image.flags & ImageFlagBits::Picked)) {
1119                continue;
1120            }
1121            if (!image.Overlaps(cpu_addr, size)) {
1122                continue;
1123            }
1124            image.flags |= ImageFlagBits::Picked;
1125            images.push_back(image_id);
1126            if constexpr (BOOL_BREAK) {
1127                if (func(image_id, image)) {
1128                    return true;
1129                }
1130            } else {
1131                func(image_id, image);
1132            }
1133        }
1134        if constexpr (BOOL_BREAK) {
1135            return false;
1136        }
1137    });
1138    for (const ImageId image_id : images) {
1139        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1140    }
1141}
1146     VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1147         if (size == 0) {
1148             return {};
1149         }
1150         const VAddr cpu_addr_end = cpu_addr + size;
1151         const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1152         VectorSurface surfaces;
1153         for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1154             const auto it = registry.find(start);
1155             if (it == registry.end()) {
1156                 continue;
1157             }
1158             for (auto& surface : it->second) {
1159                 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1160                     continue;
1161                 }
1162                 surface->MarkAsPicked(true);
1163                 surfaces.push_back(surface);
1164             }
1165         }
1166         for (auto& surface : surfaces) {
1167             surface->MarkAsPicked(false);
1168         }
1169         return surfaces;
1170     }
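ForEachImageInRegion accepts both void callbacks and bool callbacks, detecting the early-break variant at compile time with invoke_result; this is the BOOL_BREAK machinery above. The idiom in isolation:

#include <type_traits>

template <typename Func>
void VisitUpTo(int count, Func&& func) {
    using FuncReturn = std::invoke_result_t<Func, int>;
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    for (int i = 0; i < count; ++i) {
        if constexpr (BOOL_BREAK) {
            if (func(i)) {
                return; // callback returned true: stop the walk early
            }
        } else {
            func(i);
        }
    }
}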
1171
1172     void ReserveSurface(const SurfaceParams& params, TSurface surface) {
1173         surface_reserve[params].push_back(std::move(surface));
1174     }
1142
1143template <class P>
1144ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1145    Image& image = slot_images[image_id];
1146    if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1147        return image_view_id;
1148    }
1149    const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1150    image.InsertView(info, image_view_id);
1151    return image_view_id;
1152}
1153
1154template <class P>
1155void TextureCache<P>::RegisterImage(ImageId image_id) {
1156 ImageBase& image = slot_images[image_id];
1157 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1158 "Trying to register an already registered image");
1159 image.flags |= ImageFlagBits::Registered;
1160 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1161 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1162}
1175
1176     TSurface TryGetReservedSurface(const SurfaceParams& params) {
1177         auto search{surface_reserve.find(params)};
1178         if (search == surface_reserve.end()) {
1179             return {};
1180         }
1181         for (auto& surface : search->second) {
1182             if (!surface->IsRegistered()) {
1183                 return surface;
1184             }
1185         }
1186         return {};
1187     }
1163
1164template <class P>
1165void TextureCache<P>::UnregisterImage(ImageId image_id) {
1166    Image& image = slot_images[image_id];
1167    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1168               "Trying to unregister an image that was not registered");
1169    image.flags &= ~ImageFlagBits::Registered;
1170    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1171        const auto page_it = page_table.find(page);
1172        if (page_it == page_table.end()) {
1173            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1174            return;
1175        }
1176        std::vector<ImageId>& image_ids = page_it->second;
1177        const auto vector_it = std::ranges::find(image_ids, image_id);
1178        if (vector_it == image_ids.end()) {
1179            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
1180            return;
1181        }
1182        image_ids.erase(vector_it);
1183    });
1184}
1188 1185
1189 /// Try to do an image copy logging when formats are incompatible. 1186template <class P>
1190 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { 1187void TextureCache<P>::TrackImage(ImageBase& image) {
1191 const SurfaceParams& src_params = src->GetSurfaceParams(); 1188 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 const SurfaceParams& dst_params = dst->GetSurfaceParams(); 1189 image.flags |= ImageFlagBits::Tracked;
1193 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { 1190 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1194 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", 1191}
1195 static_cast<int>(dst_params.pixel_format), 1192
1196 static_cast<int>(src_params.pixel_format)); 1193template <class P>
1197 return; 1194void TextureCache<P>::UntrackImage(ImageBase& image) {
1195 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1196 image.flags &= ~ImageFlagBits::Tracked;
1197 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1198}
1199
1200template <class P>
1201void TextureCache<P>::DeleteImage(ImageId image_id) {
1202 ImageBase& image = slot_images[image_id];
1203 const GPUVAddr gpu_addr = image.gpu_addr;
1204 const auto alloc_it = image_allocs_table.find(gpu_addr);
1205 if (alloc_it == image_allocs_table.end()) {
1206 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1207 gpu_addr);
1208 return;
1209 }
1210 const ImageAllocId alloc_id = alloc_it->second;
1211 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1212 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1213 if (alloc_image_it == alloc_images.end()) {
1214 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1215 return;
1216 }
1217 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1218 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1219
1220 // Mark render targets as dirty
1221 auto& dirty = maxwell3d.dirty.flags;
1222 dirty[Dirty::RenderTargets] = true;
1223 dirty[Dirty::ZetaBuffer] = true;
1224 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1225 dirty[Dirty::ColorBuffer0 + rt] = true;
1226 }
1227 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1228 for (const ImageViewId image_view_id : image_view_ids) {
1229 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1230 if (render_targets.depth_buffer_id == image_view_id) {
1231 render_targets.depth_buffer_id = ImageViewId{};
1198 } 1232 }
1199 ImageCopy(src, dst, copy);
1200 } 1233 }
1234 RemoveImageViewReferences(image_view_ids);
1235 RemoveFramebuffers(image_view_ids);
1236
1237 for (const AliasedImage& alias : image.aliased_images) {
1238 ImageBase& other_image = slot_images[alias.id];
1239 [[maybe_unused]] const size_t num_removed_aliases =
1240 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1241 return other_alias.id == image_id;
1242 });
1243 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1244 num_removed_aliases);
1245 }
1246 for (const ImageViewId image_view_id : image_view_ids) {
1247 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1248 slot_image_views.erase(image_view_id);
1249 }
1250 sentenced_images.Push(std::move(slot_images[image_id]));
1251 slot_images.erase(image_id);
1201 1252
1202 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { 1253 alloc_images.erase(alloc_image_it);
1203 return siblings_table[static_cast<std::size_t>(format)]; 1254 if (alloc_images.empty()) {
1255 image_allocs_table.erase(alloc_it);
1204 } 1256 }
1257 if constexpr (ENABLE_VALIDATION) {
1258 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1259 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1260 }
1261 graphics_image_table.Invalidate();
1262 compute_image_table.Invalidate();
1263 has_deleted_images = true;
1264}
1205 1265
1206 /// Returns true the shader sampler entry is compatible with the TIC texture type. 1266template <class P>
1207 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, 1267void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1208 const VideoCommon::Shader::Sampler& entry) { 1268 auto it = image_views.begin();
1209 const auto shader_type = entry.type; 1269 while (it != image_views.end()) {
1210 switch (tic_type) { 1270 const auto found = std::ranges::find(removed_views, it->second);
1211 case Tegra::Texture::TextureType::Texture1D: 1271 if (found != removed_views.end()) {
1212 case Tegra::Texture::TextureType::Texture1DArray: 1272 it = image_views.erase(it);
1213 return shader_type == Tegra::Shader::TextureType::Texture1D; 1273 } else {
1214 case Tegra::Texture::TextureType::Texture1DBuffer: 1274 ++it;
1215 // TODO(Rodrigo): Assume as valid for now
1216 return true;
1217 case Tegra::Texture::TextureType::Texture2D:
1218 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1219 return shader_type == Tegra::Shader::TextureType::Texture2D;
1220 case Tegra::Texture::TextureType::Texture2DArray:
1221 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1222 shader_type == Tegra::Shader::TextureType::TextureCube;
1223 case Tegra::Texture::TextureType::Texture3D:
1224 return shader_type == Tegra::Shader::TextureType::Texture3D;
1225 case Tegra::Texture::TextureType::TextureCubeArray:
1226 case Tegra::Texture::TextureType::TextureCubemap:
1227 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1228 return true;
1229 }
1230 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
1231 } 1275 }
1232 UNREACHABLE();
1233 return true;
1234 } 1276 }
1277}
1235 1278
1236 struct FramebufferTargetInfo { 1279template <class P>
1237 TSurface target; 1280void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1238 TView view; 1281 auto it = framebuffers.begin();
1239 }; 1282 while (it != framebuffers.end()) {
1240 1283 if (it->first.Contains(removed_views)) {
1241 void AsyncFlushSurface(TSurface& surface) { 1284 it = framebuffers.erase(it);
1242 if (!uncommitted_flushes) { 1285 } else {
1243 uncommitted_flushes = std::make_shared<std::list<TSurface>>(); 1286 ++it;
1244 } 1287 }
1245 uncommitted_flushes->push_back(surface);
1246 } 1288 }
1289}
1247 1290
1248 VideoCore::RasterizerInterface& rasterizer; 1291template <class P>
1249 Tegra::Engines::Maxwell3D& maxwell3d; 1292void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1250 Tegra::MemoryManager& gpu_memory; 1293 image.flags |= ImageFlagBits::GpuModified;
1251 1294 image.modification_tick = ++modification_tick;
1252 FormatLookupTable format_lookup_table; 1295}
1253 FormatCompatibility format_compatibility;
1254
1255 u64 ticks{};
1256
1257 // Guards the cache for protection conflicts.
1258 bool guard_render_targets{};
1259 bool guard_samplers{};
1260
1261 // The siblings table is for formats that can inter exchange with one another
1262 // without causing issues. This is only valid when a conflict occurs on a non
1263 // rendering use.
1264 std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
1265
1266 // The internal Cache is different for the Texture Cache. It's based on buckets
1267 // of 1MB. This fits better for the purpose of this cache as textures are normaly
1268 // large in size.
1269 static constexpr u64 registry_page_bits{20};
1270 static constexpr u64 registry_page_size{1 << registry_page_bits};
1271 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1272 1296
1273 static constexpr u32 DEPTH_RT = 8; 1297template <class P>
1274 static constexpr u32 NO_RT = 0xFFFFFFFF; 1298void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1299 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1300 ImageBase& image = slot_images[image_id];
1301 u64 most_recent_tick = image.modification_tick;
1302 for (const AliasedImage& aliased : image.aliased_images) {
1303 ImageBase& aliased_image = slot_images[aliased.id];
1304 if (image.modification_tick < aliased_image.modification_tick) {
1305 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1306 aliased_images.push_back(&aliased);
1307 }
1308 }
1309 if (aliased_images.empty()) {
1310 return;
1311 }
1312 image.modification_tick = most_recent_tick;
1313 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1314 const ImageBase& lhs_image = slot_images[lhs->id];
1315 const ImageBase& rhs_image = slot_images[rhs->id];
1316 return lhs_image.modification_tick < rhs_image.modification_tick;
1317 });
1318 for (const AliasedImage* const aliased : aliased_images) {
1319 CopyImage(image_id, aliased->id, aliased->copies);
1320 }
1321}
1275 1322
1276 // The L1 Cache is used for fast texture lookup before checking the overlaps 1323template <class P>
1277 // This avoids calculating size and other stuffs. 1324void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1278 std::unordered_map<VAddr, TSurface> l1_cache; 1325 Image& image = slot_images[image_id];
1326 if (invalidate) {
1327 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1328 if (False(image.flags & ImageFlagBits::Tracked)) {
1329 TrackImage(image);
1330 }
1331 } else {
1332 RefreshContents(image);
1333 SynchronizeAliases(image_id);
1334 }
1335 if (is_modification) {
1336 MarkModification(image);
1337 }
1338 image.frame_tick = frame_tick;
1339}
1279 1340
1280 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 1341template <class P>
1281 /// previously been used. This is to prevent surfaces from being constantly created and 1342void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1282 /// destroyed when used with different surface parameters. 1343 bool invalidate) {
1283 std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; 1344 if (!image_view_id) {
1284 std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> 1345 return;
1285 render_targets; 1346 }
1286 FramebufferTargetInfo depth_buffer; 1347 const ImageViewBase& image_view = slot_image_views[image_view_id];
1348 PrepareImage(image_view.image_id, is_modification, invalidate);
1349}
1287 1350
1288 std::vector<TSurface> sampled_textures; 1351template <class P>
1352void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1353 Image& dst = slot_images[dst_id];
1354 Image& src = slot_images[src_id];
1355 const auto dst_format_type = GetFormatType(dst.info.format);
1356 const auto src_format_type = GetFormatType(src.info.format);
1357 if (src_format_type == dst_format_type) {
1358 if constexpr (HAS_EMULATED_COPIES) {
1359 if (!runtime.CanImageBeCopied(dst, src)) {
1360 return runtime.EmulateCopyImage(dst, src, copies);
1361 }
1362 }
1363 return runtime.CopyImage(dst, src, copies);
1364 }
1365 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1366 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1367 for (const ImageCopy& copy : copies) {
1368 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1369 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1370 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1371 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1372
1373 const SubresourceBase dst_base{
1374 .level = copy.dst_subresource.base_level,
1375 .layer = copy.dst_subresource.base_layer,
1376 };
1377 const SubresourceBase src_base{
1378 .level = copy.src_subresource.base_level,
1379 .layer = copy.src_subresource.base_layer,
1380 };
1381 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1382 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1383 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1384 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1385 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1386 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1387 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1388 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1389 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1390 ImageView& dst_view = slot_image_views[dst_view_id];
1391 ImageView& src_view = slot_image_views[src_view_id];
1392 [[maybe_unused]] const Extent3D expected_size{
1393 .width = std::min(dst_view.size.width, src_view.size.width),
1394 .height = std::min(dst_view.size.height, src_view.size.height),
1395 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1396 };
1397 UNIMPLEMENTED_IF(copy.extent != expected_size);
1289 1398
1290 /// This cache stores null surfaces in order to be used as a placeholder 1399 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1291 /// for invalid texture calls. 1400 }
1292 std::unordered_map<u32, TSurface> invalid_cache; 1401}
1293 std::vector<u8> invalid_memory;
1294 1402
1295 std::list<TSurface> marked_for_unregister; 1403template <class P>
1404void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1405 if (*old_id == new_id) {
1406 return;
1407 }
1408 if (*old_id) {
1409 const ImageViewBase& old_view = slot_image_views[*old_id];
1410 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1411 uncommitted_downloads.push_back(old_view.image_id);
1412 }
1413 }
1414 *old_id = new_id;
1415}
1296 1416
1297 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; 1417template <class P>
1298 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; 1418std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1419 ImageId image_id, const ImageViewInfo& view_info) {
1420 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1421 const ImageBase& image = slot_images[image_id];
1422 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1423 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1424 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1425 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1426 const u32 num_samples = image.info.num_samples;
1427 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1428 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1429 .color_buffer_ids = {color_view_id},
1430 .depth_buffer_id = depth_view_id,
1431 .size = {extent.width >> samples_x, extent.height >> samples_y},
1432 });
1433 return {framebuffer_id, view_id};
1434}
1299 1435
1300 StagingCache staging_cache; 1436template <class P>
1301 std::recursive_mutex mutex; 1437bool TextureCache<P>::IsFullClear(ImageViewId id) {
1302}; 1438 if (!id) {
1439 return true;
1440 }
1441 const ImageViewBase& image_view = slot_image_views[id];
1442 const ImageBase& image = slot_images[image_view.image_id];
1443 const Extent3D size = image_view.size;
1444 const auto& regs = maxwell3d.regs;
1445 const auto& scissor = regs.scissor_test[0];
1446 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1447 // Images with multiple resources can't be cleared in a single call
1448 return false;
1449 }
1450 if (regs.clear_flags.scissor == 0) {
1451 // If scissor testing is disabled, the clear is always full
1452 return true;
1453 }
1454 // Make sure the clear covers all texels in the subresource
1455 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1456 scissor.max_y >= size.height;
1457}
1303 1458
1304} // namespace VideoCommon 1459} // namespace VideoCommon
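The RegisterImage/UnregisterImage pair above replaces the old 1 MiB registry buckets with a page table populated by ForEachPage over the image's CPU address range. A minimal standalone sketch of that idiom, assuming simplified stand-ins (PAGE_BITS, ForEachPage and the map layout here are illustrative, not the cache's actual members):

#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;  // hypothetical stand-in
using ImageId = std::uint32_t; // hypothetical stand-in

constexpr std::uint64_t PAGE_BITS = 20; // 1 MiB buckets, mirroring the old registry

// Invoke `func` once per page touched by [addr, addr + size).
template <typename Func>
void ForEachPage(VAddr addr, std::size_t size, Func&& func) {
    const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
    for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
        func(page);
    }
}

int main() {
    std::unordered_map<std::uint64_t, std::vector<ImageId>> page_table;
    const ImageId image_id = 7;
    // Register: append the id to every page the image overlaps.
    ForEachPage(0x1f0000, 0x300000, [&](std::uint64_t page) {
        page_table[page].push_back(image_id);
    });
    // Any address inside the range now resolves to the image via its page.
    return page_table.count(0x200000 >> PAGE_BITS) != 0 ? 0 : 1;
}

Unregistration walks the same pages and erases the id from each bucket, which is why UnregisterImage treats a missing page or id as UNREACHABLE.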
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9#include "video_core/texture_cache/slot_vector.h"
10
11namespace VideoCommon {
12
13constexpr size_t NUM_RT = 8;
14constexpr size_t MAX_MIP_LEVELS = 14;
15
16constexpr SlotId CORRUPT_ID{0xfffffffe};
17
18using ImageId = SlotId;
19using ImageViewId = SlotId;
20using ImageAllocId = SlotId;
21using SamplerId = SlotId;
22using FramebufferId = SlotId;
23
24enum class ImageType : u32 {
25 e1D,
26 e2D,
27 e3D,
28 Linear,
29 Buffer,
30};
31
32enum class ImageViewType : u32 {
33 e1D,
34 e2D,
35 Cube,
36 e3D,
37 e1DArray,
38 e2DArray,
39 CubeArray,
40 Rect,
41 Buffer,
42};
43constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
44
45enum class RelaxedOptions : u32 {
46 Size = 1 << 0,
47 Format = 1 << 1,
48 Samples = 1 << 2,
49};
50DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
51
52struct Offset2D {
53 constexpr auto operator<=>(const Offset2D&) const noexcept = default;
54
55 s32 x;
56 s32 y;
57};
58
59struct Offset3D {
60 constexpr auto operator<=>(const Offset3D&) const noexcept = default;
61
62 s32 x;
63 s32 y;
64 s32 z;
65};
66
67struct Extent2D {
68 constexpr auto operator<=>(const Extent2D&) const noexcept = default;
69
70 u32 width;
71 u32 height;
72};
73
74struct Extent3D {
75 constexpr auto operator<=>(const Extent3D&) const noexcept = default;
76
77 u32 width;
78 u32 height;
79 u32 depth;
80};
81
82struct SubresourceLayers {
83 s32 base_level = 0;
84 s32 base_layer = 0;
85 s32 num_layers = 1;
86};
87
88struct SubresourceBase {
89 constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
90
91 s32 level = 0;
92 s32 layer = 0;
93};
94
95struct SubresourceExtent {
96 constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
97
98 s32 levels = 1;
99 s32 layers = 1;
100};
101
102struct SubresourceRange {
103 constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
104
105 SubresourceBase base;
106 SubresourceExtent extent;
107};
108
109struct ImageCopy {
110 SubresourceLayers src_subresource;
111 SubresourceLayers dst_subresource;
112 Offset3D src_offset;
113 Offset3D dst_offset;
114 Extent3D extent;
115};
116
117struct BufferImageCopy {
118 size_t buffer_offset;
119 size_t buffer_size;
120 u32 buffer_row_length;
121 u32 buffer_image_height;
122 SubresourceLayers image_subresource;
123 Offset3D image_offset;
124 Extent3D image_extent;
125};
126
127struct BufferCopy {
128 size_t src_offset;
129 size_t dst_offset;
130 size_t size;
131};
132
133struct SwizzleParameters {
134 Extent3D num_tiles;
135 Extent3D block;
136 size_t buffer_offset;
137 s32 level;
138};
139
140} // namespace VideoCommon
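types.h leans on C++20: defaulting operator<=> also implicitly defaults operator==, so whole structs compare directly. That is what allows checks like UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}) in texture_cache.h. A small sketch assuming nothing beyond a copy of one struct above:

#include <cassert>

// Local copy of the Extent3D shape, only to show what the defaulted
// three-way comparison buys: ==, !=, <, and friends come for free.
struct Extent3D {
    constexpr auto operator<=>(const Extent3D&) const noexcept = default;
    unsigned width;
    unsigned height;
    unsigned depth;
};

int main() {
    constexpr Extent3D mip0{.width = 256, .height = 256, .depth = 1};
    constexpr Extent3D mip1{.width = 128, .height = 128, .depth = 1};
    static_assert(mip0 != mip1); // synthesized from the defaulted <=>
    assert((Extent3D{256, 256, 1} == mip0)); // memberwise equality
    return 0;
}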
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..279932778
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1233 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file contains code from Ryujinx
6// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
7// The sections using code from Ryujinx are marked with a link to the original version
8
9// MIT License
10//
11// Copyright (c) Ryujinx Team and Contributors
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
14// associated documentation files (the "Software"), to deal in the Software without restriction,
15// including without limitation the rights to use, copy, modify, merge, publish, distribute,
16// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
17// furnished to do so, subject to the following conditions:
18//
19// The above copyright notice and this permission notice shall be included in all copies or
20// substantial portions of the Software.
21//
22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
23// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
25// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27//
28
29#include <algorithm>
30#include <array>
31#include <numeric>
32#include <optional>
33#include <span>
34#include <vector>
35
36#include "common/alignment.h"
37#include "common/assert.h"
38#include "common/bit_util.h"
39#include "common/common_types.h"
40#include "common/div_ceil.h"
41#include "video_core/compatible_formats.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/surface.h"
45#include "video_core/texture_cache/decode_bc4.h"
46#include "video_core/texture_cache/format_lookup_table.h"
47#include "video_core/texture_cache/formatter.h"
48#include "video_core/texture_cache/samples_helper.h"
49#include "video_core/texture_cache/util.h"
50#include "video_core/textures/astc.h"
51#include "video_core/textures/decoders.h"
52
53namespace VideoCommon {
54
55namespace {
56
57using Tegra::Texture::GOB_SIZE;
58using Tegra::Texture::GOB_SIZE_SHIFT;
59using Tegra::Texture::GOB_SIZE_X;
60using Tegra::Texture::GOB_SIZE_X_SHIFT;
61using Tegra::Texture::GOB_SIZE_Y;
62using Tegra::Texture::GOB_SIZE_Y_SHIFT;
63using Tegra::Texture::GOB_SIZE_Z;
64using Tegra::Texture::GOB_SIZE_Z_SHIFT;
65using Tegra::Texture::MsaaMode;
66using Tegra::Texture::SwizzleTexture;
67using Tegra::Texture::TextureFormat;
68using Tegra::Texture::TextureType;
69using Tegra::Texture::TICEntry;
70using Tegra::Texture::UnswizzleTexture;
71using VideoCore::Surface::BytesPerBlock;
72using VideoCore::Surface::DefaultBlockHeight;
73using VideoCore::Surface::DefaultBlockWidth;
74using VideoCore::Surface::IsCopyCompatible;
75using VideoCore::Surface::IsPixelFormatASTC;
76using VideoCore::Surface::IsViewCompatible;
77using VideoCore::Surface::PixelFormatFromDepthFormat;
78using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
79using VideoCore::Surface::SurfaceType;
80
81constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
82
83struct LevelInfo {
84 Extent3D size;
85 Extent3D block;
86 Extent2D tile_size;
87 u32 bpp_log2;
88 u32 tile_width_spacing;
89};
90
91[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
92 if (shift == 0) {
93 return 0;
94 }
95 u32 x = unit_factor << (shift - 1);
96 if (x >= dimension) {
97 while (--shift) {
98 x >>= 1;
99 if (x < dimension) {
100 break;
101 }
102 }
103 }
104 return shift;
105}
106
107[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
108 return std::max<u32>(size >> level, 1);
109}
110
111[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
112 return Extent3D{
113 .width = AdjustMipSize(size.width, level),
114 .height = AdjustMipSize(size.height, level),
115 .depth = AdjustMipSize(size.depth, level),
116 };
117}
118
119[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
120 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
121 return Extent3D{
122 .width = size.width >> samples_x,
123 .height = size.height >> samples_y,
124 .depth = size.depth,
125 };
126}
127
128template <u32 GOB_EXTENT>
129[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
130 do {
131 while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
132 --block_size;
133 }
134 } while (level--);
135 return block_size;
136}
137
138[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
139 u32 level) {
140 return {
141 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
142 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
143 .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
144 };
145}
146
147[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
148 return {
149 .width = Common::DivCeil(size.width, tile_size.width),
150 .height = Common::DivCeil(size.height, tile_size.height),
151 .depth = size.depth,
152 };
153}
154
155[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
156 return std::countl_zero(bytes_per_block) ^ 0x1F;
157}
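BytesPerBlockLog2 is a small bit trick: for non-zero 32-bit x, std::countl_zero(x) is at most 31, so XOR-ing it with 0x1F equals 31 - countl_zero(x), the index of the most significant set bit. For the power-of-two block sizes used here that is exactly log2. A few compile-time checks of the identity (standalone, not part of the file):

#include <bit>
#include <cstdint>

// countl_zero(x) ^ 0x1F == 31 - countl_zero(x) == floor(log2(x)) for x > 0.
static_assert((std::countl_zero(std::uint32_t{1}) ^ 0x1F) == 0);
static_assert((std::countl_zero(std::uint32_t{4}) ^ 0x1F) == 2);
static_assert((std::countl_zero(std::uint32_t{16}) ^ 0x1F) == 4);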
158
159[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
160 return BytesPerBlockLog2(BytesPerBlock(format));
161}
162
163[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
164 const Extent3D num_blocks = AdjustTileSize(size, tile_size);
165 return num_blocks.width * num_blocks.height * num_blocks.depth;
166}
167
168[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
169 return Common::DivCeil(AdjustMipSize(size, level), block_size);
170}
171
172[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
173 return config.Width() * config.Height() * BytesPerBlock(format);
174}
175
176[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
177 switch (type) {
178 case TextureType::Texture2D:
179 case TextureType::Texture2DArray:
180 case TextureType::Texture2DNoMipmap:
181 case TextureType::Texture3D:
182 case TextureType::TextureCubeArray:
183 case TextureType::TextureCubemap:
184 return true;
185 case TextureType::Texture1D:
186 case TextureType::Texture1DArray:
187 case TextureType::Texture1DBuffer:
188 return false;
189 }
190 return false;
191}
192
193[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
194 switch (type) {
195 case ImageType::e2D:
196 case ImageType::e3D:
197 case ImageType::Linear:
198 return true;
199 case ImageType::e1D:
200 case ImageType::Buffer:
201 return false;
202 }
203 UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
204}
205
206[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
207 switch (num_samples) {
208 case 1:
209 return {1, 1};
210 case 2:
211 return {2, 1};
212 case 4:
213 return {2, 2};
214 case 8:
215 return {4, 2};
216 case 16:
217 return {4, 4};
218 }
219 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
220 return {1, 1};
221}
222
223[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
224 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
225}
226
227[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
228 return Extent3D{
229 .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
230 .height = AdjustSize(info.size.height, level, info.tile_size.height),
231 .depth = AdjustMipSize(info.size.depth, level),
232 };
233}
234
235[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
236 const Extent3D blocks = NumLevelBlocks(info, level);
237 return Extent3D{
238 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
239 .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
240 .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
241 };
242}
243
244[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
245 return Extent2D{
246 .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
247 .height = GOB_SIZE_Y_SHIFT + block_height,
248 };
249}
250
251[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
252 u32 block_depth) {
253 return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
254 num_tiles.depth < (1U << block_depth);
255}
256
257[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
258 u32 bpp_log2) {
259 if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
260 return GOB_SIZE_X_SHIFT - bpp_log2;
261 } else {
262 return gob.width;
263 }
264}
265
266[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
267 u32 tile_width_spacing) {
268 const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
269 return StrideAlignment(num_tiles, block, gob, bpp_log2);
270}
271
272[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
273 const Extent3D blocks = NumLevelBlocks(info, level);
274 const Extent2D gobs{
275 .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
276 .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
277 };
278 const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
279 const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
280 const u32 alignment = is_small ? 0 : info.tile_width_spacing;
281 return Extent2D{
282 .width = Common::AlignBits(gobs.width, alignment),
283 .height = gobs.height,
284 };
285}
286
287[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
288 const Extent3D blocks = NumLevelBlocks(info, level);
289 const Extent3D tile_shift = TileShift(info, level);
290 const Extent2D gobs = NumGobs(info, level);
291 return Extent3D{
292 .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
293 .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
294 .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
295 };
296}
297
298[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
299 const Extent3D tile_shift = TileShift(info, level);
300 const Extent3D tiles = LevelTiles(info, level);
301 const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
302 const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
303 return num_tiles << shift;
304}
305
306[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
307 u32 num_levels) {
308 ASSERT(num_levels <= MAX_MIP_LEVELS);
309 std::array<u32, MAX_MIP_LEVELS> sizes{};
310 for (u32 level = 0; level < num_levels; ++level) {
311 sizes[level] = CalculateLevelSize(info, level);
312 }
313 return sizes;
314}
315
316[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
317 u32 num_samples, u32 tile_width_spacing) {
318 const auto [samples_x, samples_y] = Samples(num_samples);
319 const u32 bytes_per_block = BytesPerBlock(format);
320 return {
321 .size =
322 {
323 .width = size.width * samples_x,
324 .height = size.height * samples_y,
325 .depth = size.depth,
326 },
327 .block = block,
328 .tile_size = DefaultBlockSize(format),
329 .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
330 .tile_width_spacing = tile_width_spacing,
331 };
332}
333
334[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
335 return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
336 info.tile_width_spacing);
337}
338
339[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
340 u32 num_samples, u32 tile_width_spacing,
341 u32 level) {
342 const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
343 u32 offset = 0;
344 for (u32 current_level = 0; current_level < level; ++current_level) {
345 offset += CalculateLevelSize(info, current_level);
346 }
347 return offset;
348}
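CalculateLevelOffset embodies the usual mip-chain layout: a level's offset is the sum of the byte sizes of all smaller-numbered levels. The sketch below shows the same summation pattern with a plain linear layout standing in for CalculateLevelSize (an illustrative assumption; the real function accounts for block-linear tiling):

#include <algorithm>
#include <cstdint>

// Bytes of one mip level in a simple linear layout (not block linear).
constexpr std::uint32_t MipBytes(std::uint32_t w, std::uint32_t h, std::uint32_t bpb,
                                 std::uint32_t level) {
    return std::max(w >> level, 1u) * std::max(h >> level, 1u) * bpb;
}

// Offset of `level` = sum of all previous levels, as in CalculateLevelOffset.
constexpr std::uint32_t LevelOffset(std::uint32_t w, std::uint32_t h, std::uint32_t bpb,
                                    std::uint32_t level) {
    std::uint32_t offset = 0;
    for (std::uint32_t current = 0; current < level; ++current) {
        offset += MipBytes(w, h, bpb, current);
    }
    return offset;
}

// 64x64 RGBA8: level 0 occupies 16384 bytes, so level 1 starts at 16384
// and level 2 at 16384 + 4096.
static_assert(LevelOffset(64, 64, 4, 1) == 16384);
static_assert(LevelOffset(64, 64, 4, 2) == 16384 + 4096);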
349
350[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
351 u32 tile_size_y, u32 tile_width_spacing) {
352 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
353 if (tile_width_spacing > 0) {
354 const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
355 return Common::AlignBits(size_bytes, alignment_log2);
356 }
357 const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
358 while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
359 --block.height;
360 }
361 while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
362 --block.depth;
363 }
364 const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
365 const u32 num_blocks = size_bytes >> block_shift;
366 if (size_bytes != num_blocks << block_shift) {
367 return (num_blocks + 1) << block_shift;
368 }
369 return size_bytes;
370}
371
372[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
373 const ImageBase& overlap,
374 bool strict_size) {
375 const ImageInfo& info = overlap.info;
376 if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
377 return std::nullopt;
378 }
379 if (new_info.block != info.block) {
380 return std::nullopt;
381 }
382 const SubresourceExtent resources = new_info.resources;
383 return SubresourceExtent{
384 .levels = std::max(resources.levels, info.resources.levels),
385 .layers = std::max(resources.layers, info.resources.layers),
386 };
387}
388
389[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
390 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
391 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
392 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
393 const auto it = std::ranges::find(slice_offsets, diff);
394 if (it == slice_offsets.end()) {
395 return std::nullopt;
396 }
397 const std::vector subresources = CalculateSliceSubresources(new_info);
398 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
399 const ImageInfo& info = overlap.info;
400 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
401 return std::nullopt;
402 }
403 const u32 mip_depth = std::max(1U, new_info.size.depth << base.level);
404 if (mip_depth < info.size.depth + base.layer) {
405 return std::nullopt;
406 }
407 if (MipBlockSize(new_info, base.level) != info.block) {
408 return std::nullopt;
409 }
410 return SubresourceExtent{
411 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
412 .layers = 1,
413 };
414}
415
416[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
417 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
418 const u32 layer_stride = new_info.layer_stride;
419 const s32 new_size = layer_stride * new_info.resources.layers;
420 const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
421 if (diff > new_size) {
422 return std::nullopt;
423 }
424 const s32 base_layer = diff / layer_stride;
425 const s32 mip_offset = diff % layer_stride;
426 const std::array offsets = CalculateMipLevelOffsets(new_info);
427 const auto end = offsets.begin() + new_info.resources.levels;
428 const auto it = std::find(offsets.begin(), end, mip_offset);
429 if (it == end) {
430 // Mipmap is not aligned to any valid size
431 return std::nullopt;
432 }
433 const SubresourceBase base{
434 .level = static_cast<s32>(std::distance(offsets.begin(), it)),
435 .layer = base_layer,
436 };
437 const ImageInfo& info = overlap.info;
438 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
439 return std::nullopt;
440 }
441 if (MipBlockSize(new_info, base.level) != info.block) {
442 return std::nullopt;
443 }
444 return SubresourceExtent{
445 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
446 .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
447 };
448}
449
450[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
451 GPUVAddr gpu_addr,
452 VAddr cpu_addr,
453 const ImageBase& overlap,
454 bool strict_size) {
455 std::optional<SubresourceExtent> resources;
456 if (new_info.type != ImageType::e3D) {
457 resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
458 } else {
459 resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
460 }
461 if (!resources) {
462 return std::nullopt;
463 }
464 return OverlapResult{
465 .gpu_addr = gpu_addr,
466 .cpu_addr = cpu_addr,
467 .resources = *resources,
468 };
469}
470
471[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
472 GPUVAddr gpu_addr,
473 VAddr cpu_addr,
474 const ImageBase& overlap,
475 bool strict_size) {
476 const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
477 if (!base) {
478 return std::nullopt;
479 }
480 const ImageInfo& info = overlap.info;
481 if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
482 return std::nullopt;
483 }
484 if (new_info.block != MipBlockSize(info, base->level)) {
485 return std::nullopt;
486 }
487 const SubresourceExtent resources = new_info.resources;
488 s32 layers = 1;
489 if (info.type != ImageType::e3D) {
490 layers = std::max(resources.layers, info.resources.layers + base->layer);
491 }
492 return OverlapResult{
493 .gpu_addr = overlap.gpu_addr,
494 .cpu_addr = overlap.cpu_addr,
495 .resources =
496 {
497 .levels = std::max(resources.levels + base->level, info.resources.levels),
498 .layers = layers,
499 },
500 };
501}
502
503[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
504 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
505 static constexpr u32 STRIDE_ALIGNMENT = 32;
506 ASSERT(info.type == ImageType::Linear);
507 const Extent2D num_tiles{
508 .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
509 .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
510 };
511 const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
512 return Extent2D{
513 .width = Common::AlignUp(num_tiles.width, width_alignment),
514 .height = num_tiles.height,
515 };
516}
517
518[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
519 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
520 ASSERT(info.type != ImageType::Linear);
521 const Extent3D size = AdjustMipSize(info.size, level);
522 const Extent3D num_tiles{
523 .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
524 .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
525 .depth = size.depth,
526 };
527 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
528 const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
529 const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
530 return Extent3D{
531 .width = Common::AlignBits(num_tiles.width, alignment),
532 .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
533 .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
534 };
535}
536
537[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
538 u32 num_blocks = 0;
539 for (s32 level = 0; level < info.resources.levels; ++level) {
540 const Extent3D mip_size = AdjustMipSize(info.size, level);
541 num_blocks += NumBlocks(mip_size, tile_size);
542 }
543 return num_blocks;
544}
545
546[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
547 ASSERT(info.type == ImageType::e3D);
548 u32 num_slices = 0;
549 for (s32 level = 0; level < info.resources.levels; ++level) {
550 num_slices += AdjustMipSize(info.size.depth, level);
551 }
552 return num_slices;
553}
554
555void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
556 const ImageInfo& info, const BufferImageCopy& copy,
557 std::span<const u8> memory) {
558 ASSERT(copy.image_offset.z == 0);
559 ASSERT(copy.image_extent.depth == 1);
560 ASSERT(copy.image_subresource.base_level == 0);
561 ASSERT(copy.image_subresource.base_layer == 0);
562 ASSERT(copy.image_subresource.num_layers == 1);
563
564 const u32 bytes_per_block = BytesPerBlock(info.format);
565 const u32 row_length = copy.image_extent.width * bytes_per_block;
566 const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
567
568 for (u32 line = 0; line < copy.image_extent.height; ++line) {
569 const u32 host_offset_y = line * info.pitch;
570 const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
571 const u32 guest_offset = guest_offset_x + guest_offset_y;
572 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
573 row_length);
574 }
575}
576
577void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
578 const ImageInfo& info, const BufferImageCopy& copy,
579 std::span<const u8> input) {
580 const Extent3D size = info.size;
581 const LevelInfo level_info = MakeLevelInfo(info);
582 const Extent2D tile_size = DefaultBlockSize(info.format);
583 const u32 bytes_per_block = BytesPerBlock(info.format);
584
585 const s32 level = copy.image_subresource.base_level;
586 const Extent3D level_size = AdjustMipSize(size, level);
587 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
588 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
589
590 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
591
592 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
593 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
594 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
595 UNIMPLEMENTED_IF(copy.image_extent != level_size);
596
597 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
598 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
599
600 size_t host_offset = copy.buffer_offset;
601
602 const u32 num_levels = info.resources.levels;
603 const std::array sizes = CalculateLevelSizes(level_info, num_levels);
604 size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
605 const size_t layer_stride =
606 AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
607 level_info.block, tile_size.height, info.tile_width_spacing);
608 const size_t subresource_size = sizes[level];
609
610 const auto dst_data = std::make_unique<u8[]>(subresource_size);
611 const std::span<u8> dst(dst_data.get(), subresource_size);
612
613 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
614 const std::span<const u8> src = input.subspan(host_offset);
615 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
616 num_tiles.depth, block.height, block.depth);
617
618 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
619
620 host_offset += host_bytes_per_layer;
621 guest_offset += layer_stride;
622 }
623 ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
624}
625
626} // Anonymous namespace
627
628u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
629 if (info.type == ImageType::Buffer) {
630 return info.size.width * BytesPerBlock(info.format);
631 }
632 if (info.type == ImageType::Linear) {
633 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
634 }
635 if (info.resources.layers > 1) {
636 ASSERT(info.layer_stride != 0);
637 return info.layer_stride * info.resources.layers;
638 } else {
639 return CalculateLayerSize(info);
640 }
641}
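For the ImageType::Linear branch above, the guest size reduces to pitch times the number of block rows: pitch * ceil(height / block_height). A worked check with made-up numbers (a 500-pixel-tall image of a 4x4-block compressed format with a 2048-byte pitch):

#include <cstdint>

// Same rounding as Common::DivCeil, reimplemented here so the check is
// self-contained.
constexpr std::uint32_t DivCeil(std::uint32_t n, std::uint32_t d) {
    return (n + d - 1) / d;
}

// 2048 * ceil(500 / 4) = 2048 * 125 = 256000 bytes of guest memory.
static_assert(2048u * DivCeil(500, 4) == 256000u);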
642
643u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
644 if (info.type == ImageType::Buffer) {
645 return info.size.width * BytesPerBlock(info.format);
646 }
647 if (info.num_samples > 1) {
648 // Multisample images can't be uploaded or downloaded to the host
649 return 0;
650 }
651 if (info.type == ImageType::Linear) {
652 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
653 }
654 const Extent2D tile_size = DefaultBlockSize(info.format);
655 return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
656}
657
658u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
659 if (info.type == ImageType::Buffer) {
660 return info.size.width * BytesPerBlock(info.format);
661 }
662 static constexpr Extent2D TILE_SIZE{1, 1};
663 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
664}
665
666u32 CalculateLayerStride(const ImageInfo& info) noexcept {
667 ASSERT(info.type != ImageType::Linear);
668 const u32 layer_size = CalculateLayerSize(info);
669 const Extent3D size = info.size;
670 const Extent3D block = info.block;
671 const u32 tile_size_y = DefaultBlockHeight(info.format);
672 return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
673}
674
675u32 CalculateLayerSize(const ImageInfo& info) noexcept {
676 ASSERT(info.type != ImageType::Linear);
677 return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
678 info.tile_width_spacing, info.resources.levels);
679}
680
681std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
682 ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
683 const LevelInfo level_info = MakeLevelInfo(info);
684 std::array<u32, MAX_MIP_LEVELS> offsets{};
685 u32 offset = 0;
686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 offsets[level] = offset;
688 offset += CalculateLevelSize(level_info, level);
689 }
690 return offsets;
691}
692
693std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
694 ASSERT(info.type == ImageType::e3D);
695 std::vector<u32> offsets;
696 offsets.reserve(NumSlices(info));
697
698 const LevelInfo level_info = MakeLevelInfo(info);
699 u32 mip_offset = 0;
700 for (s32 level = 0; level < info.resources.levels; ++level) {
701 const Extent3D tile_shift = TileShift(level_info, level);
702 const Extent3D tiles = LevelTiles(level_info, level);
703 const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
704 const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
705 const u32 z_mask = (1U << tile_shift.depth) - 1;
706 const u32 depth = AdjustMipSize(info.size.depth, level);
707 for (u32 slice = 0; slice < depth; ++slice) {
708 const u32 z_low = slice & z_mask;
709 const u32 z_high = slice & ~z_mask;
710 offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
711 }
712 mip_offset += CalculateLevelSize(level_info, level);
713 }
714 return offsets;
715}
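The z_low/z_high split in CalculateSliceOffsets captures block-linear depth addressing: slices inside one depth tile are interleaved at GOB granularity (z_low), while whole depth tiles advance by the full slice_size (z_high). A standalone sketch of the same bit math, with tile_shift.depth, gob_size_shift and slice_size chosen purely for illustration:

#include <cstdint>

constexpr std::uint32_t SliceOffset(std::uint32_t slice, std::uint32_t tile_shift_depth,
                                    std::uint32_t gob_size_shift, std::uint32_t slice_size) {
    const std::uint32_t z_mask = (1u << tile_shift_depth) - 1;
    const std::uint32_t z_low = slice & z_mask;   // position inside the depth tile
    const std::uint32_t z_high = slice & ~z_mask; // first slice of the containing tile
    return (z_low << gob_size_shift) + z_high * slice_size;
}

// With a 4-deep tile (shift 2): slices 0..3 step by one GOB (1 << 9 = 512),
// while slice 4 jumps to the next tile at 4 * slice_size.
static_assert(SliceOffset(1, 2, 9, 4096) == 512);
static_assert(SliceOffset(4, 2, 9, 4096) == 4 * 4096);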
716
717std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
718 ASSERT(info.type == ImageType::e3D);
719 std::vector<SubresourceBase> subresources;
720 subresources.reserve(NumSlices(info));
721 for (s32 level = 0; level < info.resources.levels; ++level) {
722 const s32 depth = AdjustMipSize(info.size.depth, level);
723 for (s32 slice = 0; slice < depth; ++slice) {
724 subresources.emplace_back(SubresourceBase{
725 .level = level,
726 .layer = slice,
727 });
728 }
729 }
730 return subresources;
731}
732
733u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
734 const Extent2D tile_size = DefaultBlockSize(info.format);
735 const Extent3D level_size = AdjustMipSize(info.size, level);
736 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
737 const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
738 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
739 return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
740}
741
742PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
743 return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
744 config.a_type, config.srgb_conversion);
745}
746
747ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
748 switch (info.type) {
749 case ImageType::e2D:
750 return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
751 case ImageType::e3D:
752 return ImageViewType::e2DArray;
753 case ImageType::Linear:
754 return ImageViewType::e2D;
755 default:
756 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
757 return ImageViewType{};
758 }
759}
760
761std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
762 SubresourceBase base) {
763 ASSERT(dst.resources.levels >= src.resources.levels);
764 ASSERT(dst.num_samples == src.num_samples);
765
766 const bool is_dst_3d = dst.type == ImageType::e3D;
767 if (is_dst_3d) {
768 ASSERT(src.type == ImageType::e3D);
769 ASSERT(src.resources.levels == 1);
770 }
771
772 std::vector<ImageCopy> copies;
773 copies.reserve(src.resources.levels);
774 for (s32 level = 0; level < src.resources.levels; ++level) {
775 ImageCopy& copy = copies.emplace_back();
776 copy.src_subresource = SubresourceLayers{
777 .base_level = level,
778 .base_layer = 0,
779 .num_layers = src.resources.layers,
780 };
781 copy.dst_subresource = SubresourceLayers{
782 .base_level = base.level + level,
783 .base_layer = is_dst_3d ? 0 : base.layer,
784 .num_layers = is_dst_3d ? 1 : src.resources.layers,
785 };
786 copy.src_offset = Offset3D{
787 .x = 0,
788 .y = 0,
789 .z = 0,
790 };
791 copy.dst_offset = Offset3D{
792 .x = 0,
793 .y = 0,
794 .z = is_dst_3d ? base.layer : 0,
795 };
796 const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
797 copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
798 if (is_dst_3d) {
799 copy.extent.depth = src.size.depth;
800 }
801 }
802 return copies;
803}
804
805bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
806 if (config.Address() == 0) {
807 return false;
808 }
809 if (config.Address() > (u64(1) << 48)) {
810 return false;
811 }
812 return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
813}
814
815std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
816 const ImageInfo& info, std::span<u8> output) {
817 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
818 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
819 const Extent3D size = info.size;
820
821 if (info.type == ImageType::Linear) {
822 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
823
824 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
825 return {{
826 .buffer_offset = 0,
827 .buffer_size = guest_size_bytes,
828 .buffer_row_length = info.pitch >> bpp_log2,
829 .buffer_image_height = size.height,
830 .image_subresource =
831 {
832 .base_level = 0,
833 .base_layer = 0,
834 .num_layers = 1,
835 },
836 .image_offset = {0, 0, 0},
837 .image_extent = size,
838 }};
839 }
840 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
841 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
842 const std::span<const u8> input(input_data.get(), guest_size_bytes);
843
844 const LevelInfo level_info = MakeLevelInfo(info);
845 const s32 num_layers = info.resources.layers;
846 const s32 num_levels = info.resources.levels;
847 const Extent2D tile_size = DefaultBlockSize(info.format);
848 const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
849 const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
850 const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
851 const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
852 info.tile_width_spacing);
853 size_t guest_offset = 0;
854 u32 host_offset = 0;
855 std::vector<BufferImageCopy> copies(num_levels);
856
857 for (s32 level = 0; level < num_levels; ++level) {
858 const Extent3D level_size = AdjustMipSize(size, level);
859 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
860 const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
861 copies[level] = BufferImageCopy{
862 .buffer_offset = host_offset,
863 .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
864 .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
865 .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
866 .image_subresource =
867 {
868 .base_level = level,
869 .base_layer = 0,
870 .num_layers = info.resources.layers,
871 },
872 .image_offset = {0, 0, 0},
873 .image_extent = level_size,
874 };
875 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
876 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
877 const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
878 size_t guest_layer_offset = 0;
879
880 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
881 const std::span<u8> dst = output.subspan(host_offset);
882 const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
883 UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
884 num_tiles.depth, block.height, block.depth, stride_alignment);
885 guest_layer_offset += layer_stride;
886 host_offset += host_bytes_per_layer;
887 }
888 guest_offset += level_sizes[level];
889 }
890 return copies;
891}
892
893BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
894 const ImageBase& image, std::span<u8> output) {
895 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
896 return BufferCopy{
897 .src_offset = 0,
898 .dst_offset = 0,
899 .size = image.guest_size_bytes,
900 };
901}
902
903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
904 std::span<BufferImageCopy> copies) {
905 u32 output_offset = 0;
906
907 const Extent2D tile_size = DefaultBlockSize(info.format);
908 for (BufferImageCopy& copy : copies) {
909 const u32 level = copy.image_subresource.base_level;
910 const Extent3D mip_size = AdjustMipSize(info.size, level);
911 ASSERT(copy.image_offset == Offset3D{});
912 ASSERT(copy.image_subresource.base_layer == 0);
913 ASSERT(copy.image_extent == mip_size);
914 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
915 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
916
917 if (IsPixelFormatASTC(info.format)) {
918 ASSERT(copy.image_extent.depth == 1);
919 Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
920 copy.image_extent.width, copy.image_extent.height,
921 copy.image_subresource.num_layers, tile_size.width,
922 tile_size.height, output.subspan(output_offset));
923 } else {
924 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
925 output.subspan(output_offset));
926 }
927 copy.buffer_offset = output_offset;
928 copy.buffer_row_length = mip_size.width;
929 copy.buffer_image_height = mip_size.height;
930
931 output_offset += copy.image_extent.width * copy.image_extent.height *
932 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
933 }
934}
935
936std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
937 const Extent3D size = info.size;
938 const u32 bytes_per_block = BytesPerBlock(info.format);
939 if (info.type == ImageType::Linear) {
940 ASSERT(info.pitch % bytes_per_block == 0);
941 return {{
942 .buffer_offset = 0,
943 .buffer_size = static_cast<size_t>(info.pitch) * size.height,
944 .buffer_row_length = info.pitch / bytes_per_block,
945 .buffer_image_height = size.height,
946 .image_subresource =
947 {
948 .base_level = 0,
949 .base_layer = 0,
950 .num_layers = 1,
951 },
952 .image_offset = {0, 0, 0},
953 .image_extent = size,
954 }};
955 }
956 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
957
958 const s32 num_layers = info.resources.layers;
959 const s32 num_levels = info.resources.levels;
960 const Extent2D tile_size = DefaultBlockSize(info.format);
961
962 u32 host_offset = 0;
963
964 std::vector<BufferImageCopy> copies(num_levels);
965 for (s32 level = 0; level < num_levels; ++level) {
966 const Extent3D level_size = AdjustMipSize(size, level);
967 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
968 const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
969 copies[level] = BufferImageCopy{
970 .buffer_offset = host_offset,
971 .buffer_size = host_bytes_per_level,
972 .buffer_row_length = level_size.width,
973 .buffer_image_height = level_size.height,
974 .image_subresource =
975 {
976 .base_level = level,
977 .base_layer = 0,
978 .num_layers = info.resources.layers,
979 },
980 .image_offset = {0, 0, 0},
981 .image_extent = level_size,
982 };
983 host_offset += host_bytes_per_level;
984 }
985 return copies;
986}
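// The copies above pack levels back to back in the host buffer, each level
// holding all of its layers. For example, a 2-layer BC1 image whose level 0 is
// 64x64 stores 16 * 16 blocks * 8 bytes * 2 layers = 0x1000 bytes before the
// level 1 copy begins.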
987
988Extent3D MipSize(Extent3D size, u32 level) {
989 return AdjustMipSize(size, level);
990}
991
992Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
993 const LevelInfo level_info = MakeLevelInfo(info);
994 const Extent2D tile_size = DefaultBlockSize(info.format);
995 const Extent3D level_size = AdjustMipSize(info.size, level);
996 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
997 return AdjustMipBlockSize(num_tiles, level_info.block, level);
998}
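// As elsewhere in this file, the returned block dimensions are shift amounts
// (log2 of GOBs per block); AdjustMipBlockSize shrinks the block height and
// depth once a level no longer fills a whole block.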
999
1000std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1001 const Extent2D tile_size = DefaultBlockSize(info.format);
1002 if (info.type == ImageType::Linear) {
1003 return std::vector{SwizzleParameters{
1004 .num_tiles = AdjustTileSize(info.size, tile_size),
1005 .block = {},
1006 .buffer_offset = 0,
1007 .level = 0,
1008 }};
1009 }
1010 const LevelInfo level_info = MakeLevelInfo(info);
1011 const Extent3D size = info.size;
1012 const s32 num_levels = info.resources.levels;
1013
1014 u32 guest_offset = 0;
1015 std::vector<SwizzleParameters> params(num_levels);
1016 for (s32 level = 0; level < num_levels; ++level) {
1017 const Extent3D level_size = AdjustMipSize(size, level);
1018 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
1019 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
1020 params[level] = SwizzleParameters{
1021 .num_tiles = num_tiles,
1022 .block = block,
1023 .buffer_offset = guest_offset,
1024 .level = level,
1025 };
1026 guest_offset += CalculateLevelSize(level_info, level);
1027 }
1028 return params;
1029}
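// Illustrative consumer (callee name hypothetical): a backend can walk the
// result and dispatch one accelerated swizzle per level,
//   for (const SwizzleParameters& params : FullUploadSwizzles(info)) {
//       DispatchSwizzle(params.num_tiles, params.block, params.buffer_offset);
//   }
// with params.buffer_offset locating that level inside the guest-ordered
// staging buffer.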
1030
1031void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
1032 std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
1033 const bool is_pitch_linear = info.type == ImageType::Linear;
1034 for (const BufferImageCopy& copy : copies) {
1035 if (is_pitch_linear) {
1036 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1037 } else {
1038 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1039 }
1040 }
1041}
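// This is the inverse of UnswizzleImage: `memory` holds linear host data, and
// each copy is written back to guest memory, GOB-swizzled for block-linear
// images and row by row for pitch-linear ones.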
1042
1043bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
1044 u32 rhs_level, bool strict_size) noexcept {
1045 ASSERT(lhs.type != ImageType::Linear);
1046 ASSERT(rhs.type != ImageType::Linear);
1047 if (strict_size) {
1048 const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
1049 const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
1050 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1051 } else {
1052 const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
1053 const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
1054 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1055 }
1056}
1057
1058bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
1059 ASSERT(lhs.type == ImageType::Linear);
1060 ASSERT(rhs.type == ImageType::Linear);
1061 if (strict_size) {
1062 return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
1063 } else {
1064 const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
1065 const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
1066 return lhs_size == rhs_size;
1067 }
1068}
1069
1070std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1071 VAddr cpu_addr, const ImageBase& overlap,
1072 bool strict_size, bool broken_views) {
1073 ASSERT(new_info.type != ImageType::Linear);
1074 ASSERT(overlap.info.type != ImageType::Linear);
1075 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1076 return std::nullopt;
1077 }
1078 if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
1079 return std::nullopt;
1080 }
1081 if (gpu_addr == overlap.gpu_addr) {
1082 const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
1083 if (!solution) {
1084 return std::nullopt;
1085 }
1086 return OverlapResult{
1087 .gpu_addr = gpu_addr,
1088 .cpu_addr = cpu_addr,
1089 .resources = *solution,
1090 };
1091 }
1092 if (overlap.gpu_addr > gpu_addr) {
1093 return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1094 }
1095 // if overlap.gpu_addr < gpu_addr
1096 return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1097}
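// Summary of the three outcomes above: equal addresses merge the subresource
// extents in place, an overlap at a higher address may be absorbed by extending
// the new image to the right, and one at a lower address by extending it to the
// left, relocating the result's base to the overlap's address.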
1098
1099bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1100 // If either of the layer strides is zero, we can assume they are compatible
1101 // These images generally come from render targets
1102 if (lhs.layer_stride == 0) {
1103 return true;
1104 }
1105 if (rhs.layer_stride == 0) {
1106 return true;
1107 }
1108 // It's definitely compatible if the layer stride matches
1109 if (lhs.layer_stride == rhs.layer_stride) {
1110 return true;
1111 }
1112 // Otherwise, compare the unaligned layer strides: an image created without
1113 // layers computes its stride unaligned, so only the raw values can match
1114 if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
1115 return true;
1116 }
1117 return false;
1118}
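// Example: render targets are often created with a zero layer stride, so they
// match any candidate; otherwise two images are compatible when either their
// aligned or their raw (unaligned) layer strides are equal.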
1119
1120std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1121 GPUVAddr candidate_addr, RelaxedOptions options,
1122 bool broken_views) {
1123 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1124 if (!base) {
1125 return std::nullopt;
1126 }
1127 const ImageInfo& existing = image.info;
1128 if (False(options & RelaxedOptions::Format)) {
1129 if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
1130 return std::nullopt;
1131 }
1132 }
1133 if (!IsLayerStrideCompatible(existing, candidate)) {
1134 return std::nullopt;
1135 }
1136 if (existing.type != candidate.type) {
1137 return std::nullopt;
1138 }
1139 if (False(options & RelaxedOptions::Samples)) {
1140 if (existing.num_samples != candidate.num_samples) {
1141 return std::nullopt;
1142 }
1143 }
1144 if (existing.resources.levels < candidate.resources.levels + base->level) {
1145 return std::nullopt;
1146 }
1147 if (existing.type == ImageType::e3D) {
1148 const u32 mip_depth = std::max(1U, existing.size.depth >> base->level);
1149 if (mip_depth < candidate.size.depth + base->layer) {
1150 return std::nullopt;
1151 }
1152 } else {
1153 if (existing.resources.layers < candidate.resources.layers + base->layer) {
1154 return std::nullopt;
1155 }
1156 }
1157 const bool strict_size = False(options & RelaxedOptions::Size);
1158 if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
1159 return std::nullopt;
1160 }
1161 // TODO: compare block sizes
1162 return base;
1163}
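// FindSubresource locates where `candidate` begins inside `image`: e.g. a
// 256x256 view found one mip into a 512x512 image yields base->level == 1, and
// the checks above ensure the view fits in the remaining levels and layers.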
1164
1165bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1166 RelaxedOptions options, bool broken_views) {
1167 return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
1168}
1169
1170void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1171 const ImageBase* src) {
1172 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1173 src_info.format = src->info.format;
1174 }
1175 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1176 dst_info.format = dst->info.format;
1177 }
1178 if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1179 dst_info.format = src->info.format;
1180 }
1181 if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1182 src_info.format = dst->info.format;
1183 }
1184}
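// Rationale: depth/stencil blits must keep the guest's exact format, so any
// non-color format found on an existing image overrides the deduced color
// format; the last two branches propagate it when only one side exists.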
1185
1186u32 MapSizeBytes(const ImageBase& image) {
1187 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
1188 return image.guest_size_bytes;
1189 } else if (True(image.flags & ImageFlagBits::Converted)) {
1190 return image.converted_size_bytes;
1191 } else {
1192 return image.unswizzled_size_bytes;
1193 }
1194}
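// MapSizeBytes selects the staging-map size for an image upload: raw guest
// bytes when the swizzle runs on the GPU, decompressed bytes for converted
// (ASTC/BC4) images, and linear unswizzled bytes otherwise.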
1195
1196using P = PixelFormat;
1197
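// Compile-time checks of the level size/offset math against known-good values;
// a regression in the helpers above fails the build here first.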
1198static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
1199static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
1200
1201static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
1202static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
1203 0x50d200);
1204
1205static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
1206static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
1207static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
1208static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
1209static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
1210static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
1211static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
1212static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
1213static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
1214static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
1215
1216constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
1217 u32 tile_width_spacing, u32 level) {
1218 const Extent3D size{width, height, 1};
1219 const Extent3D block{0, block_height, 0};
1220 const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
1221 return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
1222}
1223
1224static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
1225static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
1226static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
1227
1228static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
1229 "Tile width spacing is not working");
1230static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
1231 "Compressed tile width spacing is not working");
1232
1233} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..52a9207d6
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,109 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <span>
9
10#include "common/common_types.h"
11
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/image_base.h"
15#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/types.h"
17#include "video_core/textures/texture.h"
18
19namespace VideoCommon {
20
21using Tegra::Texture::TICEntry;
22
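/// Result of merging an overlapping image into a new image's address range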
23struct OverlapResult {
24 GPUVAddr gpu_addr;
25 VAddr cpu_addr;
26 SubresourceExtent resources;
27};
28
29[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
30
31[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
32
33[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
34
35[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
36
37[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
38
39[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
40 const ImageInfo& info) noexcept;
41
42[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
43
44[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
45
46[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
47
48[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
49 const Tegra::Texture::TICEntry& config) noexcept;
50
51[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
52
53[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
54 const ImageInfo& src,
55 SubresourceBase base);
56
57[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
58
59[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
60 GPUVAddr gpu_addr, const ImageInfo& info,
61 std::span<u8> output);
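// Illustrative call pattern (buffer name hypothetical): size the output with
// CalculateUnswizzledSizeBytes(info), then hand the returned copies to the
// backend's buffer-to-image upload:
//   std::vector<u8> staging(CalculateUnswizzledSizeBytes(info));
//   const auto copies = UnswizzleImage(gpu_memory, gpu_addr, info, staging);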
62
63[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
64 const ImageBase& image, std::span<u8> output);
65
66void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
67 std::span<BufferImageCopy> copies);
68
69[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
70
71[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
72
73[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
74
75[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
76
77void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
78 std::span<const BufferImageCopy> copies, std::span<const u8> memory);
79
80[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
81 const ImageInfo& overlap_info, u32 new_level,
82 u32 overlap_level, bool strict_size) noexcept;
83
84[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
85 bool strict_size) noexcept;
86
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap,
90 bool strict_size, bool broken_views);
91
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93
94[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
95 const ImageBase& image,
96 GPUVAddr candidate_addr,
97 RelaxedOptions options,
98 bool broken_views);
99
100[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
101 GPUVAddr candidate_addr, RelaxedOptions options,
102 bool broken_views);
103
104void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
105 const ImageBase* src);
106
107[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
108
109} // namespace VideoCommon