path: root/src/video_core/texture_cache
author ReinUsesLisp 2020-12-30 02:25:23 -0300
committer ReinUsesLisp 2020-12-30 03:38:50 -0300
commit 9764c13d6d2977903f407761b27d847c0056e1c4 (patch)
tree f6f5d6d6379b0404147969e7d1f548ed3d49ca01 /src/video_core/texture_cache
parent video_core: Add a delayed destruction ring abstraction (diff)
download yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.gz
         yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.tar.xz
         yuzu-9764c13d6d2977903f407761b27d847c0056e1c4.zip
video_core: Rewrite the texture cache
The current texture cache has several points that hurt maintainability and performance. It's easy to break unrelated parts of the cache when making minor changes, and the cache can easily lose valuable information about the cached textures due to CPU writes or simply through its normal usage. This commit aims to address those issues.
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.cpp | 70
-rw-r--r-- src/video_core/texture_cache/accelerated_swizzle.h | 45
-rw-r--r-- src/video_core/texture_cache/copy_params.h | 36
-rw-r--r-- src/video_core/texture_cache/decode_bc4.cpp | 97
-rw-r--r-- src/video_core/texture_cache/decode_bc4.h | 16
-rw-r--r-- src/video_core/texture_cache/descriptor_table.h | 82
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp | 380
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.h | 42
-rw-r--r-- src/video_core/texture_cache/formatter.cpp | 95
-rw-r--r-- src/video_core/texture_cache/formatter.h | 263
-rw-r--r-- src/video_core/texture_cache/image_base.cpp | 216
-rw-r--r-- src/video_core/texture_cache/image_base.h | 83
-rw-r--r-- src/video_core/texture_cache/image_info.cpp | 189
-rw-r--r-- src/video_core/texture_cache/image_info.h | 38
-rw-r--r-- src/video_core/texture_cache/image_view_base.cpp | 41
-rw-r--r-- src/video_core/texture_cache/image_view_base.h | 47
-rw-r--r-- src/video_core/texture_cache/image_view_info.cpp | 88
-rw-r--r-- src/video_core/texture_cache/image_view_info.h | 50
-rw-r--r-- src/video_core/texture_cache/render_targets.h | 51
-rw-r--r-- src/video_core/texture_cache/samples_helper.h | 55
-rw-r--r-- src/video_core/texture_cache/slot_vector.h | 156
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp | 299
-rw-r--r-- src/video_core/texture_cache/surface_base.h | 333
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp | 445
-rw-r--r-- src/video_core/texture_cache/surface_params.h | 294
-rw-r--r-- src/video_core/texture_cache/surface_view.cpp | 27
-rw-r--r-- src/video_core/texture_cache/surface_view.h | 68
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 2397
-rw-r--r-- src/video_core/texture_cache/types.h | 140
-rw-r--r-- src/video_core/texture_cache/util.cpp | 1232
-rw-r--r-- src/video_core/texture_cache/util.h | 107
31 files changed, 4633 insertions, 2849 deletions
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp
new file mode 100644
index 000000000..a4fc1184b
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.cpp
@@ -0,0 +1,70 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/alignment.h"
9#include "common/common_types.h"
10#include "common/div_ceil.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/accelerated_swizzle.h"
13#include "video_core/texture_cache/util.h"
14#include "video_core/textures/decoders.h"
15
16namespace VideoCommon::Accelerated {
17
18using Tegra::Texture::GOB_SIZE_SHIFT;
19using Tegra::Texture::GOB_SIZE_X;
20using Tegra::Texture::GOB_SIZE_X_SHIFT;
21using Tegra::Texture::GOB_SIZE_Y_SHIFT;
22using VideoCore::Surface::BytesPerBlock;
23
24BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle,
25 const ImageInfo& info) {
26 const Extent3D block = swizzle.block;
27 const Extent3D num_tiles = swizzle.num_tiles;
28 const u32 bytes_per_block = BytesPerBlock(info.format);
29 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
30 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
31 const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
32 return BlockLinearSwizzle2DParams{
33 .origin{0, 0, 0},
34 .destination{0, 0, 0},
35 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
36 .layer_stride = info.layer_stride,
37 .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth),
38 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
39 .block_height = block.height,
40 .block_height_mask = (1U << block.height) - 1,
41 };
42}
43
44BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle,
45 const ImageInfo& info) {
46 const Extent3D block = swizzle.block;
47 const Extent3D num_tiles = swizzle.num_tiles;
48 const u32 bytes_per_block = BytesPerBlock(info.format);
49 const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level);
50 const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block;
51
52 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT;
53 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth);
54 const u32 slice_size =
55 Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size;
56 return BlockLinearSwizzle3DParams{
57 .origin{0, 0, 0},
58 .destination{0, 0, 0},
59 .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)),
60 .slice_size = slice_size,
61 .block_size = block_size,
62 .x_shift = GOB_SIZE_SHIFT + block.height + block.depth,
63 .block_height = block.height,
64 .block_height_mask = (1U << block.height) - 1,
65 .block_depth = block.depth,
66 .block_depth_mask = (1U << block.depth) - 1,
67 };
68}
69
70} // namespace VideoCommon::Accelerated
\ No newline at end of file
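The stride and GOB arithmetic above is easier to verify with concrete numbers. Below is a small standalone sketch (not part of the commit) for a hypothetical 64x64 BC1 image, assuming the usual Maxwell constants: a GOB row spans 64 bytes and BC1 packs 4x4 texels into 8-byte blocks.

#include <bit>
#include <cstdint>

using u32 = std::uint32_t;

// Worked example of the 2D parameter math for a hypothetical 64x64 BC1 image.
constexpr u32 GOB_SIZE_X_SHIFT = 6;   // one GOB row spans 64 bytes
constexpr u32 bytes_per_block = 8;    // BC1: 4x4 texels in 8 bytes
constexpr u32 tiles_x = 64 / 4;       // 16 compressed tiles across the image
constexpr u32 stride = tiles_x * bytes_per_block;            // 128 bytes per row
constexpr u32 gobs_in_x = (stride + 63) >> GOB_SIZE_X_SHIFT; // ceil(128 / 64) = 2
static_assert(gobs_in_x == 2);
static_assert(std::countr_zero(bytes_per_block) == 3); // bytes_per_block_log2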
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h
new file mode 100644
index 000000000..6ec5c78c4
--- /dev/null
+++ b/src/video_core/texture_cache/accelerated_swizzle.h
@@ -0,0 +1,45 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/types.h"
12
13namespace VideoCommon::Accelerated {
14
15struct BlockLinearSwizzle2DParams {
16 std::array<u32, 3> origin;
17 std::array<s32, 3> destination;
18 u32 bytes_per_block_log2;
19 u32 layer_stride;
20 u32 block_size;
21 u32 x_shift;
22 u32 block_height;
23 u32 block_height_mask;
24};
25
26struct BlockLinearSwizzle3DParams {
27 std::array<u32, 3> origin;
28 std::array<s32, 3> destination;
29 u32 bytes_per_block_log2;
30 u32 slice_size;
31 u32 block_size;
32 u32 x_shift;
33 u32 block_height;
34 u32 block_height_mask;
35 u32 block_depth;
36 u32 block_depth_mask;
37};
38
39[[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(
40 const SwizzleParameters& swizzle, const ImageInfo& info);
41
42[[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(
43 const SwizzleParameters& swizzle, const ImageInfo& info);
44
45} // namespace VideoCommon::Accelerated
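If the backends copy these parameter structs byte-for-byte into a uniform or push-constant buffer for the host swizzling shaders, their packing matters. A sanity-check sketch (an assumption about backend usage, not an assertion made by the commit):

#include "video_core/texture_cache/accelerated_swizzle.h"

namespace VideoCommon::Accelerated {
// Every member is a 4-byte integer, so both structs are tightly packed:
static_assert(sizeof(BlockLinearSwizzle2DParams) == 48); // 2 * 12 + 6 * 4
static_assert(sizeof(BlockLinearSwizzle3DParams) == 56); // 2 * 12 + 8 * 4
} // namespace VideoCommon::Accelerated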
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
deleted file mode 100644
index 5b475fe06..000000000
--- a/src/video_core/texture_cache/copy_params.h
+++ /dev/null
@@ -1,36 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11struct CopyParams {
12 constexpr CopyParams(u32 source_x_, u32 source_y_, u32 source_z_, u32 dest_x_, u32 dest_y_,
13 u32 dest_z_, u32 source_level_, u32 dest_level_, u32 width_, u32 height_,
14 u32 depth_)
15 : source_x{source_x_}, source_y{source_y_}, source_z{source_z_}, dest_x{dest_x_},
16 dest_y{dest_y_}, dest_z{dest_z_}, source_level{source_level_},
17 dest_level{dest_level_}, width{width_}, height{height_}, depth{depth_} {}
18
19 constexpr CopyParams(u32 width_, u32 height_, u32 depth_, u32 level_)
20 : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level_},
21 dest_level{level_}, width{width_}, height{height_}, depth{depth_} {}
22
23 u32 source_x;
24 u32 source_y;
25 u32 source_z;
26 u32 dest_x;
27 u32 dest_y;
28 u32 dest_z;
29 u32 source_level;
30 u32 dest_level;
31 u32 width;
32 u32 height;
33 u32 depth;
34};
35
36} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
new file mode 100644
index 000000000..017327975
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.cpp
@@ -0,0 +1,97 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <span>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/texture_cache/decode_bc4.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
17[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
18 const u32 code_offset = 16 + 3 * (4 * y + x);
19 const u32 code = (bits >> code_offset) & 7;
20 const u32 red0 = (bits >> 0) & 0xff;
21 const u32 red1 = (bits >> 8) & 0xff;
22 if (red0 > red1) {
23 switch (code) {
24 case 0:
25 return red0;
26 case 1:
27 return red1;
28 case 2:
29 return (6 * red0 + 1 * red1) / 7;
30 case 3:
31 return (5 * red0 + 2 * red1) / 7;
32 case 4:
33 return (4 * red0 + 3 * red1) / 7;
34 case 5:
35 return (3 * red0 + 4 * red1) / 7;
36 case 6:
37 return (2 * red0 + 5 * red1) / 7;
38 case 7:
39 return (1 * red0 + 6 * red1) / 7;
40 }
41 } else {
42 switch (code) {
43 case 0:
44 return red0;
45 case 1:
46 return red1;
47 case 2:
48 return (4 * red0 + 1 * red1) / 5;
49 case 3:
50 return (3 * red0 + 2 * red1) / 5;
51 case 4:
52 return (2 * red0 + 3 * red1) / 5;
53 case 5:
54 return (1 * red0 + 4 * red1) / 5;
55 case 6:
56 return 0;
57 case 7:
58 return 0xff;
59 }
60 }
61 return 0;
62}
63
64void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
65 UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
66 UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
67 static constexpr u32 BLOCK_SIZE = 4;
68 size_t input_offset = 0;
69 for (u32 slice = 0; slice < extent.depth; ++slice) {
70 for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
71 for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
72 u64 bits;
73 std::memcpy(&bits, &input[input_offset], sizeof(bits));
74 input_offset += sizeof(bits);
75
76 for (u32 y = 0; y < BLOCK_SIZE; ++y) {
77 for (u32 x = 0; x < BLOCK_SIZE; ++x) {
78 const u32 linear_z = slice;
79 const u32 linear_y = block_y * BLOCK_SIZE + y;
80 const u32 linear_x = block_x * BLOCK_SIZE + x;
81 const u32 offset_z = linear_z * extent.width * extent.height;
82 const u32 offset_y = linear_y * extent.width;
83 const u32 offset_x = linear_x;
84 const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
85 const u32 color = DecompressBlock(bits, x, y);
86 output[output_offset + 0] = static_cast<u8>(color);
87 output[output_offset + 1] = 0;
88 output[output_offset + 2] = 0;
89 output[output_offset + 3] = 0xff;
90 }
91 }
92 }
93 }
94 }
95}
96
97} // namespace VideoCommon
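Note that DecompressBC4 calls std::memcpy, so the file relies on <cstring> being pulled in transitively. A minimal usage sketch (not part of the commit; the Extent3D aggregate initialization follows types.h): one 8-byte BC4 block expands into a 4x4x1 RGBA8 region.

#include <array>

#include "common/common_types.h"
#include "video_core/texture_cache/decode_bc4.h"

void DecodeOneBlockExample() {
    // red0 = 0xff > red1 = 0x00 and all 3-bit selectors are zero, so every
    // texel takes code 0 and decodes to red0.
    const std::array<u8, 8> block{0xff, 0x00, 0, 0, 0, 0, 0, 0};
    std::array<u8, 4 * 4 * 4> rgba{}; // 16 texels, 4 bytes each
    VideoCommon::DecompressBC4(block, {.width = 4, .height = 4, .depth = 1}, rgba);
    // rgba now holds {0xff, 0x00, 0x00, 0xff} for each of the 16 texels.
}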
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h
new file mode 100644
index 000000000..63fb23508
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc4.h
@@ -0,0 +1,16 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <span>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
15
16} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h
new file mode 100644
index 000000000..3a03b786f
--- /dev/null
+++ b/src/video_core/texture_cache/descriptor_table.h
@@ -0,0 +1,82 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/div_ceil.h"
12#include "common/logging/log.h"
13#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h"
15
16namespace VideoCommon {
17
18template <typename Descriptor>
19class DescriptorTable {
20public:
21 explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
22
23 [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
24 if (current_gpu_addr == gpu_addr && current_limit == limit) [[likely]] {
25 return false;
26 }
27 Refresh(gpu_addr, limit);
28 return true;
29 }
30
31 void Invalidate() noexcept {
32 std::ranges::fill(read_descriptors, 0);
33 }
34
35 [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
36 DEBUG_ASSERT(index <= current_limit);
37 const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
38 std::pair<Descriptor, bool> result;
39 gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
40 if (IsDescriptorRead(index)) {
41 result.second = result.first != descriptors[index];
42 } else {
43 MarkDescriptorAsRead(index);
44 result.second = true;
45 }
46 if (result.second) {
47 descriptors[index] = result.first;
48 }
49 return result;
50 }
51
52 [[nodiscard]] u32 Limit() const noexcept {
53 return current_limit;
54 }
55
56private:
57 void Refresh(GPUVAddr gpu_addr, u32 limit) {
58 current_gpu_addr = gpu_addr;
59 current_limit = limit;
60
61 const size_t num_descriptors = static_cast<size_t>(limit) + 1;
62 read_descriptors.clear();
63 read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
64 descriptors.resize(num_descriptors);
65 }
66
67 void MarkDescriptorAsRead(u32 index) noexcept {
68 read_descriptors[index / 64] |= 1ULL << (index % 64);
69 }
70
71 [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
72 return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
73 }
74
75 Tegra::MemoryManager& gpu_memory;
76 GPUVAddr current_gpu_addr{};
77 u32 current_limit{};
78 std::vector<u64> read_descriptors;
79 std::vector<Descriptor> descriptors;
80};
81
82} // namespace VideoCommon
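A sketch of how a caller might drive this table (hypothetical names: the base address, limit, and index come from guest register state, and Descriptor would be a texture or sampler descriptor type):

// Hypothetical call site, assuming a Descriptor type with operator!=:
DescriptorTable<Descriptor> table{gpu_memory};
if (table.Synchronize(descriptor_table_gpu_addr, descriptor_table_limit)) {
    // The table was remapped or resized; all cached reads were invalidated.
}
const auto [descriptor, is_new] = table.Read(index);
if (is_new) {
    // First read of this slot, or the guest rewrote it since the last read:
    // rebuild whatever host state was derived from the old descriptor.
}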
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7938d71eb..ddfb726fe 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <array>
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/texture_cache/format_lookup_table.h"
@@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM;
 constexpr auto SINT = ComponentType::SINT;
 constexpr auto UINT = ComponentType::UINT;
 constexpr auto FLOAT = ComponentType::FLOAT;
-constexpr bool C = false; // Normal color
-constexpr bool S = true;  // Srgb
+constexpr bool LINEAR = false;
+constexpr bool SRGB = true;
 
-struct Table {
-    constexpr Table(TextureFormat texture_format_, bool is_srgb_, ComponentType red_component_,
-                    ComponentType green_component_, ComponentType blue_component_,
-                    ComponentType alpha_component_, PixelFormat pixel_format_)
-        : texture_format{texture_format_}, pixel_format{pixel_format_},
-          red_component{red_component_}, green_component{green_component_},
-          blue_component{blue_component_}, alpha_component{alpha_component_}, is_srgb{is_srgb_} {}
-
-    TextureFormat texture_format;
-    PixelFormat pixel_format;
-    ComponentType red_component;
-    ComponentType green_component;
-    ComponentType blue_component;
-    ComponentType alpha_component;
-    bool is_srgb;
-};
+constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component,
+                   ComponentType blue_component, ComponentType alpha_component, bool is_srgb) {
+    u32 hash = is_srgb ? 1 : 0;
+    hash |= static_cast<u32>(red_component) << 1;
+    hash |= static_cast<u32>(green_component) << 4;
+    hash |= static_cast<u32>(blue_component) << 7;
+    hash |= static_cast<u32>(alpha_component) << 10;
+    hash |= static_cast<u32>(format) << 13;
+    return hash;
+}
42constexpr std::array<Table, 86> DefinitionTable = {{
43 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM},
44 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM},
45 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT},
46 {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT},
47 {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB},
48
49 {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM},
50
51 {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM},
52 {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT},
53
54 {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM},
55
56 {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM},
57
58 {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM},
59 {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM},
60 {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT},
61 {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT},
62
63 {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM},
64 {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM},
65 {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT},
66 {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT},
67
68 {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM},
69 {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM},
70 {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT},
71 {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT},
72 {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT},
73
74 {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT},
75 {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM},
76 {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM},
77 {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT},
78 {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT},
79
80 {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT},
81 {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM},
82 {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM},
83 {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT},
84 {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT},
85
86 {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT},
87
88 {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT},
89 {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT},
90 {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT},
91
92 {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT},
93
94 {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT},
95 {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT},
96 {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT},
97
98 {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT},
99 {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT},
100 {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT},
101
102 {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT},
103
104 {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT},
105 {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM},
106 {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
107 {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM},
108 {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT},
109
110 {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM},
111 {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB},
112
113 {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM},
114 {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB},
115
116 {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM},
117 {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB},
118
119 {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM},
120 {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM},
121
122 {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM},
123 {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM},
124
125 {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM},
126 {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB},
127
128 {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT},
129 {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT},
130
131 {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM},
132 {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
133
134 {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM},
135 {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
136
137 {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM},
138 {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
139
140 {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM},
141 {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
142
143 {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM},
144 {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
145
146 {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM},
147 {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
148
149 {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM},
150 {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
151
152 {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM},
153 {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
154
155 {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM},
156 {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
157
158 {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM},
159 {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
 
-    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM},
-    {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
-}};
+constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) {
+    return Hash(format, component, component, component, component, is_srgb);
+}
 
 } // Anonymous namespace
 
-FormatLookupTable::FormatLookupTable() {
-    table.fill(static_cast<u8>(PixelFormat::Invalid));
-
-    for (const auto& entry : DefinitionTable) {
-        table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component,
-                             entry.green_component, entry.blue_component, entry.alpha_component)] =
-            static_cast<u8>(entry.pixel_format);
-    }
-}
-
-PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
-                                              ComponentType red_component,
-                                              ComponentType green_component,
-                                              ComponentType blue_component,
-                                              ComponentType alpha_component) const noexcept {
-    const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex(
-        format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
-    // [[likely]]
-    if (pixel_format != PixelFormat::Invalid) {
-        return pixel_format;
+PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green,
+                                       ComponentType blue, ComponentType alpha,
+                                       bool is_srgb) noexcept {
+    switch (Hash(format, red, green, blue, alpha, is_srgb)) {
+    case Hash(TextureFormat::A8R8G8B8, UNORM):
+        return PixelFormat::A8B8G8R8_UNORM;
+    case Hash(TextureFormat::A8R8G8B8, SNORM):
+        return PixelFormat::A8B8G8R8_SNORM;
+    case Hash(TextureFormat::A8R8G8B8, UINT):
+        return PixelFormat::A8B8G8R8_UINT;
+    case Hash(TextureFormat::A8R8G8B8, SINT):
+        return PixelFormat::A8B8G8R8_SINT;
+    case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB):
+        return PixelFormat::A8B8G8R8_SRGB;
+    case Hash(TextureFormat::B5G6R5, UNORM):
+        return PixelFormat::B5G6R5_UNORM;
+    case Hash(TextureFormat::A2B10G10R10, UNORM):
+        return PixelFormat::A2B10G10R10_UNORM;
+    case Hash(TextureFormat::A2B10G10R10, UINT):
+        return PixelFormat::A2B10G10R10_UINT;
62 case Hash(TextureFormat::A1B5G5R5, UNORM):
63 return PixelFormat::A1B5G5R5_UNORM;
64 case Hash(TextureFormat::A4B4G4R4, UNORM):
65 return PixelFormat::A4B4G4R4_UNORM;
66 case Hash(TextureFormat::R8, UNORM):
67 return PixelFormat::R8_UNORM;
68 case Hash(TextureFormat::R8, SNORM):
69 return PixelFormat::R8_SNORM;
70 case Hash(TextureFormat::R8, UINT):
71 return PixelFormat::R8_UINT;
72 case Hash(TextureFormat::R8, SINT):
73 return PixelFormat::R8_SINT;
74 case Hash(TextureFormat::R8G8, UNORM):
75 return PixelFormat::R8G8_UNORM;
76 case Hash(TextureFormat::R8G8, SNORM):
77 return PixelFormat::R8G8_SNORM;
78 case Hash(TextureFormat::R8G8, UINT):
79 return PixelFormat::R8G8_UINT;
80 case Hash(TextureFormat::R8G8, SINT):
81 return PixelFormat::R8G8_SINT;
82 case Hash(TextureFormat::R16G16B16A16, FLOAT):
83 return PixelFormat::R16G16B16A16_FLOAT;
84 case Hash(TextureFormat::R16G16B16A16, UNORM):
85 return PixelFormat::R16G16B16A16_UNORM;
86 case Hash(TextureFormat::R16G16B16A16, SNORM):
87 return PixelFormat::R16G16B16A16_SNORM;
88 case Hash(TextureFormat::R16G16B16A16, UINT):
89 return PixelFormat::R16G16B16A16_UINT;
90 case Hash(TextureFormat::R16G16B16A16, SINT):
91 return PixelFormat::R16G16B16A16_SINT;
92 case Hash(TextureFormat::R16G16, FLOAT):
93 return PixelFormat::R16G16_FLOAT;
94 case Hash(TextureFormat::R16G16, UNORM):
95 return PixelFormat::R16G16_UNORM;
96 case Hash(TextureFormat::R16G16, SNORM):
97 return PixelFormat::R16G16_SNORM;
98 case Hash(TextureFormat::R16G16, UINT):
99 return PixelFormat::R16G16_UINT;
100 case Hash(TextureFormat::R16G16, SINT):
101 return PixelFormat::R16G16_SINT;
102 case Hash(TextureFormat::R16, FLOAT):
103 return PixelFormat::R16_FLOAT;
104 case Hash(TextureFormat::R16, UNORM):
105 return PixelFormat::R16_UNORM;
106 case Hash(TextureFormat::R16, SNORM):
107 return PixelFormat::R16_SNORM;
108 case Hash(TextureFormat::R16, UINT):
109 return PixelFormat::R16_UINT;
110 case Hash(TextureFormat::R16, SINT):
111 return PixelFormat::R16_SINT;
112 case Hash(TextureFormat::B10G11R11, FLOAT):
113 return PixelFormat::B10G11R11_FLOAT;
114 case Hash(TextureFormat::R32G32B32A32, FLOAT):
115 return PixelFormat::R32G32B32A32_FLOAT;
116 case Hash(TextureFormat::R32G32B32A32, UINT):
117 return PixelFormat::R32G32B32A32_UINT;
118 case Hash(TextureFormat::R32G32B32A32, SINT):
119 return PixelFormat::R32G32B32A32_SINT;
120 case Hash(TextureFormat::R32G32B32, FLOAT):
121 return PixelFormat::R32G32B32_FLOAT;
122 case Hash(TextureFormat::R32G32, FLOAT):
123 return PixelFormat::R32G32_FLOAT;
124 case Hash(TextureFormat::R32G32, UINT):
125 return PixelFormat::R32G32_UINT;
126 case Hash(TextureFormat::R32G32, SINT):
127 return PixelFormat::R32G32_SINT;
128 case Hash(TextureFormat::R32, FLOAT):
129 return PixelFormat::R32_FLOAT;
130 case Hash(TextureFormat::R32, UINT):
131 return PixelFormat::R32_UINT;
132 case Hash(TextureFormat::R32, SINT):
133 return PixelFormat::R32_SINT;
134 case Hash(TextureFormat::E5B9G9R9, FLOAT):
135 return PixelFormat::E5B9G9R9_FLOAT;
136 case Hash(TextureFormat::D32, FLOAT):
137 return PixelFormat::D32_FLOAT;
138 case Hash(TextureFormat::D16, UNORM):
139 return PixelFormat::D16_UNORM;
140 case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR):
141 return PixelFormat::S8_UINT_D24_UNORM;
142 case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR):
143 return PixelFormat::S8_UINT_D24_UNORM;
144 case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR):
145 return PixelFormat::D32_FLOAT_S8_UINT;
146 case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR):
147 return PixelFormat::BC1_RGBA_UNORM;
148 case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB):
149 return PixelFormat::BC1_RGBA_SRGB;
150 case Hash(TextureFormat::BC2, UNORM, LINEAR):
151 return PixelFormat::BC2_UNORM;
152 case Hash(TextureFormat::BC2, UNORM, SRGB):
153 return PixelFormat::BC2_SRGB;
154 case Hash(TextureFormat::BC3, UNORM, LINEAR):
155 return PixelFormat::BC3_UNORM;
156 case Hash(TextureFormat::BC3, UNORM, SRGB):
157 return PixelFormat::BC3_SRGB;
158 case Hash(TextureFormat::BC4, UNORM):
159 return PixelFormat::BC4_UNORM;
160 case Hash(TextureFormat::BC4, SNORM):
161 return PixelFormat::BC4_SNORM;
162 case Hash(TextureFormat::BC5, UNORM):
163 return PixelFormat::BC5_UNORM;
164 case Hash(TextureFormat::BC5, SNORM):
165 return PixelFormat::BC5_SNORM;
166 case Hash(TextureFormat::BC7, UNORM, LINEAR):
167 return PixelFormat::BC7_UNORM;
168 case Hash(TextureFormat::BC7, UNORM, SRGB):
169 return PixelFormat::BC7_SRGB;
170 case Hash(TextureFormat::BC6H_SFLOAT, FLOAT):
171 return PixelFormat::BC6H_SFLOAT;
172 case Hash(TextureFormat::BC6H_UFLOAT, FLOAT):
173 return PixelFormat::BC6H_UFLOAT;
174 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR):
175 return PixelFormat::ASTC_2D_4X4_UNORM;
176 case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB):
177 return PixelFormat::ASTC_2D_4X4_SRGB;
178 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR):
179 return PixelFormat::ASTC_2D_5X4_UNORM;
180 case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB):
181 return PixelFormat::ASTC_2D_5X4_SRGB;
182 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR):
183 return PixelFormat::ASTC_2D_5X5_UNORM;
184 case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB):
185 return PixelFormat::ASTC_2D_5X5_SRGB;
186 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR):
187 return PixelFormat::ASTC_2D_8X8_UNORM;
188 case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB):
189 return PixelFormat::ASTC_2D_8X8_SRGB;
190 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR):
191 return PixelFormat::ASTC_2D_8X5_UNORM;
192 case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB):
193 return PixelFormat::ASTC_2D_8X5_SRGB;
194 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR):
195 return PixelFormat::ASTC_2D_10X8_UNORM;
196 case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB):
197 return PixelFormat::ASTC_2D_10X8_SRGB;
198 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR):
199 return PixelFormat::ASTC_2D_6X6_UNORM;
200 case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB):
201 return PixelFormat::ASTC_2D_6X6_SRGB;
202 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR):
203 return PixelFormat::ASTC_2D_10X10_UNORM;
204 case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB):
205 return PixelFormat::ASTC_2D_10X10_SRGB;
206 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR):
207 return PixelFormat::ASTC_2D_12X12_UNORM;
208 case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB):
209 return PixelFormat::ASTC_2D_12X12_SRGB;
210 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR):
211 return PixelFormat::ASTC_2D_8X6_UNORM;
212 case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB):
213 return PixelFormat::ASTC_2D_8X6_SRGB;
214 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR):
215 return PixelFormat::ASTC_2D_6X5_UNORM;
216 case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB):
217 return PixelFormat::ASTC_2D_6X5_SRGB;
     }
     UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
-                      static_cast<int>(format), is_srgb, static_cast<int>(red_component),
-                      static_cast<int>(green_component), static_cast<int>(blue_component),
-                      static_cast<int>(alpha_component));
+                      static_cast<int>(format), is_srgb, static_cast<int>(red),
+                      static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha));
     return PixelFormat::A8B8G8R8_UNORM;
 }
 
195void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
196 ComponentType green_component, ComponentType blue_component,
197 ComponentType alpha_component, PixelFormat pixel_format) {}
198
199std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
200 ComponentType red_component,
201 ComponentType green_component,
202 ComponentType blue_component,
203 ComponentType alpha_component) noexcept {
204 const auto format_index = static_cast<std::size_t>(format);
205 const auto red_index = static_cast<std::size_t>(red_component);
206 const auto green_index = static_cast<std::size_t>(green_component);
207 const auto blue_index = static_cast<std::size_t>(blue_component);
208 const auto alpha_index = static_cast<std::size_t>(alpha_component);
209 const std::size_t srgb_index = is_srgb ? 1 : 0;
210
211 return format_index * PerFormat +
212 srgb_index * PerComponent * PerComponent * PerComponent * PerComponent +
213 alpha_index * PerComponent * PerComponent * PerComponent +
214 blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index;
215}
216
 } // namespace VideoCommon
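The shift choices in Hash make every case label a distinct compile-time constant: the sRGB flag takes bit 0, each ComponentType takes 3 bits (positions 1-3, 4-6, 7-9, 10-12), and the TextureFormat occupies bit 13 upward. A quick sketch of the packing (assuming, as in the Maxwell TIC encoding, that ComponentType values fit in 3 bits):

// Key layout: [format: bits 13+][alpha:3][blue:3][green:3][red:3][srgb:1]
constexpr u32 key = Hash(TextureFormat::A8R8G8B8, UNORM, UNORM, UNORM, UNORM, SRGB);
static_assert((key & 1) == 1);                              // sRGB flag
static_assert(((key >> 1) & 7) == static_cast<u32>(UNORM)); // red component
static_assert(key >> 13 == static_cast<u32>(TextureFormat::A8R8G8B8));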
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
index aa77e0a5a..729533999 100644
--- a/src/video_core/texture_cache/format_lookup_table.h
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -4,48 +4,14 @@
 
 #pragma once
 
-#include <array>
-#include <limits>
 #include "video_core/surface.h"
 #include "video_core/textures/texture.h"
 
 namespace VideoCommon {
 
-class FormatLookupTable {
-public:
-    explicit FormatLookupTable();
-
+VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo(
+    Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component,
+    Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component,
+    Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept;
18 VideoCore::Surface::PixelFormat GetPixelFormat(
19 Tegra::Texture::TextureFormat format, bool is_srgb,
20 Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
21 Tegra::Texture::ComponentType blue_component,
22 Tegra::Texture::ComponentType alpha_component) const noexcept;
23
24private:
25 static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
26
27 static constexpr std::size_t NumTextureFormats = 128;
28
29 static constexpr std::size_t PerComponent = 8;
30 static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
31 static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
32 static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
33 static constexpr std::size_t PerFormat = PerComponents4 * 2;
34
35 static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
36 Tegra::Texture::ComponentType red_component,
37 Tegra::Texture::ComponentType green_component,
38 Tegra::Texture::ComponentType blue_component,
39 Tegra::Texture::ComponentType alpha_component) noexcept;
40
41 void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
42 Tegra::Texture::ComponentType red_component,
43 Tegra::Texture::ComponentType green_component,
44 Tegra::Texture::ComponentType blue_component,
45 Tegra::Texture::ComponentType alpha_component,
46 VideoCore::Surface::PixelFormat pixel_format);
47
48 std::array<u8, NumTextureFormats * PerFormat> table;
49};
 
 } // namespace VideoCommon
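The stateful FormatLookupTable object is gone; callers now invoke a single free function. A sketch of the call-site shape (the TIC field names here are assumptions for illustration):

// Hypothetical call site translating a texture descriptor into a PixelFormat:
const VideoCore::Surface::PixelFormat format = VideoCommon::PixelFormatFromTextureInfo(
    tic.format, tic.r_type, tic.g_type, tic.b_type, tic.a_type, tic.srgb_conversion != 0);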
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
new file mode 100644
index 000000000..d10ba4ccd
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7
8#include "video_core/texture_cache/formatter.h"
9#include "video_core/texture_cache/image_base.h"
10#include "video_core/texture_cache/image_info.h"
11#include "video_core/texture_cache/image_view_base.h"
12#include "video_core/texture_cache/render_targets.h"
13
14namespace VideoCommon {
15
16std::string Name(const ImageBase& image) {
17 const GPUVAddr gpu_addr = image.gpu_addr;
18 const ImageInfo& info = image.info;
19 const u32 width = info.size.width;
20 const u32 height = info.size.height;
21 const u32 depth = info.size.depth;
22 const u32 num_layers = image.info.resources.layers;
23 const u32 num_levels = image.info.resources.levels;
24 std::string resource;
25 if (num_layers > 1) {
26 resource += fmt::format(":L{}", num_layers);
27 }
28 if (num_levels > 1) {
29 resource += fmt::format(":M{}", num_levels);
30 }
31 switch (image.info.type) {
32 case ImageType::e1D:
33 return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource);
34 case ImageType::e2D:
35 return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource);
36 case ImageType::e3D:
37 return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource);
38 case ImageType::Linear:
39 return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height);
40 case ImageType::Buffer:
41 return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width);
42 }
43 return "Invalid";
44}
45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth;
50 const u32 num_levels = image_view.range.extent.levels;
51 const u32 num_layers = image_view.range.extent.layers;
52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) {
55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D:
58 return fmt::format("ImageView 2D {}x{}{}", width, height, level);
59 case ImageViewType::Cube:
60 return fmt::format("ImageView Cube {}x{}{}", width, height, level);
61 case ImageViewType::e3D:
62 return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level);
63 case ImageViewType::e1DArray:
64 return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers);
65 case ImageViewType::e2DArray:
66 return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers);
67 case ImageViewType::CubeArray:
68 return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers);
69 case ImageViewType::Rect:
70 return fmt::format("ImageView Rect {}x{}{}", width, height, level);
71 case ImageViewType::Buffer:
72 return fmt::format("BufferView {}", width);
73 }
74 return "Invalid";
75}
76
77std::string Name(const RenderTargets& render_targets) {
78 std::string_view debug_prefix;
79 const auto num_color = std::ranges::count_if(
80 render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); });
81 if (render_targets.depth_buffer_id) {
82 debug_prefix = num_color > 0 ? "R" : "Z";
83 } else {
84 debug_prefix = num_color > 0 ? "C" : "X";
85 }
86 const Extent2D size = render_targets.size;
87 if (num_color > 0) {
88 return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width,
89 size.height);
90 } else {
91 return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height);
92 }
93}
94
95} // namespace VideoCommon
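For reference, the helpers above produce debug names of the following shape (all values hypothetical):

// Name(image):          "Image 2D 0xa0000000 1920x1080:L6:M3"  (6 layers, 3 levels)
// Name(image_view):     "ImageView 2DArray 256x256:4|6"        (4 levels, 6 layers)
// Name(render_targets): "Framebuffer C2 1920x1080"             (2 color targets, no depth)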
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
new file mode 100644
index 000000000..a48413983
--- /dev/null
+++ b/src/video_core/texture_cache/formatter.h
@@ -0,0 +1,263 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "video_core/surface.h"
12#include "video_core/texture_cache/types.h"
13
14template <>
15struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> {
16 template <typename FormatContext>
17 auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) {
18 using VideoCore::Surface::PixelFormat;
19 const string_view name = [format] {
20 switch (format) {
21 case PixelFormat::A8B8G8R8_UNORM:
22 return "A8B8G8R8_UNORM";
23 case PixelFormat::A8B8G8R8_SNORM:
24 return "A8B8G8R8_SNORM";
25 case PixelFormat::A8B8G8R8_SINT:
26 return "A8B8G8R8_SINT";
27 case PixelFormat::A8B8G8R8_UINT:
28 return "A8B8G8R8_UINT";
29 case PixelFormat::R5G6B5_UNORM:
30 return "R5G6B5_UNORM";
31 case PixelFormat::B5G6R5_UNORM:
32 return "B5G6R5_UNORM";
33 case PixelFormat::A1R5G5B5_UNORM:
34 return "A1R5G5B5_UNORM";
35 case PixelFormat::A2B10G10R10_UNORM:
36 return "A2B10G10R10_UNORM";
37 case PixelFormat::A2B10G10R10_UINT:
38 return "A2B10G10R10_UINT";
39 case PixelFormat::A1B5G5R5_UNORM:
40 return "A1B5G5R5_UNORM";
41 case PixelFormat::R8_UNORM:
42 return "R8_UNORM";
43 case PixelFormat::R8_SNORM:
44 return "R8_SNORM";
45 case PixelFormat::R8_SINT:
46 return "R8_SINT";
47 case PixelFormat::R8_UINT:
48 return "R8_UINT";
49 case PixelFormat::R16G16B16A16_FLOAT:
50 return "R16G16B16A16_FLOAT";
51 case PixelFormat::R16G16B16A16_UNORM:
52 return "R16G16B16A16_UNORM";
53 case PixelFormat::R16G16B16A16_SNORM:
54 return "R16G16B16A16_SNORM";
55 case PixelFormat::R16G16B16A16_SINT:
56 return "R16G16B16A16_SINT";
57 case PixelFormat::R16G16B16A16_UINT:
58 return "R16G16B16A16_UINT";
59 case PixelFormat::B10G11R11_FLOAT:
60 return "B10G11R11_FLOAT";
61 case PixelFormat::R32G32B32A32_UINT:
62 return "R32G32B32A32_UINT";
63 case PixelFormat::BC1_RGBA_UNORM:
64 return "BC1_RGBA_UNORM";
65 case PixelFormat::BC2_UNORM:
66 return "BC2_UNORM";
67 case PixelFormat::BC3_UNORM:
68 return "BC3_UNORM";
69 case PixelFormat::BC4_UNORM:
70 return "BC4_UNORM";
71 case PixelFormat::BC4_SNORM:
72 return "BC4_SNORM";
73 case PixelFormat::BC5_UNORM:
74 return "BC5_UNORM";
75 case PixelFormat::BC5_SNORM:
76 return "BC5_SNORM";
77 case PixelFormat::BC7_UNORM:
78 return "BC7_UNORM";
79 case PixelFormat::BC6H_UFLOAT:
80 return "BC6H_UFLOAT";
81 case PixelFormat::BC6H_SFLOAT:
82 return "BC6H_SFLOAT";
83 case PixelFormat::ASTC_2D_4X4_UNORM:
84 return "ASTC_2D_4X4_UNORM";
85 case PixelFormat::B8G8R8A8_UNORM:
86 return "B8G8R8A8_UNORM";
87 case PixelFormat::R32G32B32A32_FLOAT:
88 return "R32G32B32A32_FLOAT";
89 case PixelFormat::R32G32B32A32_SINT:
90 return "R32G32B32A32_SINT";
91 case PixelFormat::R32G32_FLOAT:
92 return "R32G32_FLOAT";
93 case PixelFormat::R32G32_SINT:
94 return "R32G32_SINT";
95 case PixelFormat::R32_FLOAT:
96 return "R32_FLOAT";
97 case PixelFormat::R16_FLOAT:
98 return "R16_FLOAT";
99 case PixelFormat::R16_UNORM:
100 return "R16_UNORM";
101 case PixelFormat::R16_SNORM:
102 return "R16_SNORM";
103 case PixelFormat::R16_UINT:
104 return "R16_UINT";
105 case PixelFormat::R16_SINT:
106 return "R16_SINT";
107 case PixelFormat::R16G16_UNORM:
108 return "R16G16_UNORM";
109 case PixelFormat::R16G16_FLOAT:
110 return "R16G16_FLOAT";
111 case PixelFormat::R16G16_UINT:
112 return "R16G16_UINT";
113 case PixelFormat::R16G16_SINT:
114 return "R16G16_SINT";
115 case PixelFormat::R16G16_SNORM:
116 return "R16G16_SNORM";
117 case PixelFormat::R32G32B32_FLOAT:
118 return "R32G32B32_FLOAT";
119 case PixelFormat::A8B8G8R8_SRGB:
120 return "A8B8G8R8_SRGB";
121 case PixelFormat::R8G8_UNORM:
122 return "R8G8_UNORM";
123 case PixelFormat::R8G8_SNORM:
124 return "R8G8_SNORM";
125 case PixelFormat::R8G8_SINT:
126 return "R8G8_SINT";
127 case PixelFormat::R8G8_UINT:
128 return "R8G8_UINT";
129 case PixelFormat::R32G32_UINT:
130 return "R32G32_UINT";
131 case PixelFormat::R16G16B16X16_FLOAT:
132 return "R16G16B16X16_FLOAT";
133 case PixelFormat::R32_UINT:
134 return "R32_UINT";
135 case PixelFormat::R32_SINT:
136 return "R32_SINT";
137 case PixelFormat::ASTC_2D_8X8_UNORM:
138 return "ASTC_2D_8X8_UNORM";
139 case PixelFormat::ASTC_2D_8X5_UNORM:
140 return "ASTC_2D_8X5_UNORM";
141 case PixelFormat::ASTC_2D_5X4_UNORM:
142 return "ASTC_2D_5X4_UNORM";
143 case PixelFormat::B8G8R8A8_SRGB:
144 return "B8G8R8A8_SRGB";
145 case PixelFormat::BC1_RGBA_SRGB:
146 return "BC1_RGBA_SRGB";
147 case PixelFormat::BC2_SRGB:
148 return "BC2_SRGB";
149 case PixelFormat::BC3_SRGB:
150 return "BC3_SRGB";
151 case PixelFormat::BC7_SRGB:
152 return "BC7_SRGB";
153 case PixelFormat::A4B4G4R4_UNORM:
154 return "A4B4G4R4_UNORM";
155 case PixelFormat::ASTC_2D_4X4_SRGB:
156 return "ASTC_2D_4X4_SRGB";
157 case PixelFormat::ASTC_2D_8X8_SRGB:
158 return "ASTC_2D_8X8_SRGB";
159 case PixelFormat::ASTC_2D_8X5_SRGB:
160 return "ASTC_2D_8X5_SRGB";
161 case PixelFormat::ASTC_2D_5X4_SRGB:
162 return "ASTC_2D_5X4_SRGB";
163 case PixelFormat::ASTC_2D_5X5_UNORM:
164 return "ASTC_2D_5X5_UNORM";
165 case PixelFormat::ASTC_2D_5X5_SRGB:
166 return "ASTC_2D_5X5_SRGB";
167 case PixelFormat::ASTC_2D_10X8_UNORM:
168 return "ASTC_2D_10X8_UNORM";
169 case PixelFormat::ASTC_2D_10X8_SRGB:
170 return "ASTC_2D_10X8_SRGB";
171 case PixelFormat::ASTC_2D_6X6_UNORM:
172 return "ASTC_2D_6X6_UNORM";
173 case PixelFormat::ASTC_2D_6X6_SRGB:
174 return "ASTC_2D_6X6_SRGB";
175 case PixelFormat::ASTC_2D_10X10_UNORM:
176 return "ASTC_2D_10X10_UNORM";
177 case PixelFormat::ASTC_2D_10X10_SRGB:
178 return "ASTC_2D_10X10_SRGB";
179 case PixelFormat::ASTC_2D_12X12_UNORM:
180 return "ASTC_2D_12X12_UNORM";
181 case PixelFormat::ASTC_2D_12X12_SRGB:
182 return "ASTC_2D_12X12_SRGB";
183 case PixelFormat::ASTC_2D_8X6_UNORM:
184 return "ASTC_2D_8X6_UNORM";
185 case PixelFormat::ASTC_2D_8X6_SRGB:
186 return "ASTC_2D_8X6_SRGB";
187 case PixelFormat::ASTC_2D_6X5_UNORM:
188 return "ASTC_2D_6X5_UNORM";
189 case PixelFormat::ASTC_2D_6X5_SRGB:
190 return "ASTC_2D_6X5_SRGB";
191 case PixelFormat::E5B9G9R9_FLOAT:
192 return "E5B9G9R9_FLOAT";
193 case PixelFormat::D32_FLOAT:
194 return "D32_FLOAT";
195 case PixelFormat::D16_UNORM:
196 return "D16_UNORM";
197 case PixelFormat::D24_UNORM_S8_UINT:
198 return "D24_UNORM_S8_UINT";
199 case PixelFormat::S8_UINT_D24_UNORM:
200 return "S8_UINT_D24_UNORM";
201 case PixelFormat::D32_FLOAT_S8_UINT:
202 return "D32_FLOAT_S8_UINT";
203 case PixelFormat::MaxDepthStencilFormat:
204 case PixelFormat::Invalid:
205 return "Invalid";
206 }
207 return "Invalid";
208 }();
209 return formatter<string_view>::format(name, ctx);
210 }
211};
212
213template <>
214struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> {
215 template <typename FormatContext>
216 auto format(VideoCommon::ImageType type, FormatContext& ctx) {
217 const string_view name = [type] {
218 using VideoCommon::ImageType;
219 switch (type) {
220 case ImageType::e1D:
221 return "1D";
222 case ImageType::e2D:
223 return "2D";
224 case ImageType::e3D:
225 return "3D";
226 case ImageType::Linear:
227 return "Linear";
228 case ImageType::Buffer:
229 return "Buffer";
230 }
231 return "Invalid";
232 }();
233 return formatter<string_view>::format(name, ctx);
234 }
235};
236
237template <>
238struct fmt::formatter<VideoCommon::Extent3D> {
239 constexpr auto parse(fmt::format_parse_context& ctx) {
240 return ctx.begin();
241 }
242
243 template <typename FormatContext>
244 auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) {
245 return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height,
246 extent.depth);
247 }
248};
249
250namespace VideoCommon {
251
252struct ImageBase;
253struct ImageViewBase;
254struct RenderTargets;
255
256[[nodiscard]] std::string Name(const ImageBase& image);
257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view,
259 std::optional<ImageViewType> type = std::nullopt);
260
261[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262
263} // namespace VideoCommon
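With these specializations in scope, texture cache types can be handed directly to fmt and the logging macros. A minimal sketch:

#include <fmt/format.h>

#include "video_core/texture_cache/formatter.h"

void FormatterExample() {
    const VideoCommon::Extent3D extent{.width = 1920, .height = 1080, .depth = 1};
    fmt::print("{}\n", extent); // prints "{1920, 1080, 1}"
    fmt::print("{}\n", VideoCore::Surface::PixelFormat::D24_UNORM_S8_UINT); // "D24_UNORM_S8_UINT"
}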
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
new file mode 100644
index 000000000..448a05fcc
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -0,0 +1,216 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/formatter.h"
13#include "video_core/texture_cache/image_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/util.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::DefaultBlockHeight;
20using VideoCore::Surface::DefaultBlockWidth;
21
22namespace {
23/// Returns the base layer and mip level offset
24[[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) {
25 if (layer_stride == 0) {
26 return {0, diff};
27 } else {
28 return {diff / layer_stride, diff % layer_stride};
29 }
30}
31
32[[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) {
33 return layers.base_level < info.resources.levels &&
34 layers.base_layer + layers.num_layers <= info.resources.layers;
35}
36
37[[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) {
38 const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level);
39 const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level);
40 if (!ValidateLayers(copy.src_subresource, src)) {
41 return false;
42 }
43 if (!ValidateLayers(copy.dst_subresource, dst)) {
44 return false;
45 }
46 if (copy.src_offset.x + copy.extent.width > src_size.width ||
47 copy.src_offset.y + copy.extent.height > src_size.height ||
48 copy.src_offset.z + copy.extent.depth > src_size.depth) {
49 return false;
50 }
51 if (copy.dst_offset.x + copy.extent.width > dst_size.width ||
52 copy.dst_offset.y + copy.extent.height > dst_size.height ||
53 copy.dst_offset.z + copy.extent.depth > dst_size.depth) {
54 return false;
55 }
56 return true;
57}
58} // Anonymous namespace
59
60ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_)
61 : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)},
62 unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)},
63 converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_},
64 cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes},
65 mip_level_offsets{CalculateMipLevelOffsets(info)} {
66 if (info.type == ImageType::e3D) {
67 slice_offsets = CalculateSliceOffsets(info);
68 slice_subresources = CalculateSliceSubresources(info);
69 }
70}
71
72std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
73 if (other_addr < gpu_addr) {
74 // Subresource address can't be lower than the base
75 return std::nullopt;
76 }
77 const u32 diff = static_cast<u32>(other_addr - gpu_addr);
78 if (diff > guest_size_bytes) {
79 // This can happen when two CPU addresses are used for different GPU addresses
80 return std::nullopt;
81 }
82 if (info.type != ImageType::e3D) {
83 const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride);
84 const auto end = mip_level_offsets.begin() + info.resources.levels;
85 const auto it = std::find(mip_level_offsets.begin(), end, mip_offset);
86 if (layer >= info.resources.layers || it == end) {
87 return std::nullopt;
88 }
89 return SubresourceBase{
90 .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)),
91 .layer = layer,
92 };
93 } else {
94 // TODO: Consider using binary_search after a threshold
95 const auto it = std::ranges::find(slice_offsets, diff);
96 if (it == slice_offsets.cend()) {
97 return std::nullopt;
98 }
99 return slice_subresources[std::distance(slice_offsets.begin(), it)];
100 }
101}
102
103ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept {
104 const auto it = std::ranges::find(image_view_infos, view_info);
105 if (it == image_view_infos.end()) {
106 return ImageViewId{};
107 }
108 return image_view_ids[std::distance(image_view_infos.begin(), it)];
109}
110
111void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) {
112 image_view_infos.push_back(view_info);
113 image_view_ids.push_back(image_view_id);
114}
115
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type);
119 std::optional<SubresourceBase> base;
120 if (lhs.info.type == ImageType::Linear) {
121 base = SubresourceBase{.level = 0, .layer = 0};
122 } else {
123 base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS);
124 }
125 if (!base) {
126 LOG_ERROR(HW_GPU, "Image alias should have been flipped");
127 return;
128 }
129 const PixelFormat lhs_format = lhs.info.format;
130 const PixelFormat rhs_format = rhs.info.format;
131 const Extent2D lhs_block{
132 .width = DefaultBlockWidth(lhs_format),
133 .height = DefaultBlockHeight(lhs_format),
134 };
135 const Extent2D rhs_block{
136 .width = DefaultBlockWidth(rhs_format),
137 .height = DefaultBlockHeight(rhs_format),
138 };
139 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
140 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
141 if (is_lhs_compressed && is_rhs_compressed) {
142 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
143 return;
144 }
145 const s32 lhs_mips = lhs.info.resources.levels;
146 const s32 rhs_mips = rhs.info.resources.levels;
147 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
148 AliasedImage lhs_alias;
149 AliasedImage rhs_alias;
150 lhs_alias.id = rhs_id;
151 rhs_alias.id = lhs_id;
152 lhs_alias.copies.reserve(num_mips);
153 rhs_alias.copies.reserve(num_mips);
154 for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) {
155 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
156 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
157 if (is_lhs_compressed) {
158 lhs_size.width /= lhs_block.width;
159 lhs_size.height /= lhs_block.height;
160 }
161 if (is_rhs_compressed) {
162 rhs_size.width /= rhs_block.width;
163 rhs_size.height /= rhs_block.height;
164 }
165 const Extent3D copy_size{
166 .width = std::min(lhs_size.width, rhs_size.width),
167 .height = std::min(lhs_size.height, rhs_size.height),
168 .depth = std::min(lhs_size.depth, rhs_size.depth),
169 };
170 if (copy_size.width == 0 || copy_size.height == 0) {
171 LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased.");
172 continue;
173 }
174 const bool is_lhs_3d = lhs.info.type == ImageType::e3D;
175 const bool is_rhs_3d = rhs.info.type == ImageType::e3D;
176 const Offset3D lhs_offset{0, 0, 0};
177 const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0};
178 const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer;
179 const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers;
180 const s32 num_layers = std::min(lhs_layers, rhs_layers);
181 const SubresourceLayers lhs_subresource{
182 .base_level = mip_level,
183 .base_layer = 0,
184 .num_layers = num_layers,
185 };
186 const SubresourceLayers rhs_subresource{
187 .base_level = base->level + mip_level,
188 .base_layer = is_rhs_3d ? 0 : base->layer,
189 .num_layers = num_layers,
190 };
191 [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{
192 .src_subresource = lhs_subresource,
193 .dst_subresource = rhs_subresource,
194 .src_offset = lhs_offset,
195 .dst_offset = rhs_offset,
196 .extent = copy_size,
197 });
198 [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{
199 .src_subresource = rhs_subresource,
200 .dst_subresource = lhs_subresource,
201 .src_offset = rhs_offset,
202 .dst_offset = lhs_offset,
203 .extent = copy_size,
204 });
205 ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy");
206 ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy");
207 }
208 ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
209 if (lhs_alias.copies.empty()) {
210 return;
211 }
212 lhs.aliased_images.push_back(std::move(lhs_alias));
213 rhs.aliased_images.push_back(std::move(rhs_alias));
214}
215
216} // namespace VideoCommon
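
A minimal sketch of the (layer, level) lookup TryFindBase performs for non-3D images, using only standard containers. FindBase, SubresourceBase, and the div/mod stand-in for LayerMipOffset are illustrative assumptions, and a non-zero layer stride is assumed:

#include <algorithm>
#include <array>
#include <cstdint>
#include <optional>

struct SubresourceBase {
    std::int32_t level;
    std::int32_t layer;
};

// Map a byte offset into the image to a (level, layer) pair, given
// precomputed per-level offsets and a constant stride between layers.
std::optional<SubresourceBase> FindBase(std::uint32_t diff, std::uint32_t layer_stride,
                                        const std::array<std::uint32_t, 14>& level_offsets,
                                        std::int32_t num_levels, std::int32_t num_layers) {
    const auto layer = static_cast<std::int32_t>(diff / layer_stride);
    const std::uint32_t mip_offset = diff % layer_stride;
    const auto end = level_offsets.begin() + num_levels;
    const auto it = std::find(level_offsets.begin(), end, mip_offset);
    if (layer >= num_layers || it == end) {
        return std::nullopt; // offset does not land on a subresource boundary
    }
    return SubresourceBase{
        .level = static_cast<std::int32_t>(std::distance(level_offsets.begin(), it)),
        .layer = layer,
    };
}
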
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
new file mode 100644
index 000000000..b7f3b7e43
--- /dev/null
+++ b/src/video_core/texture_cache/image_base.h
@@ -0,0 +1,83 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <optional>
9#include <vector>
10
11#include "common/common_funcs.h"
12#include "common/common_types.h"
13#include "video_core/texture_cache/image_info.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19enum class ImageFlagBits : u32 {
20 AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
21 Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted
22 CpuModified = 1 << 2, ///< Contents have been modified from the CPU
23 GpuModified = 1 << 3, ///< Contents have been modified from the GPU
24 Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
25 Strong = 1 << 5, ///< Exists in the image table, the dimensions can be trusted
26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30
31struct ImageViewInfo;
32
33struct AliasedImage {
34 std::vector<ImageCopy> copies;
35 ImageId id;
36};
37
38struct ImageBase {
39 explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
40
41 [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept;
42
43 [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept;
44
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 }
51
52 ImageInfo info;
53
54 u32 guest_size_bytes = 0;
55 u32 unswizzled_size_bytes = 0;
56 u32 converted_size_bytes = 0;
57 ImageFlagBits flags = ImageFlagBits::CpuModified;
58
59 GPUVAddr gpu_addr = 0;
60 VAddr cpu_addr = 0;
61 VAddr cpu_addr_end = 0;
62
63 u64 modification_tick = 0;
64 u64 frame_tick = 0;
65
66 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
67
68 std::vector<ImageViewInfo> image_view_infos;
69 std::vector<ImageViewId> image_view_ids;
70
71 std::vector<u32> slice_offsets;
72 std::vector<SubresourceBase> slice_subresources;
73
74 std::vector<AliasedImage> aliased_images;
75};
76
77struct ImageAllocBase {
78 std::vector<ImageId> images;
79};
80
81void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
82
83} // namespace VideoCommon
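
The Overlaps helper above is the standard half-open interval intersection test; a self-contained sketch with plain integer addresses (RangesOverlap is an illustrative name):

#include <cstdint>

// Two half-open ranges [a_begin, a_end) and [b_begin, b_end) intersect
// exactly when each one starts before the other ends.
constexpr bool RangesOverlap(std::uint64_t a_begin, std::uint64_t a_end,
                             std::uint64_t b_begin, std::uint64_t b_end) {
    return a_begin < b_end && b_begin < a_end;
}

static_assert(RangesOverlap(0, 10, 9, 20));   // one byte shared
static_assert(!RangesOverlap(0, 10, 10, 20)); // touching ends do not overlap
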
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp
new file mode 100644
index 000000000..64fd7010a
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.cpp
@@ -0,0 +1,189 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/surface.h"
7#include "video_core/texture_cache/format_lookup_table.h"
8#include "video_core/texture_cache/image_info.h"
9#include "video_core/texture_cache/samples_helper.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/texture_cache/util.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::TextureType;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20ImageInfo::ImageInfo(const TICEntry& config) noexcept {
21 format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
22 config.a_type, config.srgb_conversion);
23 num_samples = NumSamples(config.msaa_mode);
24 resources.levels = config.max_mip_level + 1;
25 if (config.IsPitchLinear()) {
26 pitch = config.Pitch();
27 } else if (config.IsBlockLinear()) {
28 block = Extent3D{
29 .width = config.block_width,
30 .height = config.block_height,
31 .depth = config.block_depth,
32 };
33 }
34 tile_width_spacing = config.tile_width_spacing;
35 if (config.texture_type != TextureType::Texture2D &&
36 config.texture_type != TextureType::Texture2DNoMipmap) {
37 ASSERT(!config.IsPitchLinear());
38 }
39 switch (config.texture_type) {
40 case TextureType::Texture1D:
41 ASSERT(config.BaseLayer() == 0);
42 type = ImageType::e1D;
43 size.width = config.Width();
44 break;
45 case TextureType::Texture1DArray:
46 UNIMPLEMENTED_IF(config.BaseLayer() != 0);
47 type = ImageType::e1D;
48 size.width = config.Width();
49 resources.layers = config.Depth();
50 break;
51 case TextureType::Texture2D:
52 case TextureType::Texture2DNoMipmap:
53 ASSERT(config.Depth() == 1);
54 type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D;
55 size.width = config.Width();
56 size.height = config.Height();
57 resources.layers = config.BaseLayer() + 1;
58 break;
59 case TextureType::Texture2DArray:
60 type = ImageType::e2D;
61 size.width = config.Width();
62 size.height = config.Height();
63 resources.layers = config.BaseLayer() + config.Depth();
64 break;
65 case TextureType::TextureCubemap:
66 ASSERT(config.Depth() == 1);
67 type = ImageType::e2D;
68 size.width = config.Width();
69 size.height = config.Height();
70 resources.layers = config.BaseLayer() + 6;
71 break;
72 case TextureType::TextureCubeArray:
73 UNIMPLEMENTED_IF(config.load_store_hint != 0);
74 type = ImageType::e2D;
75 size.width = config.Width();
76 size.height = config.Height();
77 resources.layers = config.BaseLayer() + config.Depth() * 6;
78 break;
79 case TextureType::Texture3D:
80 ASSERT(config.BaseLayer() == 0);
81 type = ImageType::e3D;
82 size.width = config.Width();
83 size.height = config.Height();
84 size.depth = config.Depth();
85 break;
86 case TextureType::Texture1DBuffer:
87 type = ImageType::Buffer;
88 size.width = config.Width();
89 break;
90 default:
91 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
92 break;
93 }
94 if (type != ImageType::Linear) {
95 // FIXME: Call this without passing *this
96 layer_stride = CalculateLayerStride(*this);
97 maybe_unaligned_layer_stride = CalculateLayerSize(*this);
98 }
99}
100
101ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept {
102 const auto& rt = regs.rt[index];
103 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format);
104 if (rt.tile_mode.is_pitch_linear) {
105 ASSERT(rt.tile_mode.is_3d == 0);
106 type = ImageType::Linear;
107 pitch = rt.width;
108 size = Extent3D{
109 .width = pitch / BytesPerBlock(format),
110 .height = rt.height,
111 .depth = 1,
112 };
113 return;
114 }
115 size.width = rt.width;
116 size.height = rt.height;
117 layer_stride = rt.layer_stride * 4;
118 maybe_unaligned_layer_stride = layer_stride;
119 num_samples = NumSamples(regs.multisample_mode);
120 block = Extent3D{
121 .width = rt.tile_mode.block_width,
122 .height = rt.tile_mode.block_height,
123 .depth = rt.tile_mode.block_depth,
124 };
125 if (rt.tile_mode.is_3d) {
126 type = ImageType::e3D;
127 size.depth = rt.depth;
128 } else {
129 type = ImageType::e2D;
130 resources.layers = rt.depth;
131 }
132}
133
134ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
135 format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format);
136 size.width = regs.zeta_width;
137 size.height = regs.zeta_height;
138 resources.levels = 1;
139 layer_stride = regs.zeta.layer_stride * 4;
140 maybe_unaligned_layer_stride = layer_stride;
141 num_samples = NumSamples(regs.multisample_mode);
142 block = Extent3D{
143 .width = regs.zeta.tile_mode.block_width,
144 .height = regs.zeta.tile_mode.block_height,
145 .depth = regs.zeta.tile_mode.block_depth,
146 };
147 if (regs.zeta.tile_mode.is_pitch_linear) {
148 ASSERT(regs.zeta.tile_mode.is_3d == 0);
149 type = ImageType::Linear;
150 pitch = size.width * BytesPerBlock(format);
151 } else if (regs.zeta.tile_mode.is_3d) {
152 ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0);
153 type = ImageType::e3D;
154 size.depth = regs.zeta_depth;
155 } else {
156 type = ImageType::e2D;
157 resources.layers = regs.zeta_depth;
158 }
159}
160
161ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
162 UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero");
163 format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format);
164 if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) {
165 type = ImageType::Linear;
166 size = Extent3D{
167 .width = config.pitch / VideoCore::Surface::BytesPerBlock(format),
168 .height = config.height,
169 .depth = 1,
170 };
171 pitch = config.pitch;
172 } else {
173 type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D;
174 block = Extent3D{
175 .width = config.block_width,
176 .height = config.block_height,
177 .depth = config.block_depth,
178 };
179 // 3D blits with more than one slice are not implemented for now
180 // Render to individual slices
181 size = Extent3D{
182 .width = config.width,
183 .height = config.height,
184 .depth = 1,
185 };
186 }
187}
188
189} // namespace VideoCommon
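
For pitch-linear targets the constructors above recover the width in blocks from the byte pitch; a worked sketch of that arithmetic, assuming a 4-byte-per-block format (WidthFromPitch is an illustrative name):

#include <cstdint>

// Pitch is the byte distance between two rows. For a linear image the width
// in blocks is the pitch divided by the bytes each block occupies.
constexpr std::uint32_t WidthFromPitch(std::uint32_t pitch, std::uint32_t bytes_per_block) {
    return pitch / bytes_per_block;
}

static_assert(WidthFromPitch(5120, 4) == 1280); // e.g. a 1280-wide RGBA8 row
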
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h
new file mode 100644
index 000000000..5049fc36e
--- /dev/null
+++ b/src/video_core/texture_cache/image_info.h
@@ -0,0 +1,38 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/surface.h"
10#include "video_core/texture_cache/types.h"
11
12namespace VideoCommon {
13
14using Tegra::Texture::TICEntry;
15using VideoCore::Surface::PixelFormat;
16
17struct ImageInfo {
18 explicit ImageInfo() = default;
19 explicit ImageInfo(const TICEntry& config) noexcept;
20 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept;
21 explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept;
22 explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept;
23
24 PixelFormat format = PixelFormat::Invalid;
25 ImageType type = ImageType::e1D;
26 SubresourceExtent resources;
27 Extent3D size{1, 1, 1};
28 union {
29 Extent3D block{0, 0, 0};
30 u32 pitch;
31 };
32 u32 layer_stride = 0;
33 u32 maybe_unaligned_layer_stride = 0;
34 u32 num_samples = 1;
35 u32 tile_width_spacing = 0;
36};
37
38} // namespace VideoCommon
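
block and pitch share storage above because an image is either block-linear or pitch-linear, never both; a sketch of the active-member discipline this implies, with illustrative types (Layout, Extent3DDemo, LayoutInfo):

#include <cstdint>

enum class Layout { BlockLinear, PitchLinear };

struct Extent3DDemo {
    std::uint32_t width, height, depth;
};

// Mirrors the block/pitch union: the active member is implied by the layout.
struct LayoutInfo {
    Layout layout = Layout::BlockLinear;
    union {
        Extent3DDemo block{0, 0, 0}; // active when layout == BlockLinear
        std::uint32_t pitch;         // active when layout == PitchLinear
    };
};

LayoutInfo MakePitchLinear(std::uint32_t pitch) {
    LayoutInfo info;
    info.layout = Layout::PitchLinear;
    info.pitch = pitch; // writing it switches the active union member
    return info;
}
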
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp
new file mode 100644
index 000000000..076a4bcfd
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.cpp
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "common/assert.h"
8#include "core/settings.h"
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_info.h"
13#include "video_core/texture_cache/image_view_base.h"
14#include "video_core/texture_cache/image_view_info.h"
15#include "video_core/texture_cache/types.h"
16
17namespace VideoCommon {
18
19ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
20 ImageId image_id_)
21 : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range},
22 size{
23 .width = std::max(image_info.size.width >> range.base.level, 1u),
24 .height = std::max(image_info.size.height >> range.base.level, 1u),
25 .depth = std::max(image_info.size.depth >> range.base.level, 1u),
26 } {
27 ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format),
28 "Image view format {} is incompatible with image format {}", info.format,
29 image_info.format);
30 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
31 if (image_info.type == ImageType::Linear && is_async) {
32 flags |= ImageViewFlagBits::PreemtiveDownload;
33 }
34 if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) {
35 flags |= ImageViewFlagBits::Slice;
36 }
37}
38
39ImageViewBase::ImageViewBase(const NullImageParams&) {}
40
41} // namespace VideoCommon
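
The view size above halves each dimension per mip level and clamps at one; a tiny self-contained version of that computation (MipExtent is an illustrative name):

#include <algorithm>
#include <cstdint>

// Each mip level halves an extent, but no dimension ever drops below 1.
constexpr std::uint32_t MipExtent(std::uint32_t base, std::uint32_t level) {
    return std::max(base >> level, 1u);
}

static_assert(MipExtent(1024, 3) == 128);
static_assert(MipExtent(4, 5) == 1); // clamped, not zero
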
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h
new file mode 100644
index 000000000..73954167e
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_base.h
@@ -0,0 +1,47 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h"
10
11namespace VideoCommon {
12
13using VideoCore::Surface::PixelFormat;
14
15struct ImageViewInfo;
16struct ImageInfo;
17
18struct NullImageParams {};
19
20enum class ImageViewFlagBits : u16 {
21 PreemtiveDownload = 1 << 0,
22 Strong = 1 << 1,
23 Slice = 1 << 2,
24};
25DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits)
26
27struct ImageViewBase {
28 explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info,
29 ImageId image_id);
30 explicit ImageViewBase(const NullImageParams&);
31
32 [[nodiscard]] bool IsBuffer() const noexcept {
33 return type == ImageViewType::Buffer;
34 }
35
36 ImageId image_id{};
37 PixelFormat format{};
38 ImageViewType type{};
39 SubresourceRange range;
40 Extent3D size{0, 0, 0};
41 ImageViewFlagBits flags{};
42
43 u64 invalidation_tick = 0;
44 u64 modification_tick = 0;
45};
46
47} // namespace VideoCommon
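
ImageViewFlagBits relies on the project's DECLARE_ENUM_FLAG_OPERATORS helper; a self-contained sketch of the same type-safe flag pattern, with assumed names (Flags, HasFlag):

#include <cstdint>
#include <type_traits>

enum class Flags : std::uint16_t {
    Download = 1 << 0,
    Strong = 1 << 1,
    Slice = 1 << 2,
};

// Bitwise-or over the underlying type, returning the strong enum.
constexpr Flags operator|(Flags lhs, Flags rhs) {
    using T = std::underlying_type_t<Flags>;
    return static_cast<Flags>(static_cast<T>(lhs) | static_cast<T>(rhs));
}

constexpr bool HasFlag(Flags value, Flags flag) {
    using T = std::underlying_type_t<Flags>;
    return (static_cast<T>(value) & static_cast<T>(flag)) != 0;
}

static_assert(HasFlag(Flags::Download | Flags::Slice, Flags::Slice));
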
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
new file mode 100644
index 000000000..faf5b151f
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -0,0 +1,88 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/assert.h"
8#include "video_core/texture_cache/image_view_info.h"
9#include "video_core/texture_cache/texture_cache.h"
10#include "video_core/texture_cache/types.h"
11#include "video_core/textures/texture.h"
12
13namespace VideoCommon {
14
15namespace {
16
17constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
18
19[[nodiscard]] u8 CastSwizzle(SwizzleSource source) {
20 const u8 casted = static_cast<u8>(source);
21 ASSERT(static_cast<SwizzleSource>(casted) == source);
22 return casted;
23}
24
25} // Anonymous namespace
26
27ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept
28 : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)},
29 y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)},
30 w_source{CastSwizzle(config.w_source)} {
31 range.base = SubresourceBase{
32 .level = static_cast<s32>(config.res_min_mip_level),
33 .layer = base_layer,
34 };
35 range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1;
36
37 switch (config.texture_type) {
38 case TextureType::Texture1D:
39 ASSERT(config.Height() == 1);
40 ASSERT(config.Depth() == 1);
41 type = ImageViewType::e1D;
42 break;
43 case TextureType::Texture2D:
44 case TextureType::Texture2DNoMipmap:
45 ASSERT(config.Depth() == 1);
46 type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect;
47 break;
48 case TextureType::Texture3D:
49 type = ImageViewType::e3D;
50 break;
51 case TextureType::TextureCubemap:
52 ASSERT(config.Depth() == 1);
53 type = ImageViewType::Cube;
54 range.extent.layers = 6;
55 break;
56 case TextureType::Texture1DArray:
57 type = ImageViewType::e1DArray;
58 range.extent.layers = config.Depth();
59 break;
60 case TextureType::Texture2DArray:
61 type = ImageViewType::e2DArray;
62 range.extent.layers = config.Depth();
63 break;
64 case TextureType::Texture1DBuffer:
65 type = ImageViewType::Buffer;
66 break;
67 case TextureType::TextureCubeArray:
68 type = ImageViewType::CubeArray;
69 range.extent.layers = config.Depth() * 6;
70 break;
71 default:
72 UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value()));
73 break;
74 }
75}
76
77ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_,
78 SubresourceRange range_) noexcept
79 : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE},
80 y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE},
81 w_source{RENDER_TARGET_SWIZZLE} {}
82
83bool ImageViewInfo::IsRenderTarget() const noexcept {
84 return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE &&
85 z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE;
86}
87
88} // namespace VideoCommon
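
CastSwizzle above compresses an enum into a u8 and asserts the round trip; the same defensive-narrowing pattern in isolation, with an assumed enum (Source) and name (CastNarrow):

#include <cassert>
#include <cstdint>

enum class Source : std::uint32_t { R = 2, G = 3, B = 4, A = 5 };

// Narrow to one byte, but verify nothing was lost before trusting the value.
std::uint8_t CastNarrow(Source source) {
    const auto casted = static_cast<std::uint8_t>(source);
    assert(static_cast<Source>(casted) == source);
    return casted;
}
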
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h
new file mode 100644
index 000000000..0c1f99117
--- /dev/null
+++ b/src/video_core/texture_cache/image_view_info.h
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9
10#include "video_core/surface.h"
11#include "video_core/texture_cache/types.h"
12#include "video_core/textures/texture.h"
13
14namespace VideoCommon {
15
16using Tegra::Texture::SwizzleSource;
17using Tegra::Texture::TICEntry;
18using VideoCore::Surface::PixelFormat;
19
20/// Properties used to determine an image view
21struct ImageViewInfo {
22 explicit ImageViewInfo() noexcept = default;
23 explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept;
24 explicit ImageViewInfo(ImageViewType type, PixelFormat format,
25 SubresourceRange range = {}) noexcept;
26
27 auto operator<=>(const ImageViewInfo&) const noexcept = default;
28
29 [[nodiscard]] bool IsRenderTarget() const noexcept;
30
31 [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept {
32 return std::array{
33 static_cast<SwizzleSource>(x_source),
34 static_cast<SwizzleSource>(y_source),
35 static_cast<SwizzleSource>(z_source),
36 static_cast<SwizzleSource>(w_source),
37 };
38 }
39
40 ImageViewType type{};
41 PixelFormat format{};
42 SubresourceRange range;
43 u8 x_source = static_cast<u8>(SwizzleSource::R);
44 u8 y_source = static_cast<u8>(SwizzleSource::G);
45 u8 z_source = static_cast<u8>(SwizzleSource::B);
46 u8 w_source = static_cast<u8>(SwizzleSource::A);
47};
48static_assert(std::has_unique_object_representations_v<ImageViewInfo>);
49
50} // namespace VideoCommon
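
The static_assert above is what makes ImageViewInfo safe to compare and hash bytewise: no padding, no two bit patterns for one value. A sketch of a byte hash that leans on the same guarantee, over an assumed trivial struct (Key, HashBytes):

#include <cstdint>
#include <cstring>
#include <type_traits>

struct Key {
    std::uint16_t a;
    std::uint16_t b;
};
static_assert(std::has_unique_object_representations_v<Key>);

// FNV-1a over the object's bytes; sound only because equal Key values are
// guaranteed to share one byte representation (no padding, no alternates).
std::uint64_t HashBytes(const Key& key) {
    std::uint64_t hash = 14695981039346656037ull;
    unsigned char bytes[sizeof(Key)];
    std::memcpy(bytes, &key, sizeof(Key));
    for (const unsigned char byte : bytes) {
        hash = (hash ^ byte) * 1099511628211ull;
    }
    return hash;
}
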
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
new file mode 100644
index 000000000..9b9544b07
--- /dev/null
+++ b/src/video_core/texture_cache/render_targets.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <span>
9#include <utility>
10
11#include "common/bit_cast.h"
12#include "video_core/texture_cache/types.h"
13
14namespace VideoCommon {
15
16/// Framebuffer properties used to look up a framebuffer
17struct RenderTargets {
18 constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
19
20 constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept {
21 const auto contains = [elements](ImageViewId item) {
22 return std::ranges::find(elements, item) != elements.end();
23 };
24 return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id);
25 }
26
27 std::array<ImageViewId, NUM_RT> color_buffer_ids;
28 ImageViewId depth_buffer_id;
29 std::array<u8, NUM_RT> draw_buffers{};
30 Extent2D size;
31};
32
33} // namespace VideoCommon
34
35namespace std {
36
37template <>
38struct hash<VideoCommon::RenderTargets> {
39 size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept {
40 using VideoCommon::ImageViewId;
41 size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id);
42 for (const ImageViewId color_buffer_id : rt.color_buffer_ids) {
43 value ^= std::hash<ImageViewId>{}(color_buffer_id);
44 }
45 value ^= Common::BitCast<u64>(rt.draw_buffers);
46 value ^= Common::BitCast<u64>(rt.size);
47 return value;
48 }
49};
50
51} // namespace std
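
With the std::hash specialization above, RenderTargets can key a standard container directly (the defaulted operator<=> also provides the operator== the map needs). A usage sketch, assuming render_targets.h is on the include path; Framebuffer and GetFramebuffer are hypothetical:

#include <unordered_map>

#include "video_core/texture_cache/render_targets.h"

struct Framebuffer {}; // hypothetical cached value

std::unordered_map<VideoCommon::RenderTargets, Framebuffer> framebuffers;

Framebuffer& GetFramebuffer(const VideoCommon::RenderTargets& key) {
    return framebuffers[key]; // default-constructs on first lookup
}

Note the XOR-based combine is order-insensitive across color buffers, which trades some hash quality for simplicity; equality still distinguishes such keys.
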
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h
new file mode 100644
index 000000000..04539a43c
--- /dev/null
+++ b/src/video_core/texture_cache/samples_helper.h
@@ -0,0 +1,55 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/assert.h"
10#include "video_core/textures/texture.h"
11
12namespace VideoCommon {
13
14[[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) {
15 switch (num_samples) {
16 case 1:
17 return {0, 0};
18 case 2:
19 return {1, 0};
20 case 4:
21 return {1, 1};
22 case 8:
23 return {2, 1};
24 case 16:
25 return {2, 2};
26 }
27 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
28 return {1, 1};
29}
30
31[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
32 using Tegra::Texture::MsaaMode;
33 switch (msaa_mode) {
34 case MsaaMode::Msaa1x1:
35 return 1;
36 case MsaaMode::Msaa2x1:
37 case MsaaMode::Msaa2x1_D3D:
38 return 2;
39 case MsaaMode::Msaa2x2:
40 case MsaaMode::Msaa2x2_VC4:
41 case MsaaMode::Msaa2x2_VC12:
42 return 4;
43 case MsaaMode::Msaa4x2:
44 case MsaaMode::Msaa4x2_D3D:
45 case MsaaMode::Msaa4x2_VC8:
46 case MsaaMode::Msaa4x2_VC24:
47 return 8;
48 case MsaaMode::Msaa4x4:
49 return 16;
50 }
51 UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
52 return 1;
53}
54
55} // namespace VideoCommon
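
SamplesLog2 splits the sample count into per-axis shifts (e.g. 8 samples is a 4x2 pattern, hence {2, 1}); a sketch of how such shifts scale a surface's dimensions (ScaleBySamples is an illustrative name):

#include <cstdint>
#include <utility>

// Grow a base extent by the per-axis sample shifts, mirroring how an MSAA
// surface covers samples_x * samples_y times the area of its resolve.
constexpr std::pair<std::uint32_t, std::uint32_t> ScaleBySamples(
    std::uint32_t width, std::uint32_t height, int samples_x, int samples_y) {
    return {width << samples_x, height << samples_y};
}

static_assert(ScaleBySamples(640, 360, 2, 1) ==
              std::pair<std::uint32_t, std::uint32_t>{2560u, 720u});
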
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
new file mode 100644
index 000000000..eae3be6ea
--- /dev/null
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -0,0 +1,156 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <limits>
9#include <numeric>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16
17namespace VideoCommon {
18
19struct SlotId {
20 static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
21
22 constexpr auto operator<=>(const SlotId&) const noexcept = default;
23
24 constexpr explicit operator bool() const noexcept {
25 return index != INVALID_INDEX;
26 }
27
28 u32 index = INVALID_INDEX;
29};
30
31template <class T>
32requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
33class SlotVector {
34public:
35 ~SlotVector() noexcept {
36 size_t index = 0;
37 for (u64 bits : stored_bitset) {
38 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
39 if ((bits & 1) != 0) {
40 values[index + bit].object.~T();
41 }
42 }
43 index += 64;
44 }
45 delete[] values;
46 }
47
48 [[nodiscard]] T& operator[](SlotId id) noexcept {
49 ValidateIndex(id);
50 return values[id.index].object;
51 }
52
53 [[nodiscard]] const T& operator[](SlotId id) const noexcept {
54 ValidateIndex(id);
55 return values[id.index].object;
56 }
57
58 template <typename... Args>
59 [[nodiscard]] SlotId insert(Args&&... args) noexcept {
60 const u32 index = FreeValueIndex();
61 new (&values[index].object) T(std::forward<Args>(args)...);
62 SetStorageBit(index);
63
64 return SlotId{index};
65 }
66
67 void erase(SlotId id) noexcept {
68 values[id.index].object.~T();
69 free_list.push_back(id.index);
70 ResetStorageBit(id.index);
71 }
72
73private:
74 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {}
76 };
77
78 union Entry {
79 Entry() noexcept : dummy{} {}
80 ~Entry() noexcept {}
81
82 NonTrivialDummy dummy;
83 T object;
84 };
85
86 void SetStorageBit(u32 index) noexcept {
87 stored_bitset[index / 64] |= u64(1) << (index % 64);
88 }
89
90 void ResetStorageBit(u32 index) noexcept {
91 stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
92 }
93
94 bool ReadStorageBit(u32 index) noexcept {
95 return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
96 }
97
98 void ValidateIndex(SlotId id) const noexcept {
99 DEBUG_ASSERT(id);
100 DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
101 DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
102 }
103
104 [[nodiscard]] u32 FreeValueIndex() noexcept {
105 if (free_list.empty()) {
106 Reserve(values_capacity ? (values_capacity << 1) : 1);
107 }
108 const u32 free_index = free_list.back();
109 free_list.pop_back();
110 return free_index;
111 }
112
113 void Reserve(size_t new_capacity) noexcept {
114 Entry* const new_values = new Entry[new_capacity];
115 size_t index = 0;
116 for (u64 bits : stored_bitset) {
117 for (size_t bit = 0; bits; ++bit, bits >>= 1) {
118 const size_t i = index + bit;
119 if ((bits & 1) == 0) {
120 continue;
121 }
122 T& old_value = values[i].object;
123 new (&new_values[i].object) T(std::move(old_value));
124 old_value.~T();
125 }
126 index += 64;
127 }
128
129 stored_bitset.resize((new_capacity + 63) / 64);
130
131 const size_t old_free_size = free_list.size();
132 free_list.resize(old_free_size + (new_capacity - values_capacity));
133 std::iota(free_list.begin() + old_free_size, free_list.end(),
134 static_cast<u32>(values_capacity));
135
136 delete[] values;
137 values = new_values;
138 values_capacity = new_capacity;
139 }
140
141 Entry* values = nullptr;
142 size_t values_capacity = 0;
143 size_t values_size = 0;
144
145 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list;
147};
148
149} // namespace VideoCommon
150
151template <>
152struct std::hash<VideoCommon::SlotId> {
153 size_t operator()(const VideoCommon::SlotId& id) const noexcept {
154 return std::hash<u32>{}(id.index);
155 }
156};
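
A usage sketch for SlotVector, assuming slot_vector.h is included; ids stay valid across growth because every access goes through the index, never a raw pointer. Demo is a hypothetical caller:

#include <string>

#include "video_core/texture_cache/slot_vector.h"

void Demo() {
    VideoCommon::SlotVector<std::string> slots;
    const VideoCommon::SlotId id = slots.insert("hello");
    slots[id] += " world"; // O(1) lookup through the id
    slots.erase(id);       // the slot is recycled by a later insert
}
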
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
deleted file mode 100644
index efbcf6723..000000000
--- a/src/video_core/texture_cache/surface_base.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/algorithm.h"
6#include "common/assert.h"
7#include "common/common_types.h"
8#include "common/microprofile.h"
9#include "video_core/memory_manager.h"
10#include "video_core/texture_cache/surface_base.h"
11#include "video_core/texture_cache/surface_params.h"
12#include "video_core/textures/convert.h"
13
14namespace VideoCommon {
15
16MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
17MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));
18
19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
23
24StagingCache::StagingCache() = default;
25
26StagingCache::~StagingCache() = default;
27
28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
29 bool is_astc_supported_)
30 : params{params_}, gpu_addr{gpu_addr_}, mipmap_sizes(params_.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported_;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
35 std::size_t offset = 0;
36 for (u32 level = 0; level < params.num_levels; ++level) {
37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
38 mipmap_sizes[level] = mipmap_size;
39 mipmap_offsets[level] = offset;
40 offset += mipmap_size;
41 }
42 layer_size = offset;
43 if (params.is_layered) {
44 if (params.is_tiled) {
45 layer_size =
46 SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
47 }
48 guest_memory_size = layer_size * params.depth;
49 } else {
50 guest_memory_size = layer_size;
51 }
52}
53
54MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
55 const u32 src_bpp{params.GetBytesPerPixel()};
56 const u32 dst_bpp{rhs.GetBytesPerPixel()};
57 const bool ib1 = params.IsBuffer();
58 const bool ib2 = rhs.IsBuffer();
59 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
60 const bool cb1 = params.IsCompressed();
61 const bool cb2 = rhs.IsCompressed();
62 if (cb1 == cb2) {
63 return MatchTopologyResult::FullMatch;
64 }
65 return MatchTopologyResult::CompressUnmatch;
66 }
67 return MatchTopologyResult::None;
68}
69
70MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
71 // Buffer surface Check
72 if (params.IsBuffer()) {
73 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
74 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
75 if (wd1 == wd2) {
76 return MatchStructureResult::FullMatch;
77 }
78 return MatchStructureResult::None;
79 }
80
81 // Linear Surface check
82 if (!params.is_tiled) {
83 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
84 if (params.width == rhs.width) {
85 return MatchStructureResult::FullMatch;
86 } else {
87 return MatchStructureResult::SemiMatch;
88 }
89 }
90 return MatchStructureResult::None;
91 }
92
93 // Tiled Surface check
94 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
95 params.tile_width_spacing, params.num_levels) ==
96 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
97 rhs.tile_width_spacing, rhs.num_levels)) {
98 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
99 return MatchStructureResult::FullMatch;
100 }
101 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
102 rhs.pixel_format);
103 const u32 hs =
104 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
105 const u32 w1 = params.GetBlockAlignedWidth();
106 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
107 return MatchStructureResult::SemiMatch;
108 }
109 }
110 return MatchStructureResult::None;
111}
112
113std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
114 const GPUVAddr candidate_gpu_addr) const {
115 if (gpu_addr == candidate_gpu_addr) {
116 return {{0, 0}};
117 }
118
119 if (candidate_gpu_addr < gpu_addr) {
120 return std::nullopt;
121 }
122
123 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
124 const auto layer{static_cast<u32>(relative_address / layer_size)};
125 if (layer >= params.depth) {
126 return std::nullopt;
127 }
128
129 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
130 const auto mipmap_it =
131 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
132 if (mipmap_it == mipmap_offsets.end()) {
133 return std::nullopt;
134 }
135
136 const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
137 return std::make_pair(layer, level);
138}
139
140std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
141 const u32 layers{params.depth};
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
145
146 for (u32 layer = 0; layer < layers; layer++) {
147 for (u32 level = 0; level < mipmaps; level++) {
148 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
149 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
150 result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1);
151 }
152 }
153 return result;
154}
155
156std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
157 const u32 mipmaps{params.num_levels};
158 std::vector<CopyParams> result;
159 result.reserve(mipmaps);
160
161 for (u32 level = 0; level < mipmaps; level++) {
162 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
163 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
164 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
165 result.emplace_back(width, height, depth, level);
166 }
167 return result;
168}
169
170void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory,
171 const SurfaceParams& surface_params, u8* buffer, u32 level) {
172 const u32 width{surface_params.GetMipWidth(level)};
173 const u32 height{surface_params.GetMipHeight(level)};
174 const u32 block_height{surface_params.GetMipBlockHeight(level)};
175 const u32 block_depth{surface_params.GetMipBlockDepth(level)};
176
177 std::size_t guest_offset{mipmap_offsets[level]};
178 if (surface_params.is_layered) {
179 std::size_t host_offset = 0;
180 const std::size_t guest_stride = layer_size;
181 const std::size_t host_stride = surface_params.GetHostLayerSize(level);
182 for (u32 layer = 0; layer < surface_params.depth; ++layer) {
183 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height,
184 block_depth, 1, surface_params.tile_width_spacing, buffer + host_offset,
185 memory + guest_offset);
186 guest_offset += guest_stride;
187 host_offset += host_stride;
188 }
189 } else {
190 MortonSwizzle(mode, surface_params.pixel_format, width, block_height, height, block_depth,
191 surface_params.GetMipDepth(level), surface_params.tile_width_spacing, buffer,
192 memory + guest_offset);
193 }
194}
195
196void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
197 StagingCache& staging_cache) {
198 MICROPROFILE_SCOPE(GPU_Load_Texture);
199 auto& staging_buffer = staging_cache.GetBuffer(0);
200 u8* host_ptr;
201 // Use an extra temporary buffer
202 auto& tmp_buffer = staging_cache.GetBuffer(1);
203 tmp_buffer.resize(guest_memory_size);
204 host_ptr = tmp_buffer.data();
205 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
206
207 if (params.is_tiled) {
208 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
209 params.block_width, static_cast<u32>(params.target));
210 for (u32 level = 0; level < params.num_levels; ++level) {
211 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
212 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
213 staging_buffer.data() + host_offset, level);
214 }
215 } else {
216 ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
217 const u32 bpp{params.GetBytesPerPixel()};
218 const u32 block_width{params.GetDefaultBlockWidth()};
219 const u32 block_height{params.GetDefaultBlockHeight()};
220 const u32 width{(params.width + block_width - 1) / block_width};
221 const u32 height{(params.height + block_height - 1) / block_height};
222 const u32 copy_size{width * bpp};
223 if (params.pitch == copy_size) {
224 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
225 } else {
226 const u8* start{host_ptr};
227 u8* write_to{staging_buffer.data()};
228 for (u32 h = height; h > 0; --h) {
229 std::memcpy(write_to, start, copy_size);
230 start += params.pitch;
231 write_to += copy_size;
232 }
233 }
234 }
235
236 if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
237 return;
238 }
239
240 for (u32 level = params.num_levels; level--;) {
241 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
242 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
243 u8* const in_buffer = staging_buffer.data() + in_host_offset;
244 u8* const out_buffer = staging_buffer.data() + out_host_offset;
245 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
246 params.GetMipWidth(level), params.GetMipHeight(level),
247 params.GetMipDepth(level), true, true);
248 }
249}
250
251void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
252 StagingCache& staging_cache) {
253 MICROPROFILE_SCOPE(GPU_Flush_Texture);
254 auto& staging_buffer = staging_cache.GetBuffer(0);
255 u8* host_ptr;
256
257 // Use an extra temporary buffer
258 auto& tmp_buffer = staging_cache.GetBuffer(1);
259 tmp_buffer.resize(guest_memory_size);
260 host_ptr = tmp_buffer.data();
261
262 if (params.target == SurfaceTarget::Texture3D) {
263 // Special case for 3D texture segments
264 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
265 }
266
267 if (params.is_tiled) {
268 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
269 for (u32 level = 0; level < params.num_levels; ++level) {
270 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
271 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
272 staging_buffer.data() + host_offset, level);
273 }
274 } else if (params.IsBuffer()) {
275 // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
276 // memory.
277 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
278 } else {
279 ASSERT(params.target == SurfaceTarget::Texture2D);
280 ASSERT(params.num_levels == 1);
281
282 const u32 bpp{params.GetBytesPerPixel()};
283 const u32 copy_size{params.width * bpp};
284 if (params.pitch == copy_size) {
285 std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
286 } else {
287 u8* start{host_ptr};
288 const u8* read_to{staging_buffer.data()};
289 for (u32 h = params.height; h > 0; --h) {
290 std::memcpy(start, read_to, copy_size);
291 start += params.pitch;
292 read_to += copy_size;
293 }
294 }
295 }
296 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
297}
298
299} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
deleted file mode 100644
index b57135fe4..000000000
--- a/src/video_core/texture_cache/surface_base.h
+++ /dev/null
@@ -1,333 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <tuple>
9#include <unordered_map>
10#include <vector>
11
12#include "common/common_types.h"
13#include "video_core/gpu.h"
14#include "video_core/morton.h"
15#include "video_core/texture_cache/copy_params.h"
16#include "video_core/texture_cache/surface_params.h"
17#include "video_core/texture_cache/surface_view.h"
18
19namespace Tegra {
20class MemoryManager;
21}
22
23namespace VideoCommon {
24
25using VideoCore::MortonSwizzleMode;
26using VideoCore::Surface::SurfaceTarget;
27
28enum class MatchStructureResult : u32 {
29 FullMatch = 0,
30 SemiMatch = 1,
31 None = 2,
32};
33
34enum class MatchTopologyResult : u32 {
35 FullMatch = 0,
36 CompressUnmatch = 1,
37 None = 2,
38};
39
40class StagingCache {
41public:
42 explicit StagingCache();
43 ~StagingCache();
44
45 std::vector<u8>& GetBuffer(std::size_t index) {
46 return staging_buffer[index];
47 }
48
49 const std::vector<u8>& GetBuffer(std::size_t index) const {
50 return staging_buffer[index];
51 }
52
53 void SetSize(std::size_t size) {
54 staging_buffer.resize(size);
55 }
56
57private:
58 std::vector<std::vector<u8>> staging_buffer;
59};
60
61class SurfaceBaseImpl {
62public:
63 void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
64
65 void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);
66
67 GPUVAddr GetGpuAddr() const {
68 return gpu_addr;
69 }
70
71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cpu_addr < end) && (cpu_addr_end > start);
73 }
74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
76 const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
77 return gpu_addr <= other_start && other_end <= gpu_addr_end;
78 }
79
80 // Use only when recycling a surface
81 void SetGpuAddr(const GPUVAddr new_addr) {
82 gpu_addr = new_addr;
83 }
84
85 VAddr GetCpuAddr() const {
86 return cpu_addr;
87 }
88
89 VAddr GetCpuAddrEnd() const {
90 return cpu_addr_end;
91 }
92
93 void SetCpuAddr(const VAddr new_addr) {
94 cpu_addr = new_addr;
95 cpu_addr_end = new_addr + guest_memory_size;
96 }
97
98 const SurfaceParams& GetSurfaceParams() const {
99 return params;
100 }
101
102 std::size_t GetSizeInBytes() const {
103 return guest_memory_size;
104 }
105
106 std::size_t GetHostSizeInBytes() const {
107 return host_memory_size;
108 }
109
110 std::size_t GetMipmapSize(const u32 level) const {
111 return mipmap_sizes[level];
112 }
113
114 bool IsLinear() const {
115 return !params.is_tiled;
116 }
117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
123 return params.pixel_format == pixel_format;
124 }
125
126 VideoCore::Surface::PixelFormat GetFormat() const {
127 return params.pixel_format;
128 }
129
130 bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
131 return params.target == target;
132 }
133
134 MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;
135
136 MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;
137
138 bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
139 return std::tie(gpu_addr, params.target, params.num_levels) ==
140 std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
141 params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
142 }
143
144 std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;
145
146 std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
147 return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
148 }
149
150protected:
151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr_, const SurfaceParams& params_,
152 bool is_astc_supported_);
153 ~SurfaceBaseImpl() = default;
154
155 virtual void DecorateSurfaceName() = 0;
156
157 const SurfaceParams params;
158 std::size_t layer_size;
159 std::size_t guest_memory_size;
160 std::size_t host_memory_size;
161 GPUVAddr gpu_addr{};
162 VAddr cpu_addr{};
163 VAddr cpu_addr_end{};
164 bool is_converted{};
165
166 std::vector<std::size_t> mipmap_sizes;
167 std::vector<std::size_t> mipmap_offsets;
168
169private:
170 void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& surface_params,
171 u8* buffer, u32 level);
172
173 std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;
174
175 std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
176};
177
178template <typename TView>
179class SurfaceBase : public SurfaceBaseImpl {
180public:
181 virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;
182
183 virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
184
185 void MarkAsModified(bool is_modified_, u64 tick) {
186 is_modified = is_modified_ || is_target;
187 modification_tick = tick;
188 }
189
190 void MarkAsRenderTarget(bool is_target_, u32 index_) {
191 is_target = is_target_;
192 index = index_;
193 }
194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
211 void MarkAsPicked(bool is_picked_) {
212 is_picked = is_picked_;
213 }
214
215 bool IsModified() const {
216 return is_modified;
217 }
218
219 bool IsProtected() const {
220 // Only 3D slices are to be protected
221 return is_target && params.target == SurfaceTarget::Texture3D;
222 }
223
224 bool IsRenderTarget() const {
225 return is_target;
226 }
227
228 u32 GetRenderTarget() const {
229 return index;
230 }
231
232 bool IsRegistered() const {
233 return is_registered;
234 }
235
236 bool IsPicked() const {
237 return is_picked;
238 }
239
240 void MarkAsRegistered(bool is_reg) {
241 is_registered = is_reg;
242 }
243
244 u64 GetModificationTick() const {
245 return modification_tick;
246 }
247
248 TView EmplaceOverview(const SurfaceParams& overview_params) {
249 const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 }
252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
259 const GPUVAddr view_addr,
260 const std::size_t candidate_size, const u32 mipmap,
261 const u32 layer) {
262 const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
263 if (!layer_mipmap) {
264 return {};
265 }
266 const auto [end_layer, end_mipmap] = *layer_mipmap;
267 if (layer != end_layer) {
268 if (mipmap == 0 && end_mipmap == 0) {
269 return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
270 }
271 return {};
272 } else {
273 return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
274 }
275 }
276
277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
278 const std::size_t candidate_size) {
279 if (params.target == SurfaceTarget::Texture3D ||
280 view_params.target == SurfaceTarget::Texture3D ||
281 (params.num_levels == 1 && !params.is_layered)) {
282 return {};
283 }
284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
285 if (!layer_mipmap) {
286 return {};
287 }
288 const auto [layer, mipmap] = *layer_mipmap;
289 if (GetMipmapSize(mipmap) != candidate_size) {
290 return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
291 }
292 return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
293 }
294
295 TView GetMainView() const {
296 return main_view;
297 }
298
299protected:
300 explicit SurfaceBase(const GPUVAddr gpu_addr_, const SurfaceParams& params_,
301 bool is_astc_supported_)
302 : SurfaceBaseImpl{gpu_addr_, params_, is_astc_supported_} {}
303
304 ~SurfaceBase() = default;
305
306 virtual TView CreateView(const ViewParams& view_key) = 0;
307
308 TView main_view;
309 std::unordered_map<ViewParams, TView> views;
310
311private:
312 TView GetView(const ViewParams& key) {
313 const auto [entry, is_cache_miss] = views.try_emplace(key);
314 auto& view{entry->second};
315 if (is_cache_miss) {
316 view = CreateView(key);
317 }
318 return view;
319 }
320
321 static constexpr u32 NO_RT = 0xFFFFFFFF;
322
323 bool is_modified{};
324 bool is_target{};
325 bool is_registered{};
326 bool is_picked{};
327 bool is_memory_marked{};
328 bool is_sync_pending{};
329 u32 index{NO_RT};
330 u64 modification_tick{};
331};
332
333} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
deleted file mode 100644
index 96f93246d..000000000
--- a/src/video_core/texture_cache/surface_params.cpp
+++ /dev/null
@@ -1,445 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <string>
7#include <tuple>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "core/core.h"
12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/format_lookup_table.h"
15#include "video_core/texture_cache/surface_params.h"
16
17namespace VideoCommon {
18
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::SurfaceTarget;
23using VideoCore::Surface::SurfaceTargetFromTextureType;
24using VideoCore::Surface::SurfaceType;
25
26namespace {
27
28SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
29 switch (type) {
30 case Tegra::Shader::TextureType::Texture1D:
31 return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
32 case Tegra::Shader::TextureType::Texture2D:
33 return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
34 case Tegra::Shader::TextureType::Texture3D:
35 ASSERT(!is_array);
36 return SurfaceTarget::Texture3D;
37 case Tegra::Shader::TextureType::TextureCube:
38 return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
39 default:
40 UNREACHABLE();
41 return SurfaceTarget::Texture2D;
42 }
43}
44
45SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
46 switch (type) {
47 case Tegra::Shader::ImageType::Texture1D:
48 return SurfaceTarget::Texture1D;
49 case Tegra::Shader::ImageType::TextureBuffer:
50 return SurfaceTarget::TextureBuffer;
51 case Tegra::Shader::ImageType::Texture1DArray:
52 return SurfaceTarget::Texture1DArray;
53 case Tegra::Shader::ImageType::Texture2D:
54 return SurfaceTarget::Texture2D;
55 case Tegra::Shader::ImageType::Texture2DArray:
56 return SurfaceTarget::Texture2DArray;
57 case Tegra::Shader::ImageType::Texture3D:
58 return SurfaceTarget::Texture3D;
59 default:
60 UNREACHABLE();
61 return SurfaceTarget::Texture2D;
62 }
63}
64
65constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
66 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
67}
68
69} // Anonymous namespace
70
71SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
72 const Tegra::Texture::TICEntry& tic,
73 const VideoCommon::Shader::Sampler& entry) {
74 SurfaceParams params;
75 params.is_tiled = tic.IsTiled();
76 params.srgb_conversion = tic.IsSrgbConversionEnabled();
77 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
78 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
79 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
80 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
81 params.pixel_format = lookup_table.GetPixelFormat(
82 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
83 params.type = GetFormatType(params.pixel_format);
84 if (entry.is_shadow && params.type == SurfaceType::ColorTexture) {
85 switch (params.pixel_format) {
86 case PixelFormat::R16_UNORM:
87 case PixelFormat::R16_FLOAT:
88 params.pixel_format = PixelFormat::D16_UNORM;
89 break;
90 case PixelFormat::R32_FLOAT:
91 params.pixel_format = PixelFormat::D32_FLOAT;
92 break;
93 default:
94 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
95 static_cast<u32>(params.pixel_format));
96 }
97 params.type = GetFormatType(params.pixel_format);
98 }
99 // TODO: on 1DBuffer we should use the tic info.
100 if (tic.IsBuffer()) {
101 params.target = SurfaceTarget::TextureBuffer;
102 params.width = tic.Width();
103 params.pitch = params.width * params.GetBytesPerPixel();
104 params.height = 1;
105 params.depth = 1;
106 params.num_levels = 1;
107 params.emulated_levels = 1;
108 params.is_layered = false;
109 } else {
110 params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array);
111 params.width = tic.Width();
112 params.height = tic.Height();
113 params.depth = tic.Depth();
114 params.pitch = params.is_tiled ? 0 : tic.Pitch();
115 if (params.target == SurfaceTarget::TextureCubemap ||
116 params.target == SurfaceTarget::TextureCubeArray) {
117 params.depth *= 6;
118 }
119 params.num_levels = tic.max_mip_level + 1;
120 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
121 params.is_layered = params.IsLayered();
122 }
123 return params;
124}
125
126SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
127 const Tegra::Texture::TICEntry& tic,
128 const VideoCommon::Shader::Image& entry) {
129 SurfaceParams params;
130 params.is_tiled = tic.IsTiled();
131 params.srgb_conversion = tic.IsSrgbConversionEnabled();
132 params.block_width = params.is_tiled ? tic.BlockWidth() : 0;
133 params.block_height = params.is_tiled ? tic.BlockHeight() : 0;
134 params.block_depth = params.is_tiled ? tic.BlockDepth() : 0;
135 params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
136 params.pixel_format = lookup_table.GetPixelFormat(
137 tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
138 params.type = GetFormatType(params.pixel_format);
139 params.target = ImageTypeToSurfaceTarget(entry.type);
140 // TODO: on 1DBuffer we should use the tic info.
141 if (tic.IsBuffer()) {
142 params.target = SurfaceTarget::TextureBuffer;
143 params.width = tic.Width();
144 params.pitch = params.width * params.GetBytesPerPixel();
145 params.height = 1;
146 params.depth = 1;
147 params.num_levels = 1;
148 params.emulated_levels = 1;
149 params.is_layered = false;
150 } else {
151 params.width = tic.Width();
152 params.height = tic.Height();
153 params.depth = tic.Depth();
154 params.pitch = params.is_tiled ? 0 : tic.Pitch();
155 if (params.target == SurfaceTarget::TextureCubemap ||
156 params.target == SurfaceTarget::TextureCubeArray) {
157 params.depth *= 6;
158 }
159 params.num_levels = tic.max_mip_level + 1;
160 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
161 params.is_layered = params.IsLayered();
162 }
163 return params;
164}
165
166SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
167 const auto& regs = maxwell3d.regs;
168 const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
169 const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
170 const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
171 return {
172 .is_tiled = regs.zeta.memory_layout.type ==
173 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
174 .srgb_conversion = false,
175 .is_layered = is_layered,
176 .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U),
177 .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U),
178 .block_depth = block_depth,
179 .tile_width_spacing = 1,
180 .width = regs.zeta_width,
181 .height = regs.zeta_height,
182 .depth = is_layered ? regs.zeta_layers.Value() : 1U,
183 .pitch = 0,
184 .num_levels = 1,
185 .emulated_levels = 1,
186 .pixel_format = pixel_format,
187 .type = GetFormatType(pixel_format),
188 .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D,
189 };
190}
191
192SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
193 std::size_t index) {
194 const auto& config{maxwell3d.regs.rt[index]};
195 SurfaceParams params;
196 params.is_tiled =
197 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
198 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
199 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB;
200 params.block_width = config.memory_layout.block_width;
201 params.block_height = config.memory_layout.block_height;
202 params.block_depth = config.memory_layout.block_depth;
203 params.tile_width_spacing = 1;
204 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
205 params.type = GetFormatType(params.pixel_format);
206 if (params.is_tiled) {
207 params.pitch = 0;
208 params.width = config.width;
209 } else {
210 const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
211 params.pitch = config.width;
212 params.width = params.pitch / bpp;
213 }
214 params.height = config.height;
215 params.num_levels = 1;
216 params.emulated_levels = 1;
217
218 if (config.memory_layout.is_3d != 0) {
219 params.depth = config.layers.Value();
220 params.is_layered = false;
221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
231 return params;
232}
233
234SurfaceParams SurfaceParams::CreateForFermiCopySurface(
235 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
236 const bool is_tiled = !config.linear;
237 const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format);
238
239 SurfaceParams params{
240 .is_tiled = is_tiled,
241 .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB ||
242 config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB,
243 .is_layered = false,
244 .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U,
245 .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U,
246 .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U,
247 .tile_width_spacing = 1,
248 .width = config.width,
249 .height = config.height,
250 .depth = 1,
251 .pitch = config.pitch,
252 .num_levels = 1,
253 .emulated_levels = 1,
254 .pixel_format = pixel_format,
255 .type = GetFormatType(pixel_format),
256 // TODO(Rodrigo): Try to guess texture arrays from parameters
257 .target = SurfaceTarget::Texture2D,
258 };
259
260 params.is_layered = params.IsLayered();
261 return params;
262}
263
264VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
265 const VideoCommon::Shader::Sampler& entry) {
266 return TextureTypeToSurfaceTarget(entry.type, entry.is_array);
267}
268
269VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget(
270 const VideoCommon::Shader::Image& entry) {
271 return ImageTypeToSurfaceTarget(entry.type);
272}
273
274bool SurfaceParams::IsLayered() const {
275 switch (target) {
276 case SurfaceTarget::Texture1DArray:
277 case SurfaceTarget::Texture2DArray:
278 case SurfaceTarget::TextureCubemap:
279 case SurfaceTarget::TextureCubeArray:
280 return true;
281 default:
282 return false;
283 }
284}
285
286// Auto block resizing algorithm from:
287// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
288u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
289 if (level == 0) {
290 return this->block_height;
291 }
292
293 const u32 height_new{GetMipHeight(level)};
294 const u32 default_block_height{GetDefaultBlockHeight()};
295 const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
296 const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
297 return std::clamp(block_height_new, 3U, 7U) - 3U;
298}
299
300u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
301 if (level == 0) {
302 return this->block_depth;
303 }
304 if (is_layered) {
305 return 0;
306 }
307
308 const u32 depth_new{GetMipDepth(level)};
309 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
310 if (block_depth_new > 4) {
311 return 5 - (GetMipBlockHeight(level) >= 2);
312 }
313 return block_depth_new;
314}
315
316std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
317 std::size_t offset = 0;
318 for (u32 i = 0; i < level; i++) {
319 offset += GetInnerMipmapMemorySize(i, false, false);
320 }
321 return offset;
322}
323
324std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
325 std::size_t offset = 0;
326 if (is_converted) {
327 for (u32 i = 0; i < level; ++i) {
328 offset += GetConvertedMipmapSize(i) * GetNumLayers();
329 }
330 } else {
331 for (u32 i = 0; i < level; ++i) {
332 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
333 }
334 }
335 return offset;
336}
337
338std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
339 constexpr std::size_t rgba8_bpp = 4ULL;
340 const std::size_t mip_width = GetMipWidth(level);
341 const std::size_t mip_height = GetMipHeight(level);
342 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
343 return mip_width * mip_height * mip_depth * rgba8_bpp;
344}
345
346std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
347 std::size_t size = 0;
348 for (u32 level = 0; level < num_levels; ++level) {
349 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
350 }
351 if (is_tiled && is_layered) {
352 return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
353 }
354 return size;
355}
356
357std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
358 bool uncompressed) const {
359 const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
360 const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
361 const u32 mip_depth{is_layered ? 1U : GetMipDepth(level)};
362 if (is_tiled) {
363 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), mip_width,
364 mip_height, mip_depth, GetMipBlockHeight(level),
365 GetMipBlockDepth(level));
366 } else if (as_host_size || IsBuffer()) {
367 return GetBytesPerPixel() * mip_width * mip_height * mip_depth;
368 } else {
369 // Linear Texture Case
370 return pitch * mip_height * mip_depth;
371 }
372}
373
374bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
375 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
376 height, depth, pitch, num_levels, pixel_format, type, target) ==
377 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
378 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
379 rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
380}
381
382std::string SurfaceParams::TargetName() const {
383 switch (target) {
384 case SurfaceTarget::Texture1D:
385 return "1D";
386 case SurfaceTarget::TextureBuffer:
387 return "TexBuffer";
388 case SurfaceTarget::Texture2D:
389 return "2D";
390 case SurfaceTarget::Texture3D:
391 return "3D";
392 case SurfaceTarget::Texture1DArray:
393 return "1DArray";
394 case SurfaceTarget::Texture2DArray:
395 return "2DArray";
396 case SurfaceTarget::TextureCubemap:
397 return "Cube";
398 case SurfaceTarget::TextureCubeArray:
399 return "CubeArray";
400 default:
401 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", target);
402 UNREACHABLE();
403 return fmt::format("TUK({})", target);
404 }
405}
406
407u32 SurfaceParams::GetBlockSize() const {
408 const u32 x = 64U << block_width;
409 const u32 y = 8U << block_height;
410 const u32 z = 1U << block_depth;
411 return x * y * z;
412}
413
414std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
415 const u32 x_pixels = 64U / GetBytesPerPixel();
416 const u32 x = x_pixels << block_width;
417 const u32 y = 8U << block_height;
418 return {x, y};
419}
420
421std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
422 const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
423 const u32 block_size = GetBlockSize();
424 const u32 block_index = offset / block_size;
425 const u32 gob_offset = offset % block_size;
426 const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE);
427 const u32 x_gob_pixels = 64U / GetBytesPerPixel();
428 const u32 x_block_pixels = x_gob_pixels << block_width;
429 const u32 y_block_pixels = 8U << block_height;
430 const u32 z_block_pixels = 1U << block_depth;
431 const u32 x_blocks = div_ceil(width, x_block_pixels);
432 const u32 y_blocks = div_ceil(height, y_block_pixels);
433 const u32 z_blocks = div_ceil(depth, z_block_pixels);
434 const u32 base_x = block_index % x_blocks;
435 const u32 base_y = (block_index / x_blocks) % y_blocks;
436 const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks;
437 u32 x = base_x * x_block_pixels;
438 u32 y = base_y * y_block_pixels;
439 u32 z = base_z * z_block_pixels;
440 z += gob_index >> block_height;
441 y += (gob_index * 8U) % y_block_pixels;
442 return {x, y, z};
443}
444
445} // namespace VideoCommon
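
The GetBlockOffsetXYZ math above is the densest part of the deleted file, so here is a standalone, compilable sketch of the same decomposition. The GOB constants are inlined (a GOB is 64 bytes wide by 8 rows, 512 bytes total, on Maxwell) and the surface values in main are hypothetical, chosen only to illustrate the arithmetic.

    #include <cstdint>
    #include <cstdio>
    #include <tuple>

    // Decompose a guest memory offset into x/y/z pixel coordinates, mirroring
    // SurfaceParams::GetBlockOffsetXYZ. block_* are log2 values, as in the cache.
    std::tuple<uint32_t, uint32_t, uint32_t> BlockOffsetXYZ(
        uint32_t offset, uint32_t width, uint32_t height, uint32_t depth,
        uint32_t bytes_per_pixel, uint32_t block_width, uint32_t block_height,
        uint32_t block_depth) {
        const auto div_ceil = [](uint32_t x, uint32_t y) { return (x + y - 1) / y; };
        constexpr uint32_t GOB_SIZE = 512; // 64 bytes x 8 rows
        const uint32_t block_size =
            (64U << block_width) * (8U << block_height) * (1U << block_depth);
        const uint32_t block_index = offset / block_size;
        const uint32_t gob_index = (offset % block_size) / GOB_SIZE;
        const uint32_t x_block_pixels = (64U / bytes_per_pixel) << block_width;
        const uint32_t y_block_pixels = 8U << block_height;
        const uint32_t z_block_pixels = 1U << block_depth;
        const uint32_t x_blocks = div_ceil(width, x_block_pixels);
        const uint32_t y_blocks = div_ceil(height, y_block_pixels);
        const uint32_t z_blocks = div_ceil(depth, z_block_pixels);
        uint32_t x = (block_index % x_blocks) * x_block_pixels;
        uint32_t y = ((block_index / x_blocks) % y_blocks) * y_block_pixels;
        uint32_t z = ((block_index / (x_blocks * y_blocks)) % z_blocks) * z_block_pixels;
        z += gob_index >> block_height;         // GOBs past the block height stack in Z
        y += (gob_index * 8U) % y_block_pixels; // each GOB covers 8 rows in Y
        return {x, y, z};
    }

    int main() {
        // Hypothetical 512x512 RGBA8 surface with block_height = 4 (16 GOBs tall).
        const auto [x, y, z] = BlockOffsetXYZ(0x25000, 512, 512, 1, 4, 0, 4, 0);
        std::printf("offset 0x25000 -> x=%u y=%u z=%u\n", x, y, z); // 288, 64, 0
    }
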
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
deleted file mode 100644
index 4466c3c34..000000000
--- a/src/video_core/texture_cache/surface_params.h
+++ /dev/null
@@ -1,294 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <utility>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21class FormatLookupTable;
22
23class SurfaceParams {
24public:
25 /// Creates SurfaceParams from a texture configuration.
26 static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
27 const Tegra::Texture::TICEntry& tic,
28 const VideoCommon::Shader::Sampler& entry);
29
30 /// Creates SurfaceParams from an image configuration.
31 static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
32 const Tegra::Texture::TICEntry& tic,
33 const VideoCommon::Shader::Image& entry);
34
35 /// Creates SurfaceParams for a depth buffer configuration.
36 static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
37
38 /// Creates SurfaceParams from a framebuffer configuration.
39 static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
40 std::size_t index);
41
42 /// Creates SurfaceParams from a Fermi2D surface configuration.
43 static SurfaceParams CreateForFermiCopySurface(
44 const Tegra::Engines::Fermi2D::Regs::Surface& config);
45
46 /// Obtains the texture target from a shader's sampler entry.
47 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
48 const VideoCommon::Shader::Sampler& entry);
49
50 /// Obtains the texture target from a shader's image entry.
51 static VideoCore::Surface::SurfaceTarget ExpectedTarget(
52 const VideoCommon::Shader::Image& entry);
53
54 std::size_t Hash() const {
55 return static_cast<std::size_t>(
56 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
57 }
58
59 bool operator==(const SurfaceParams& rhs) const;
60
61 bool operator!=(const SurfaceParams& rhs) const {
62 return !operator==(rhs);
63 }
64
65 std::size_t GetGuestSizeInBytes() const {
66 return GetInnerMemorySize(false, false, false);
67 }
68
69 std::size_t GetHostSizeInBytes(bool is_converted) const {
70 if (!is_converted) {
71 return GetInnerMemorySize(true, false, false);
72 }
73 // ASTC is decompressed in software and emulated as RGBA8
74 std::size_t host_size_in_bytes = 0;
75 for (u32 level = 0; level < num_levels; ++level) {
76 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
77 }
78 return host_size_in_bytes;
79 }
80
81 u32 GetBlockAlignedWidth() const {
82 return Common::AlignUp(width, 64 / GetBytesPerPixel());
83 }
84
85 /// Returns the width of a given mipmap level.
86 u32 GetMipWidth(u32 level) const {
87 return std::max(1U, width >> level);
88 }
89
90 /// Returns the height of a given mipmap level.
91 u32 GetMipHeight(u32 level) const {
92 return std::max(1U, height >> level);
93 }
94
95 /// Returns the depth of a given mipmap level.
96 u32 GetMipDepth(u32 level) const {
97 return is_layered ? depth : std::max(1U, depth >> level);
98 }
99
100 /// Returns the block height of a given mipmap level.
101 u32 GetMipBlockHeight(u32 level) const;
102
103 /// Returns the block depth of a given mipmap level.
104 u32 GetMipBlockDepth(u32 level) const;
105
106 /// Returns the best possible row/pitch alignment for the surface.
107 u32 GetRowAlignment(u32 level, bool is_converted) const {
108 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
109 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
110 }
111
112 /// Returns the offset in bytes in guest memory of a given mipmap level.
113 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
114
115 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
116 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
117
118 /// Returns the size in bytes in guest memory of a given mipmap level.
119 std::size_t GetGuestMipmapSize(u32 level) const {
120 return GetInnerMipmapMemorySize(level, false, false);
121 }
122
123 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
124 std::size_t GetHostMipmapSize(u32 level) const {
125 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
126 }
127
128 std::size_t GetConvertedMipmapSize(u32 level) const;
129
130 /// Gets this texture's Tegra block size in the guest memory layout
131 u32 GetBlockSize() const;
132
133 /// Gets the maximum X, Y sizes of a single block, in pixels.
134 std::pair<u32, u32> GetBlockXY() const;
135
136 /// Gets the x, y, z coordinates that correspond to a memory offset
137 std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
138
139 /// Returns the size of a layer in bytes in guest memory.
140 std::size_t GetGuestLayerSize() const {
141 return GetLayerSize(false, false);
142 }
143
144 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
145 std::size_t GetHostLayerSize(u32 level) const {
146 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
147 return GetInnerMipmapMemorySize(level, true, false);
148 }
149
150 /// Returns the maximum number of mipmap levels the texture can have on the host GPU
151 u32 MaxPossibleMipmap() const {
152 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
153 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
154 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
155 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
156 return max_mipmap;
157 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
158 }
159
160 /// Returns true if the guest surface is a compressed surface.
161 bool IsCompressed() const {
162 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
163 }
164
165 /// Returns the default block width.
166 u32 GetDefaultBlockWidth() const {
167 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
168 }
169
170 /// Returns the default block height.
171 u32 GetDefaultBlockHeight() const {
172 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
173 }
174
175 /// Returns the bits per pixel.
176 u32 GetBitsPerPixel() const {
177 return VideoCore::Surface::GetFormatBpp(pixel_format);
178 }
179
180 /// Returns the bytes per pixel.
181 u32 GetBytesPerPixel() const {
182 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
183 }
184
185 /// Returns true if the pixel format is a depth and/or stencil format.
186 bool IsPixelFormatZeta() const {
187 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
188 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
189 }
190
191 /// Returns true if the surface is a TextureBuffer type of surface.
192 bool IsBuffer() const {
193 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
194 }
195
196 /// Returns the number of layers in the surface.
197 std::size_t GetNumLayers() const {
198 return is_layered ? depth : 1;
199 }
200
201 /// Returns the debug name of the texture for use in graphics debuggers.
202 std::string TargetName() const;
203
204 // Helper used for out-of-class size calculations
205 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
206 const u32 block_depth) {
207 return Common::AlignBits(out_size,
208 Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth);
209 }
210
211 /// Converts a width from one type of surface to another. This helps represent the
212 /// equivalent value between compressed and non-compressed textures.
213 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
214 VideoCore::Surface::PixelFormat pixel_format_to) {
215 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
216 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
217 return (width * bw2 + bw1 - 1) / bw1;
218 }
219
220 /// Converts a height from one type of surface to another. This helps represent the
221 /// equivalent value between compressed and non-compressed textures.
222 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
223 VideoCore::Surface::PixelFormat pixel_format_to) {
224 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
225 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
226 return (height * bh2 + bh1 - 1) / bh1;
227 }
228
229 // Finds the maximum possible width between two 2D layers of different formats
230 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
231 const u32 src_level, const u32 dst_level) {
232 const u32 bw1 = src_params.GetDefaultBlockWidth();
233 const u32 bw2 = dst_params.GetDefaultBlockWidth();
234 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
235 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
236 return std::min(t_src_width, t_dst_width);
237 }
238
239 // Finds the maximum possible height between two 2D layers of different formats
240 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
241 const u32 src_level, const u32 dst_level) {
242 const u32 bh1 = src_params.GetDefaultBlockHeight();
243 const u32 bh2 = dst_params.GetDefaultBlockHeight();
244 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
245 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
246 return std::min(t_src_height, t_dst_height);
247 }
248
249 bool is_tiled;
250 bool srgb_conversion;
251 bool is_layered;
252 u32 block_width;
253 u32 block_height;
254 u32 block_depth;
255 u32 tile_width_spacing;
256 u32 width;
257 u32 height;
258 u32 depth;
259 u32 pitch;
260 u32 num_levels;
261 u32 emulated_levels;
262 VideoCore::Surface::PixelFormat pixel_format;
263 VideoCore::Surface::SurfaceType type;
264 VideoCore::Surface::SurfaceTarget target;
265
266private:
267 /// Returns the size of a given mipmap level inside a layer.
268 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
269
270 /// Returns the size of all mipmap levels and aligns as needed.
271 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
272 return GetLayerSize(as_host_size, uncompressed) *
273 (layer_only ? 1U : (is_layered ? depth : 1U));
274 }
275
276 /// Returns the size of a layer
277 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
278
279 /// Returns true if these parameters are from a layered surface.
280 bool IsLayered() const;
281};
282
283} // namespace VideoCommon
284
285namespace std {
286
287template <>
288struct hash<VideoCommon::SurfaceParams> {
289 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
290 return k.Hash();
291 }
292};
293
294} // namespace std
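
The ConvertWidth/ConvertHeight helpers above round upward so a partial block still contributes a full unit after the format change, and IntersectWidth/IntersectHeight take the minimum of both conversion directions because the round trip is not exact. A minimal sketch of that rounding, with hypothetical block widths (4-texel blocks, as in BC-compressed formats, against 1-texel blocks):

    #include <cstdio>

    // Same rounding as SurfaceParams::ConvertWidth: bw_from and bw_to are the
    // default block widths of the source and destination pixel formats.
    unsigned ConvertWidth(unsigned width, unsigned bw_from, unsigned bw_to) {
        return (width * bw_to + bw_from - 1) / bw_from;
    }

    int main() {
        // 130 texels of a 4-texel-block format map to 33 one-texel blocks:
        // 32 full blocks plus one partial block that rounds up.
        std::printf("%u\n", ConvertWidth(130, 4, 1)); // (130 * 1 + 3) / 4 = 33
        // Converting back overshoots (132 > 130), which is why IntersectWidth
        // clamps the two directions with std::min.
        std::printf("%u\n", ConvertWidth(33, 1, 4));  // (33 * 4 + 0) / 1 = 132
    }
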
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
deleted file mode 100644
index 6b5f5984b..000000000
--- a/src/video_core/texture_cache/surface_view.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23bool ViewParams::operator!=(const ViewParams& rhs) const {
24 return !operator==(rhs);
25}
26
27} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
deleted file mode 100644
index 199f72732..000000000
--- a/src/video_core/texture_cache/surface_view.h
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target_, u32 base_layer_,
17 u32 num_layers_, u32 base_level_, u32 num_levels_)
18 : target{target_}, base_layer{base_layer_}, num_layers{num_layers_},
19 base_level{base_level_}, num_levels{num_levels_} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24 bool operator!=(const ViewParams& rhs) const;
25
26 bool IsLayered() const {
27 switch (target) {
28 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
29 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
30 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
31 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
32 return true;
33 default:
34 return false;
35 }
36 }
37
38 VideoCore::Surface::SurfaceTarget target{};
39 u32 base_layer{};
40 u32 num_layers{};
41 u32 base_level{};
42 u32 num_levels{};
43};
44
45class ViewBase {
46public:
47 constexpr explicit ViewBase(const ViewParams& view_params) : params{view_params} {}
48
49 constexpr const ViewParams& GetViewParams() const {
50 return params;
51 }
52
53protected:
54 ViewParams params;
55};
56
57} // namespace VideoCommon
58
59namespace std {
60
61template <>
62struct hash<VideoCommon::ViewParams> {
63 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
64 return k.Hash();
65 }
66};
67
68} // namespace std
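
The std::hash specializations in the two deleted headers above exist so backends can key unordered containers directly on the params structs. A stubbed-down sketch of the same pattern; the struct here is a stand-in with only the hashed fields, and like the original bit-packing it assumes a 64-bit size_t:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    struct ViewParams {
        uint32_t target, base_layer, num_layers, base_level, num_levels;
        bool operator==(const ViewParams&) const = default;
        size_t Hash() const {
            // Same XOR/shift packing as ViewParams::Hash above; the fields are
            // small enough that the shifted ranges rarely overlap in practice.
            return size_t(base_layer) ^ (size_t(num_layers) << 16) ^
                   (size_t(base_level) << 24) ^ (size_t(num_levels) << 32) ^
                   (size_t(target) << 36);
        }
    };

    template <>
    struct std::hash<ViewParams> {
        size_t operator()(const ViewParams& k) const noexcept {
            return k.Hash();
        }
    };

    int main() {
        std::unordered_map<ViewParams, int> view_cache;
        // A hypothetical cubemap view: 6 layers, 1 level.
        view_cache.emplace(ViewParams{4, 0, 6, 0, 1}, 42);
        return view_cache.size() == 1 ? 0 : 1;
    }
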
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 581d8dd5b..968059842 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,1298 +6,1449 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list> 9#include <bit>
10#include <memory> 10#include <memory>
11#include <mutex> 11#include <mutex>
12#include <set> 12#include <optional>
13#include <tuple> 13#include <span>
14#include <type_traits>
14#include <unordered_map> 15#include <unordered_map>
16#include <utility>
15#include <vector> 17#include <vector>
16 18
17#include <boost/container/small_vector.hpp> 19#include <boost/container/small_vector.hpp>
18#include <boost/icl/interval_map.hpp>
19#include <boost/range/iterator_range.hpp>
20 20
21#include "common/assert.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h"
22#include "common/common_types.h" 23#include "common/common_types.h"
23#include "common/math_util.h" 24#include "common/logging/log.h"
24#include "core/core.h"
25#include "core/memory.h"
26#include "core/settings.h"
27#include "video_core/compatible_formats.h" 25#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h"
28#include "video_core/dirty_flags.h" 27#include "video_core/dirty_flags.h"
29#include "video_core/engines/fermi_2d.h" 28#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/kepler_compute.h"
30#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
31#include "video_core/gpu.h"
32#include "video_core/memory_manager.h" 31#include "video_core/memory_manager.h"
33#include "video_core/rasterizer_interface.h" 32#include "video_core/rasterizer_interface.h"
34#include "video_core/surface.h" 33#include "video_core/surface.h"
35#include "video_core/texture_cache/copy_params.h" 34#include "video_core/texture_cache/descriptor_table.h"
36#include "video_core/texture_cache/format_lookup_table.h" 35#include "video_core/texture_cache/format_lookup_table.h"
37#include "video_core/texture_cache/surface_base.h" 36#include "video_core/texture_cache/formatter.h"
38#include "video_core/texture_cache/surface_params.h" 37#include "video_core/texture_cache/image_base.h"
39#include "video_core/texture_cache/surface_view.h" 38#include "video_core/texture_cache/image_info.h"
40 39#include "video_core/texture_cache/image_view_base.h"
41namespace Tegra::Texture { 40#include "video_core/texture_cache/image_view_info.h"
42struct FullTextureInfo; 41#include "video_core/texture_cache/render_targets.h"
43} 42#include "video_core/texture_cache/samples_helper.h"
44 43#include "video_core/texture_cache/slot_vector.h"
45namespace VideoCore { 44#include "video_core/texture_cache/types.h"
46class RasterizerInterface; 45#include "video_core/texture_cache/util.h"
47} 46#include "video_core/textures/texture.h"
48 47
49namespace VideoCommon { 48namespace VideoCommon {
50 49
51using VideoCore::Surface::FormatCompatibility; 50using Tegra::Texture::SwizzleSource;
51using Tegra::Texture::TextureType;
52using Tegra::Texture::TICEntry;
53using Tegra::Texture::TSCEntry;
54using VideoCore::Surface::GetFormatType;
55using VideoCore::Surface::IsCopyCompatible;
52using VideoCore::Surface::PixelFormat; 56using VideoCore::Surface::PixelFormat;
53using VideoCore::Surface::SurfaceTarget; 57using VideoCore::Surface::PixelFormatFromDepthFormat;
54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 58using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
59using VideoCore::Surface::SurfaceType;
55 60
56template <typename TSurface, typename TView> 61template <class P>
57class TextureCache { 62class TextureCache {
58 using VectorSurface = boost::container::small_vector<TSurface, 1>; 63 /// Address shift for caching images into a hash table
64 static constexpr u64 PAGE_SHIFT = 20;
65
66 /// Enables debugging features in the texture cache
67 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
68 /// Implement blits as copies between framebuffers
69 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
70 /// True when some copies have to be emulated
71 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
72
73 /// Image view ID for null descriptors
74 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
75 /// Sampler ID for invalid sampler ids
76 static constexpr SamplerId NULL_SAMPLER_ID{0};
77
78 using Runtime = typename P::Runtime;
79 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc;
81 using ImageView = typename P::ImageView;
82 using Sampler = typename P::Sampler;
83 using Framebuffer = typename P::Framebuffer;
84
85 struct BlitImages {
86 ImageId dst_id;
87 ImageId src_id;
88 PixelFormat dst_format;
89 PixelFormat src_format;
90 };
91
92 template <typename T>
93 struct IdentityHash {
94 [[nodiscard]] size_t operator()(T value) const noexcept {
95 return static_cast<size_t>(value);
96 }
97 };
59 98
60public: 99public:
61 void InvalidateRegion(VAddr addr, std::size_t size) { 100 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
62 std::lock_guard lock{mutex}; 101 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
63 102
64 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 103 /// Notify the cache that a new frame has been queued
65 Unregister(surface); 104 void TickFrame();
66 }
67 }
68 105
69 void OnCPUWrite(VAddr addr, std::size_t size) { 106 /// Return a unique mutually exclusive lock for the cache
70 std::lock_guard lock{mutex}; 107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
71 108
72 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 109 /// Return a constant reference to the given image view id
73 if (surface->IsMemoryMarked()) { 110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
74 UnmarkMemory(surface);
75 surface->SetSyncPending(true);
76 marked_for_unregister.emplace_back(surface);
77 }
78 }
79 }
80 111
81 void SyncGuestHost() { 112 /// Return a reference to the given image view id
82 std::lock_guard lock{mutex}; 113 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
83 114
84 for (const auto& surface : marked_for_unregister) { 115 /// Fill image_view_ids with the graphics images in indices
85 if (surface->IsRegistered()) { 116 void FillGraphicsImageViews(std::span<const u32> indices,
86 surface->SetSyncPending(false); 117 std::span<ImageViewId> image_view_ids);
87 Unregister(surface);
88 }
89 }
90 marked_for_unregister.clear();
91 }
92 118
93 /** 119 /// Fill image_view_ids with the compute images in indices
94 * Guarantees that render targets don't unregister themselves if they 120 void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
95 * collide. Protection is currently only done on 3D slices.
96 */
97 void GuardRenderTargets(bool new_guard) {
98 guard_render_targets = new_guard;
99 }
100 121
101 void GuardSamplers(bool new_guard) { 122 /// Get the sampler from the graphics descriptor table in the specified index
102 guard_samplers = new_guard; 123 Sampler* GetGraphicsSampler(u32 index);
103 }
104 124
105 void FlushRegion(VAddr addr, std::size_t size) { 125 /// Get the sampler from the compute descriptor table in the specified index
106 std::lock_guard lock{mutex}; 126 Sampler* GetComputeSampler(u32 index);
107 127
108 auto surfaces = GetSurfacesInRegion(addr, size); 128 /// Refresh the state for graphics image view and sampler descriptors
109 if (surfaces.empty()) { 129 void SynchronizeGraphicsDescriptors();
110 return;
111 }
112 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
113 return a->GetModificationTick() < b->GetModificationTick();
114 });
115 for (const auto& surface : surfaces) {
116 mutex.unlock();
117 FlushSurface(surface);
118 mutex.lock();
119 }
120 }
121 130
122 bool MustFlushRegion(VAddr addr, std::size_t size) { 131 /// Refresh the state for compute image view and sampler descriptors
123 std::lock_guard lock{mutex}; 132 void SynchronizeComputeDescriptors();
124 133
125 const auto surfaces = GetSurfacesInRegion(addr, size); 134 /// Update bound render targets and upload memory if necessary
126 return std::any_of(surfaces.cbegin(), surfaces.cend(), 135 /// @param is_clear True when the render targets are being used for clears
127 [](const TSurface& surface) { return surface->IsModified(); }); 136 void UpdateRenderTargets(bool is_clear);
128 }
129 137
130 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 138 /// Find a framebuffer with the currently bound render targets
131 const VideoCommon::Shader::Sampler& entry) { 139 /// UpdateRenderTargets should be called before this
132 std::lock_guard lock{mutex}; 140 Framebuffer* GetFramebuffer();
133 const auto gpu_addr{tic.Address()};
134 if (!gpu_addr) {
135 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
136 }
137 141
138 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 142 /// Mark images in a range as modified from the CPU
139 if (!cpu_addr) { 143 void WriteMemory(VAddr cpu_addr, size_t size);
140 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
141 }
142 144
143 if (!IsTypeCompatible(tic.texture_type, entry)) { 145 /// Download contents of host images to guest memory in a region
144 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 146 void DownloadMemory(VAddr cpu_addr, size_t size);
145 }
146 147
147 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 148 /// Remove images in a region
148 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); 149 void UnmapMemory(VAddr cpu_addr, size_t size);
149 if (guard_samplers) {
150 sampled_textures.push_back(surface);
151 }
152 return view;
153 }
154 150
155 TView GetImageSurface(const Tegra::Texture::TICEntry& tic, 151 /// Blit an image with the given parameters
156 const VideoCommon::Shader::Image& entry) { 152 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
157 std::lock_guard lock{mutex}; 153 const Tegra::Engines::Fermi2D::Surface& src,
158 const auto gpu_addr{tic.Address()}; 154 const Tegra::Engines::Fermi2D::Config& copy);
159 if (!gpu_addr) {
160 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
161 }
162 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
163 if (!cpu_addr) {
164 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
165 }
166 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
167 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
168 if (guard_samplers) {
169 sampled_textures.push_back(surface);
170 }
171 return view;
172 }
173 155
174 bool TextureBarrier() { 156 /// Invalidate the contents of the color buffer index
175 const bool any_rt = 157 /// These contents become unspecified, the cache can assume aggressive optimizations.
176 std::any_of(sampled_textures.begin(), sampled_textures.end(), 158 void InvalidateColorBuffer(size_t index);
177 [](const auto& surface) { return surface->IsRenderTarget(); });
178 sampled_textures.clear();
179 return any_rt;
180 }
181 159
182 TView GetDepthBufferSurface(bool preserve_contents) { 160 /// Invalidate the contents of the depth buffer
183 std::lock_guard lock{mutex}; 161 /// These contents become unspecified, the cache can assume aggressive optimizations.
184 auto& dirty = maxwell3d.dirty; 162 void InvalidateDepthBuffer();
185 if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
186 return depth_buffer.view;
187 }
188 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
189 163
190 const auto& regs{maxwell3d.regs}; 164 /// Try to find a cached image view in the given CPU address
191 const auto gpu_addr{regs.zeta.Address()}; 165 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
192 if (!gpu_addr || !regs.zeta_enable) {
193 SetEmptyDepthBuffer();
194 return {};
195 }
196 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
197 if (!cpu_addr) {
198 SetEmptyDepthBuffer();
199 return {};
200 }
201 const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
202 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
203 if (depth_buffer.target)
204 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
205 depth_buffer.target = surface_view.first;
206 depth_buffer.view = surface_view.second;
207 if (depth_buffer.target)
208 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
209 return surface_view.second;
210 }
211
212 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
213 std::lock_guard lock{mutex};
214 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
215 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
216 return render_targets[index].view;
217 }
218 maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false;
219 166
220 const auto& regs{maxwell3d.regs}; 167 /// Return true when there are uncommitted images to be downloaded
221 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 168 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
222 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
223 SetEmptyColorBuffer(index);
224 return {};
225 }
226 169
227 const auto& config{regs.rt[index]}; 170 /// Return true when the caller should wait for async downloads
228 const auto gpu_addr{config.Address()}; 171 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
229 if (!gpu_addr) {
230 SetEmptyColorBuffer(index);
231 return {};
232 }
233 172
234 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 173 /// Commit asynchronous downloads
235 if (!cpu_addr) { 174 void CommitAsyncFlushes();
236 SetEmptyColorBuffer(index); 175
237 return {}; 176 /// Pop asynchronous downloads
238 } 177 void PopAsyncFlushes();
178
179 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
239 181
240 auto surface_view = 182private:
241 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), 183 /// Iterate over all page indices in a range
242 preserve_contents, true); 184 template <typename Func>
243 if (render_targets[index].target) { 185 static void ForEachPage(VAddr addr, size_t size, Func&& func) {
244 auto& surface = render_targets[index].target; 186 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
245 surface->MarkAsRenderTarget(false, NO_RT); 187 const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
246 const auto& cr_params = surface->GetSurfaceParams(); 188 for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
247 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { 189 if constexpr (RETURNS_BOOL) {
248 AsyncFlushSurface(surface); 190 if (func(page)) {
191 break;
192 }
193 } else {
194 func(page);
249 } 195 }
250 } 196 }
251 render_targets[index].target = surface_view.first;
252 render_targets[index].view = surface_view.second;
253 if (render_targets[index].target)
254 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
255 return surface_view.second;
256 } 197 }
257 198
258 void MarkColorBufferInUse(std::size_t index) { 199 /// Fills image_view_ids in the image views in indices
259 if (auto& render_target = render_targets[index].target) { 200 void FillImageViews(DescriptorTable<TICEntry>& table,
260 render_target->MarkAsModified(true, Tick()); 201 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
261 } 202 std::span<ImageViewId> image_view_ids);
262 }
263 203
264 void MarkDepthBufferInUse() { 204 /// Find or create an image view in the guest descriptor table
265 if (depth_buffer.target) { 205 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
266 depth_buffer.target->MarkAsModified(true, Tick()); 206 std::span<ImageViewId> cached_image_view_ids, u32 index);
267 }
268 }
269 207
270 void SetEmptyDepthBuffer() { 208 /// Find or create a framebuffer with the given render target parameters
271 if (depth_buffer.target == nullptr) { 209 FramebufferId GetFramebufferId(const RenderTargets& key);
272 return;
273 }
274 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
275 depth_buffer.target = nullptr;
276 depth_buffer.view = nullptr;
277 }
278 210
279 void SetEmptyColorBuffer(std::size_t index) { 211 /// Refresh the contents (pixel data) of an image
280 if (render_targets[index].target == nullptr) { 212 void RefreshContents(Image& image);
281 return;
282 }
283 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
284 render_targets[index].target = nullptr;
285 render_targets[index].view = nullptr;
286 }
287
288 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
289 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
290 const Tegra::Engines::Fermi2D::Config& copy_config) {
291 std::lock_guard lock{mutex};
292 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
293 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
294 const GPUVAddr src_gpu_addr = src_config.Address();
295 const GPUVAddr dst_gpu_addr = dst_config.Address();
296 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
297
298 const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
299 const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
300 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
301 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
302 ImageBlit(src_surface, dst_surface.second, copy_config);
303 dst_surface.first->MarkAsModified(true, Tick());
304 }
305
306 TSurface TryFindFramebufferSurface(VAddr addr) const {
307 if (!addr) {
308 return nullptr;
309 }
310 const VAddr page = addr >> registry_page_bits;
311 const auto it = registry.find(page);
312 if (it == registry.end()) {
313 return nullptr;
314 }
315 const auto& list = it->second;
316 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
317 return surface->GetCpuAddr() == addr;
318 });
319 return found != list.end() ? *found : nullptr;
320 }
321 213
322 u64 Tick() { 214 /// Upload data from guest to an image
323 return ++ticks; 215 template <typename MapBuffer>
324 } 216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset);
325 217
326 void CommitAsyncFlushes() { 218 /// Find or create an image view from a guest descriptor
327 committed_flushes.push_back(uncommitted_flushes); 219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
328 uncommitted_flushes.reset();
329 }
330 220
331 bool HasUncommittedFlushes() const { 221 /// Create a new image view from a guest descriptor
332 return uncommitted_flushes != nullptr; 222 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
333 }
334 223
335 bool ShouldWaitAsyncFlushes() const { 224 /// Find or create an image from the given parameters
336 return !committed_flushes.empty() && committed_flushes.front() != nullptr; 225 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
337 } 226 RelaxedOptions options = RelaxedOptions{});
338 227
339 void PopAsyncFlushes() { 228 /// Find an image from the given parameters
340 if (committed_flushes.empty()) { 229 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
341 return; 230 RelaxedOptions options);
342 }
343 auto& flush_list = committed_flushes.front();
344 if (!flush_list) {
345 committed_flushes.pop_front();
346 return;
347 }
348 for (TSurface& surface : *flush_list) {
349 FlushSurface(surface);
350 }
351 committed_flushes.pop_front();
352 }
353 231
354protected: 232 /// Create an image from the given parameters
355 explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, 233 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
356 Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, 234 RelaxedOptions options);
357 bool is_astc_supported_)
358 : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
359 gpu_memory{gpu_memory_} {
360 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
361 SetEmptyColorBuffer(i);
362 }
363 235
364 SetEmptyDepthBuffer(); 236 /// Create a new image and join perfectly matching existing images
365 staging_cache.SetSize(2); 237 /// Remove joined images from the cache
238 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
366 239
367 const auto make_siblings = [this](PixelFormat a, PixelFormat b) { 240 /// Return a blit image pair from the given guest blit parameters
368 siblings_table[static_cast<std::size_t>(a)] = b; 241 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
369 siblings_table[static_cast<std::size_t>(b)] = a; 242 const Tegra::Engines::Fermi2D::Surface& src);
370 };
371 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
372 make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM);
373 make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT);
374 make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT);
375 243
376 sampled_textures.reserve(64); 244 /// Find or create a sampler from a guest descriptor sampler
377 } 245 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
378 246
379 ~TextureCache() = default; 247 /// Find or create an image view for the given color buffer index
248 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
380 249
381 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; 250 /// Find or create an image view for the depth buffer
251 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
382 252
383 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, 253 /// Find or create a view for a render target with the given image parameters
384 const CopyParams& copy_params) = 0; 254 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
255 bool is_clear);
385 256
386 virtual void ImageBlit(TView& src_view, TView& dst_view, 257 /// Iterates over all the images in a region calling func
387 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 258 template <typename Func>
259 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
388 260
389 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 261 /// Find or create an image view in the given image with the passed parameters
390 // and reading it from a separate buffer. 262 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
391 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
392 263
393 void ManageRenderTargetUnregister(TSurface& surface) { 264 /// Register image in the page table
394 auto& dirty = maxwell3d.dirty; 265 void RegisterImage(ImageId image);
395 const u32 index = surface->GetRenderTarget(); 266
396 if (index == DEPTH_RT) { 267 /// Unregister image from the page table
397 dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; 268 void UnregisterImage(ImageId image);
398 } else { 269
399 dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; 270 /// Track CPU reads and writes for image
400 } 271 void TrackImage(ImageBase& image);
401 dirty.flags[VideoCommon::Dirty::RenderTargets] = true; 272
273 /// Stop tracking CPU reads and writes for image
274 void UntrackImage(ImageBase& image);
275
276 /// Delete image from the cache
277 void DeleteImage(ImageId image);
278
279 /// Remove image views references from the cache
280 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
281
282 /// Remove framebuffers using the given image views from the cache
283 void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
284
285 /// Mark an image as modified from the GPU
286 void MarkModification(ImageBase& image) noexcept;
287
288 /// Synchronize image aliases, copying data if needed
289 void SynchronizeAliases(ImageId image_id);
290
291 /// Prepare an image to be used
292 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
293
294 /// Prepare an image view to be used
295 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
296
297 /// Execute copies from one image to the other, even if they are incompatible
298 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
299
300 /// Bind an image view as render target, downloading resources preemptively if needed
301 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
302
303 /// Create a render target from a given image and image view parameters
304 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
305 ImageId, const ImageViewInfo& view_info);
306
307 /// Returns true if the current clear parameters clear the whole image of a given image view
308 [[nodiscard]] bool IsFullClear(ImageViewId id);
309
310 Runtime& runtime;
311 VideoCore::RasterizerInterface& rasterizer;
312 Tegra::Engines::Maxwell3D& maxwell3d;
313 Tegra::Engines::KeplerCompute& kepler_compute;
314 Tegra::MemoryManager& gpu_memory;
315
316 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
317 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
318 std::vector<SamplerId> graphics_sampler_ids;
319 std::vector<ImageViewId> graphics_image_view_ids;
320
321 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
322 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
323 std::vector<SamplerId> compute_sampler_ids;
324 std::vector<ImageViewId> compute_image_view_ids;
325
326 RenderTargets render_targets;
327
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
333
334 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
335
336 bool has_deleted_images = false;
337
338 SlotVector<Image> slot_images;
339 SlotVector<ImageView> slot_image_views;
340 SlotVector<ImageAlloc> slot_image_allocs;
341 SlotVector<Sampler> slot_samplers;
342 SlotVector<Framebuffer> slot_framebuffers;
343
344 // TODO: This data structure is not optimal and it should be reworked
345 std::vector<ImageId> uncommitted_downloads;
346 std::queue<std::vector<ImageId>> committed_downloads;
347
348 static constexpr size_t TICKS_TO_DESTROY = 6;
349 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
350 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
351 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
352
353 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
354
355 u64 modification_tick = 0;
356 u64 frame_tick = 0;
357};
358
359template <class P>
360TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
361 Tegra::Engines::Maxwell3D& maxwell3d_,
362 Tegra::Engines::KeplerCompute& kepler_compute_,
363 Tegra::MemoryManager& gpu_memory_)
364 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
365 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
366 // Configure null sampler
367 TSCEntry sampler_descriptor{};
368 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
369 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
370 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
371 sampler_descriptor.cubemap_anisotropy.Assign(1);
372
373 // Make sure the first index is reserved for the null resources
374 // This way the null resource becomes a compile time constant
375 void(slot_image_views.insert(runtime, NullImageParams{}));
376 void(slot_samplers.insert(runtime, sampler_descriptor));
377}
378
379template <class P>
380void TextureCache<P>::TickFrame() {
381 // Tick sentenced resources in this order to ensure they are destroyed in the right order
382 sentenced_images.Tick();
383 sentenced_framebuffers.Tick();
384 sentenced_image_view.Tick();
385 ++frame_tick;
386}
387
388template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id];
396}
397
398template <class P>
399typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
400 return slot_image_views[id];
401}
402
403template <class P>
404void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
405 std::span<ImageViewId> image_view_ids) {
406 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
407}
408
409template <class P>
410void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
411 std::span<ImageViewId> image_view_ids) {
412 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
413}
414
415template <class P>
416typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
417 [[unlikely]] if (index > graphics_sampler_table.Limit()) {
418 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
419 return &slot_samplers[NULL_SAMPLER_ID];
420 }
421 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
422 SamplerId& id = graphics_sampler_ids[index];
423 [[unlikely]] if (is_new) {
424 id = FindSampler(descriptor);
402 } 425 }
426 return &slot_samplers[id];
427}
403 428
404 void Register(TSurface surface) { 429template <class P>
405 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 430typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
406 const std::size_t size = surface->GetSizeInBytes(); 431 [[unlikely]] if (index > compute_sampler_table.Limit()) {
407 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 432 LOG_ERROR(HW_GPU, "Invalid sampler index={}", index);
408 if (!cpu_addr) { 433 return &slot_samplers[NULL_SAMPLER_ID];
409 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 434 }
410 gpu_addr); 435 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
411 return; 436 SamplerId& id = compute_sampler_ids[index];
412 } 437 [[unlikely]] if (is_new) {
413 surface->SetCpuAddr(*cpu_addr); 438 id = FindSampler(descriptor);
414 RegisterInnerCache(surface);
415 surface->MarkAsRegistered(true);
416 surface->SetMemoryMarked(true);
417 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
418 } 439 }
440 return &slot_samplers[id];
441}
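
Both sampler getters above follow the same read-through pattern: the descriptor table reports whether the guest descriptor changed since the last read, and only then is the cached id recomputed. A stubbed sketch; this DescriptorTable stand-in compares against a cached copy instead of reading guest memory, and FindSampler is a hypothetical stand-in for the real lookup:

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Descriptor {
        uint64_t raw = 0;
    };

    // Hypothetical stand-in: the real table reads guest memory and diffs it.
    struct DescriptorTable {
        std::pair<Descriptor, bool> Read(uint32_t index, const Descriptor& guest) {
            if (index >= cached.size()) {
                cached.resize(index + 1);
            }
            const bool is_new = cached[index].raw != guest.raw;
            cached[index] = guest;
            return {guest, is_new};
        }
        std::vector<Descriptor> cached;
    };

    uint32_t FindSampler(const Descriptor&) {
        static uint32_t next_id = 1; // id 0 stays reserved for the null sampler
        return next_id++;
    }

    int main() {
        DescriptorTable table;
        std::vector<uint32_t> sampler_ids(8, 0);
        const Descriptor guest{0x1234};
        for (int pass = 0; pass < 2; ++pass) {
            const auto [descriptor, is_new] = table.Read(3, guest);
            if (is_new) { // only the first pass pays for the expensive lookup
                sampler_ids[3] = FindSampler(descriptor);
            }
        }
        return sampler_ids[3] == 1 ? 0 : 1;
    }
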
419 442
-    void UnmarkMemory(TSurface surface) {
-        if (!surface->IsMemoryMarked()) {
-            return;
-        }
-        const std::size_t size = surface->GetSizeInBytes();
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
-        surface->SetMemoryMarked(false);
-    }
-
+template <class P>
+void TextureCache<P>::SynchronizeGraphicsDescriptors() {
+    using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
+    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
+    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
+        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
+        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}
+
+template <class P>
+void TextureCache<P>::SynchronizeComputeDescriptors() {
+    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute.regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
+    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    }
+    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
+        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    }
+}
+
-    void Unregister(TSurface surface) {
-        if (guard_render_targets && surface->IsProtected()) {
-            return;
-        }
-        if (!guard_render_targets && surface->IsRenderTarget()) {
-            ManageRenderTargetUnregister(surface);
-        }
-        UnmarkMemory(surface);
-        if (surface->IsSyncPending()) {
-            marked_for_unregister.remove(surface);
-            surface->SetSyncPending(false);
-        }
-        UnregisterInnerCache(surface);
-        surface->MarkAsRegistered(false);
-        ReserveSurface(surface->GetSurfaceParams(), surface);
-    }
-
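Both synchronizers size their id caches from the TIC/TSC limits; with "linked TSC" the sampler index is taken from the texture handle itself, which is why the TSC cache must grow to the TIC limit. A sketch of the handle layout this implies (the 20-bit split follows Maxwell's handle packing; field names are illustrative and u32 is the project's alias):

struct TextureHandle {
    u32 image;   // TIC (image descriptor) index
    u32 sampler; // TSC (sampler descriptor) index
};

TextureHandle UnpackHandle(u32 raw, bool via_header_index) {
    TextureHandle handle{};
    handle.image = raw & ((1u << 20) - 1); // low bits select the TIC entry
    // With linked TSC the sampler index mirrors the image index; otherwise it
    // comes from the high bits of the handle.
    handle.sampler = via_header_index ? handle.image : raw >> 20;
    return handle;
}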
-    TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
-        if (const auto surface = TryGetReservedSurface(params); surface) {
-            surface->SetGpuAddr(gpu_addr);
-            return surface;
-        }
-        // No reserved surface available, create a new one and reserve it
-        auto new_surface{CreateSurface(gpu_addr, params)};
-        return new_surface;
-    }
-
-    const bool is_astc_supported;
-
-private:
-    enum class RecycleStrategy : u32 {
-        Ignore = 0,
-        Flush = 1,
-        BufferCopy = 3,
-    };
-
-    enum class DeductionType : u32 {
-        DeductionComplete,
-        DeductionIncomplete,
-        DeductionFailed,
-    };
-
+template <class P>
+void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
+    using namespace VideoCommon::Dirty;
+    auto& flags = maxwell3d.dirty.flags;
+    if (!flags[Dirty::RenderTargets]) {
+        return;
+    }
+    flags[Dirty::RenderTargets] = false;
+
+    // Render target control affects all render targets, so force lookups when it is dirty
+    const bool force = flags[Dirty::RenderTargetControl];
+    flags[Dirty::RenderTargetControl] = false;
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+        if (flags[Dirty::ColorBuffer0 + index] || force) {
+            flags[Dirty::ColorBuffer0 + index] = false;
+            BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
+        }
+        PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
+    }
+    if (flags[Dirty::ZetaBuffer] || force) {
+        flags[Dirty::ZetaBuffer] = false;
+        BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
+    }
+    const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
+    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
+
+    for (size_t index = 0; index < NUM_RT; ++index) {
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+    }
+    render_targets.size = Extent2D{
+        maxwell3d.regs.render_area.width,
+        maxwell3d.regs.render_area.height,
+    };
+}
+
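PrepareImageView receives is_clear && IsFullClear(id) so that a clear covering the whole render target can skip loading the old contents. A plausible sketch of such a test, assuming access to the view's size and the active scissor (type and field names are illustrative, not this commit's definitions):

struct Extent2D { u32 width, height; };
struct Scissor { u32 min_x, min_y, max_x, max_y; };

bool IsFullClear(const Extent2D size, const Scissor& scissor, bool scissor_enabled) {
    if (!scissor_enabled) {
        return true; // no scissor: the clear covers the full render area
    }
    return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
           scissor.max_y >= size.height;
}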
+template <class P>
+typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
+    return &slot_framebuffers[GetFramebufferId(render_targets)];
+}
+
+template <class P>
+void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
+                                     std::span<ImageViewId> cached_image_view_ids,
+                                     std::span<const u32> indices,
+                                     std::span<ImageViewId> image_view_ids) {
+    ASSERT(indices.size() <= image_view_ids.size());
+    do {
+        has_deleted_images = false;
+        std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
+            return VisitImageView(table, cached_image_view_ids, index);
+        });
+    } while (has_deleted_images);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
+                                            std::span<ImageViewId> cached_image_view_ids,
+                                            u32 index) {
+    if (index > table.Limit()) {
+        LOG_ERROR(HW_GPU, "Invalid image view index={}", index);
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [descriptor, is_new] = table.Read(index);
+    ImageViewId& image_view_id = cached_image_view_ids[index];
+    if (is_new) {
+        image_view_id = FindImageView(descriptor);
+    }
+    if (image_view_id != NULL_IMAGE_VIEW_ID) {
+        PrepareImageView(image_view_id, false, false);
+    }
+    return image_view_id;
+}
+
-    struct Deduction {
-        DeductionType type{DeductionType::DeductionFailed};
-        TSurface surface{};
-
-        bool Failed() const {
-            return type == DeductionType::DeductionFailed;
-        }
-
-        bool Incomplete() const {
-            return type == DeductionType::DeductionIncomplete;
-        }
-
-        bool IsDepth() const {
-            return surface->GetSurfaceParams().IsPixelFormatZeta();
-        }
-    };
-
+template <class P>
+FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
+    const auto [pair, is_new] = framebuffers.try_emplace(key);
+    FramebufferId& framebuffer_id = pair->second;
+    if (!is_new) {
+        return framebuffer_id;
+    }
+    std::array<ImageView*, NUM_RT> color_buffers;
+    std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
+                           [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
+    ImageView* const depth_buffer =
+        key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
+    framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
+    return framebuffer_id;
+}
+
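GetFramebufferId leans on a small map idiom worth spelling out: try_emplace default-constructs the mapped value and reports whether the key was new, so the expensive framebuffer object is only built on a cache miss. The idiom in isolation (a standalone illustrative example, not code from this commit):

#include <string>
#include <unordered_map>

std::unordered_map<std::string, int> cache;

int GetOrCompute(const std::string& key) {
    const auto [it, is_new] = cache.try_emplace(key);
    if (is_new) {
        it->second = static_cast<int>(key.size()); // stand-in for the expensive build
    }
    return it->second;
}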
+template <class P>
+void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
+    ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        image.flags |= ImageFlagBits::CpuModified;
+        UntrackImage(image);
+    });
+}
+
+template <class P>
+void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> images;
+    ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
+        // Skip images that were not modified from the GPU
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return;
+        }
+        // Skip images that are modified from the CPU
+        // We don't want to write sensitive data from the guest
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        if (image.info.num_samples > 1) {
+            LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
+            return;
+        }
+        image.flags &= ~ImageFlagBits::GpuModified;
+        images.push_back(image_id);
+    });
+    if (images.empty()) {
+        return;
+    }
+    std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
+        return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
+    });
+    for (const ImageId image_id : images) {
+        Image& image = slot_images[image_id];
+        auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes);
+        const auto copies = FullDownloadCopies(image.info);
+        image.DownloadMemory(map, 0, copies);
+        runtime.Finish();
+        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span());
+    }
+}
+
-    /**
-     * Takes care of selecting a proper strategy to deal with a texture recycle.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param untopological Indicates to the recycler that the texture has no way
-     * to match the overlaps due to topological reasons.
-     **/
-    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
-                                 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
-        if (Settings::IsGPULevelExtreme()) {
-            return RecycleStrategy::Flush;
-        }
-        // 3D Textures decision
-        if (params.target == SurfaceTarget::Texture3D) {
-            return RecycleStrategy::Flush;
-        }
-        for (const auto& s : overlaps) {
-            const auto& s_params = s->GetSurfaceParams();
-            if (s_params.target == SurfaceTarget::Texture3D) {
-                return RecycleStrategy::Flush;
-            }
-        }
-        // Untopological decision
-        if (untopological == MatchTopologyResult::CompressUnmatch) {
-            return RecycleStrategy::Flush;
-        }
-        if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
-            return RecycleStrategy::Flush;
-        }
-        return RecycleStrategy::Ignore;
-    }
-
-    /**
-     * Used to decide what to do with textures we can't resolve in the cache. It has two
-     * implemented strategies: Ignore and Flush.
-     *
-     * - Ignore: Just unregisters all the overlaps and loads the new texture.
-     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters for the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     * left blank.
-     * @param untopological Indicates to the recycler that the texture has no way to match the
-     * overlaps due to topological reasons.
-     **/
-    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
-                                              const GPUVAddr gpu_addr, const bool preserve_contents,
-                                              const MatchTopologyResult untopological) {
-        const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
-        for (auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
-        case RecycleStrategy::Ignore: {
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        case RecycleStrategy::Flush: {
-            std::sort(overlaps.begin(), overlaps.end(),
-                      [](const TSurface& a, const TSurface& b) -> bool {
-                          return a->GetModificationTick() < b->GetModificationTick();
-                      });
-            for (auto& surface : overlaps) {
-                FlushSurface(surface);
-            }
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-        case RecycleStrategy::BufferCopy: {
-            auto new_surface = GetUncachedSurface(gpu_addr, params);
-            BufferCopy(overlaps[0], new_surface);
-            return {new_surface, new_surface->GetMainView()};
-        }
-        default: {
-            UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
-            return InitializeSurface(gpu_addr, params, do_load);
-        }
-        }
-    }
-
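WriteMemory and DownloadMemory are two corners of a small per-image state machine over the CpuModified and GpuModified flag bits. A compressed summary of the transitions as they appear in this file (RefreshContents appears further down); the comments describe behavior visible in this diff:

// CpuModified: the guest wrote the backing memory; the host image is stale.
// GpuModified: the GPU wrote the image; guest memory is stale.
//
// CPU write (WriteMemory):   set CpuModified, stop watching pages (UntrackImage)
// GPU write:                 set GpuModified
// GPU use (RefreshContents): if CpuModified -> upload, clear CpuModified, TrackImage
// Flush (DownloadMemory):    if GpuModified -> download + swizzle back, clear it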
+template <class P>
+void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image);
+        }
+        UnregisterImage(id);
+        DeleteImage(id);
+    }
+}
+
-    /**
-     * Takes a single surface and recreates it into another that may differ in
-     * format, target or width alignment.
-     *
-     * @param current_surface The registered surface in the cache which we want to convert.
-     * @param params The new surface params which we'll use to recreate the surface.
-     * @param is_render Whether or not the surface is a render target.
-     **/
-    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
-                                              bool is_render) {
-        const auto gpu_addr = current_surface->GetGpuAddr();
-        const auto& cr_params = current_surface->GetSurfaceParams();
-        TSurface new_surface;
584 if (cr_params.pixel_format != params.pixel_format && !is_render &&
585 GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
586 SurfaceParams new_params = params;
587 new_params.pixel_format = cr_params.pixel_format;
588 new_params.type = cr_params.type;
589 new_surface = GetUncachedSurface(gpu_addr, new_params);
590 } else {
591 new_surface = GetUncachedSurface(gpu_addr, params);
592 }
593 const SurfaceParams& final_params = new_surface->GetSurfaceParams();
594 if (cr_params.type != final_params.type) {
595 if (Settings::IsGPULevelExtreme()) {
596 BufferCopy(current_surface, new_surface);
597 }
598 } else {
599 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
600 for (auto& brick : bricks) {
601 TryCopyImage(current_surface, new_surface, brick);
602 }
603 }
604 Unregister(current_surface);
605 Register(new_surface);
606 new_surface->MarkAsModified(current_surface->IsModified(), Tick());
607 return {new_surface, new_surface->GetMainView()};
608 }
609
610 /**
611 * Takes a single surface and checks with the new surface's params if it's an exact
612 * match, we return the main view of the registered surface. If its formats don't
613 * match, we rebuild the surface. We call this last method a `Mirage`. If formats
614 * match but the targets don't, we create an overview View of the registered surface.
615 *
616 * @param current_surface The registered surface in the cache which we want to convert.
617 * @param params The new surface params which we want to check.
618 * @param is_render Whether or not the surface is a render target.
619 **/
620 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
621 const SurfaceParams& params, bool is_render) {
622 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
623 const bool matches_target = current_surface->MatchTarget(params.target);
624 const auto match_check = [&]() -> std::pair<TSurface, TView> {
625 if (matches_target) {
626 return {current_surface, current_surface->GetMainView()};
627 }
628 return {current_surface, current_surface->EmplaceOverview(params)};
629 };
630 if (!is_mirage) {
631 return match_check();
632 }
633 if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
634 return match_check();
635 }
636 return RebuildSurface(current_surface, params, is_render);
637 }
638
639 /**
640 * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
641 * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
642 * of the new surface, if they all match we end up recreating a surface for them,
643 * else we return nothing.
644 *
645 * @param overlaps The overlapping surfaces registered in the cache.
646 * @param params The parameters on the new surface.
647 * @param gpu_addr The starting address of the new surface.
648 **/
649 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
650 const SurfaceParams& params,
651 GPUVAddr gpu_addr) {
652 if (params.target == SurfaceTarget::Texture3D) {
653 return std::nullopt;
654 }
655 const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
656 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
+
+template <class P>
+void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
+                                const Tegra::Engines::Fermi2D::Surface& src,
+                                const Tegra::Engines::Fermi2D::Config& copy) {
+    const BlitImages images = GetBlitImages(dst, src);
+    const ImageId dst_id = images.dst_id;
+    const ImageId src_id = images.src_id;
+    PrepareImage(src_id, false, false);
+    PrepareImage(dst_id, true, false);
+
+    ImageBase& dst_image = slot_images[dst_id];
+    const ImageBase& src_image = slot_images[src_id];
+
+    // TODO: Deduplicate
+    const std::optional dst_base = dst_image.TryFindBase(dst.Address());
+    const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
+    const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
+    const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
+    const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
+    const std::array src_region{
+        Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
+        Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
+    };
+
+    const std::optional src_base = src_image.TryFindBase(src.Address());
+    const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
+    const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
+    const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
+    const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
+    const std::array dst_region{
+        Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
+        Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
+    };
-
-        if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
-            LoadSurface(new_surface);
-            for (const auto& surface : overlaps) {
-                Unregister(surface);
-            }
-            Register(new_surface);
-            return {{new_surface, new_surface->GetMainView()}};
-        }
-
-        std::size_t passed_tests = 0;
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
-            if (!mipmap_layer) {
-                continue;
-            }
-            const auto [base_layer, base_mipmap] = *mipmap_layer;
-            if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
676 continue;
677 }
678 ++passed_tests;
679
680 // Copy all mipmaps and layers
681 const u32 block_width = params.GetDefaultBlockWidth();
682 const u32 block_height = params.GetDefaultBlockHeight();
683 for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
684 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
685 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
686 if (width < block_width || height < block_height) {
687 // Current APIs forbid copying small compressed textures, avoid errors
688 break;
689 }
690 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
691 src_params.depth);
692 TryCopyImage(surface, new_surface, copy_params);
693 }
694 }
695 if (passed_tests == 0) {
696 return std::nullopt;
697 }
698 if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
699 // In Accurate GPU all tests should pass, else we recycle
700 return std::nullopt;
701 }
-
-        const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
+
+    // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
+    Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
+    if constexpr (FRAMEBUFFER_BLITS) {
+        // OpenGL blits from framebuffers, not images
+        Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
+        runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
+                                copy.filter, copy.operation);
+    } else {
+        // Vulkan can blit images, but it lacks format reinterpretations
+        // Provide a framebuffer in case it's necessary
+        ImageView& dst_view = slot_image_views[dst_view_id];
+        ImageView& src_view = slot_image_views[src_view_id];
+        runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
+                          copy.operation);
+    }
+}
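FRAMEBUFFER_BLITS is a compile-time constant provided by the backend policy P, so the branch not taken is discarded entirely during instantiation. The dispatch pattern in isolation (trait struct names here are illustrative):

struct OpenGLTraits {
    static constexpr bool FRAMEBUFFER_BLITS = true;
};
struct VulkanTraits {
    static constexpr bool FRAMEBUFFER_BLITS = false;
};

template <class P>
void Blit() {
    if constexpr (P::FRAMEBUFFER_BLITS) {
        // instantiated only for backends that blit through framebuffer objects
    } else {
        // instantiated only for backends that blit images directly
    }
}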
+
+template <class P>
+void TextureCache<P>::InvalidateColorBuffer(size_t index) {
+    ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
+    color_buffer_id = FindColorBuffer(index, false);
+    if (!color_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
+        return;
+    }
+    // When invalidating a color buffer, the old contents are no longer relevant
+    ImageView& color_buffer = slot_image_views[color_buffer_id];
+    Image& image = slot_images[color_buffer.image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
-
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-        return {{new_surface, new_surface->GetMainView()}};
-    }
-
-    /**
-     * Takes care of managing 3D textures and their slices. Uses HLE methods to reconstruct 3D
-     * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of
-     * the HLE methods.
-     *
-     * @param overlaps The overlapping surfaces registered in the cache.
-     * @param params The parameters on the new surface.
-     * @param gpu_addr The starting address of the new surface.
-     * @param cpu_addr The starting address of the new surface on physical memory.
-     * @param preserve_contents Indicates that the new surface should be loaded from memory or
-     * left blank.
-     */
-    std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
-                                                               const SurfaceParams& params,
-                                                               GPUVAddr gpu_addr, VAddr cpu_addr,
-                                                               bool preserve_contents) {
729 if (params.target != SurfaceTarget::Texture3D) {
730 for (const auto& surface : overlaps) {
731 if (!surface->MatchTarget(params.target)) {
732 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
733 if (Settings::IsGPULevelExtreme()) {
734 return std::nullopt;
735 }
736 Unregister(surface);
737 return InitializeSurface(gpu_addr, params, preserve_contents);
738 }
739 return std::nullopt;
740 }
741 if (surface->GetCpuAddr() != cpu_addr) {
742 continue;
743 }
744 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
745 return std::make_pair(surface, surface->GetMainView());
746 }
747 }
748 return InitializeSurface(gpu_addr, params, preserve_contents);
749 }
-
-        if (params.num_levels > 1) {
-            // We can't handle mipmaps in 3D textures yet, better fall back to the LLE approach
-            return std::nullopt;
-        }
-
-        if (overlaps.size() == 1) {
-            const auto& surface = overlaps[0];
-            const SurfaceParams& overlap_params = surface->GetSurfaceParams();
-            // Don't attempt to render to textures with more than one level for now
-            // The texture has to be at or to the right of the overlap's address if we want to
-            // render to it
-            if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
-                const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
-                const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-                if (slice < overlap_params.depth) {
-                    auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
-                    return std::make_pair(std::move(surface), std::move(view));
-                }
-            }
-        }
-
-        TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        bool modified = false;
+
+    runtime.InvalidateColorBuffer(color_buffer, index);
+}
+
+template <class P>
+void TextureCache<P>::InvalidateDepthBuffer() {
+    ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
+    depth_buffer_id = FindDepthBuffer(false);
+    if (!depth_buffer_id) {
+        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
+        return;
+    }
+    // When invalidating the depth buffer, the old contents are no longer relevant
+    ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
+    image.flags &= ~ImageFlagBits::CpuModified;
+    image.flags &= ~ImageFlagBits::GpuModified;
+
+    ImageView& depth_buffer = slot_image_views[depth_buffer_id];
+    runtime.InvalidateDepthBuffer(depth_buffer);
+}
+
+template <class P>
+typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
+    // TODO: Properly implement this
+    const auto it = page_table.find(cpu_addr >> PAGE_SHIFT);
+    if (it == page_table.end()) {
+        return nullptr;
+    }
+    const auto& image_ids = it->second;
+    for (const ImageId image_id : image_ids) {
+        const ImageBase& image = slot_images[image_id];
+        if (image.cpu_addr != cpu_addr) {
+            continue;
+        }
+        if (image.image_view_ids.empty()) {
+            continue;
+        }
+        return &slot_image_views[image.image_view_ids.at(0)];
+    }
+    return nullptr;
+}
-
-        for (auto& surface : overlaps) {
-            const SurfaceParams& src_params = surface->GetSurfaceParams();
-            if (src_params.target != SurfaceTarget::Texture2D ||
-                src_params.height != params.height ||
-                src_params.block_depth != params.block_depth ||
-                src_params.block_height != params.block_height) {
-                return std::nullopt;
-            }
-            modified |= surface->IsModified();
-
-            const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
-            const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
-            const u32 width = params.width;
-            const u32 height = params.height;
-            const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
-            TryCopyImage(surface, new_surface, copy_params);
-        }
-        for (const auto& surface : overlaps) {
-            Unregister(surface);
-        }
-        new_surface->MarkAsModified(modified, Tick());
-        Register(new_surface);
-
-        TView view = new_surface->GetMainView();
-        return std::make_pair(std::move(new_surface), std::move(view));
-    }
800
801 /**
802 * Gets the starting address and parameters of a candidate surface and tries
803 * to find a matching surface within the cache. This is done in 3 big steps:
804 *
805 * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
806 *
807 * 2. Check if there are any overlaps at all; if there are none, we just load the texture
808 * from memory, else we move to step 3.
809 *
810 * 3. Consists of figuring out the relationship between the candidate texture and the
811 * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
812 * there's many, we just try to reconstruct a new surface out of them based on the
813 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
814 * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
815 * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
816 * a new surface.
817 *
818 * @param gpu_addr The starting address of the candidate surface.
819 * @param params The parameters on the candidate surface.
820 * @param preserve_contents Indicates that the new surface should be loaded from memory or
821 * left blank.
822 * @param is_render Whether or not the surface is a render target.
823 **/
824 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
825 const SurfaceParams& params, bool preserve_contents,
826 bool is_render) {
827 // Step 1
828 // Check Level 1 Cache for a fast structural match. If candidate surface
829 // matches at a certain level, we are pretty much done.
830 if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
831 TSurface& current_surface = iter->second;
832 const auto topological_result = current_surface->MatchesTopology(params);
833 if (topological_result != MatchTopologyResult::FullMatch) {
834 VectorSurface overlaps{current_surface};
835 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
836 topological_result);
837 }
-
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None) {
-                const auto& old_params = current_surface->GetSurfaceParams();
-                const bool not_3d = params.target != SurfaceTarget::Texture3D &&
-                                    old_params.target != SurfaceTarget::Texture3D;
-                if (not_3d || current_surface->MatchTarget(params.target)) {
-                    if (struct_result == MatchStructureResult::FullMatch) {
-                        return ManageStructuralMatch(current_surface, params, is_render);
-                    } else {
-                        return RebuildSurface(current_surface, params, is_render);
-                    }
-                }
-            }
-        }
-
-        // Step 2
-        // Obtain all possible overlaps in the memory region
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
-
-        // If none are found, we are done; we just create the surface and load it.
-        if (overlaps.empty()) {
-            return InitializeSurface(gpu_addr, params, preserve_contents);
-        }
-
-        // Step 3
-        // Now we need to figure out the relationship between the texture and its overlaps.
-        // We do a topological test to ensure we can find some relationship; if it fails,
-        // immediately recycle the texture.
-        for (const auto& surface : overlaps) {
-            const auto topological_result = surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      topological_result);
-            }
-        }
+
+template <class P>
+bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
+    return !uncommitted_downloads.empty();
+}
+
+template <class P>
+bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
+    return !committed_downloads.empty() && !committed_downloads.front().empty();
+}
+
+template <class P>
+void TextureCache<P>::CommitAsyncFlushes() {
+    // This is intentionally passing the value by copy
+    committed_downloads.push(uncommitted_downloads);
+    uncommitted_downloads.clear();
+}
+
+template <class P>
+void TextureCache<P>::PopAsyncFlushes() {
+    if (committed_downloads.empty()) {
+        return;
+    }
+    const std::span<const ImageId> download_ids = committed_downloads.front();
+    if (download_ids.empty()) {
+        committed_downloads.pop();
+        return;
+    }
+    size_t total_size_bytes = 0;
757 for (const ImageId image_id : download_ids) {
758 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
759 }
760 auto download_map = runtime.MapDownloadBuffer(total_size_bytes);
761 size_t buffer_offset = 0;
762 for (const ImageId image_id : download_ids) {
763 Image& image = slot_images[image_id];
764 const auto copies = FullDownloadCopies(image.info);
765 image.DownloadMemory(download_map, buffer_offset, copies);
766 buffer_offset += image.unswizzled_size_bytes;
767 }
768 // Wait for downloads to finish
769 runtime.Finish();
770
771 buffer_offset = 0;
772 const std::span<u8> download_span = download_map.Span();
773 for (const ImageId image_id : download_ids) {
774 const ImageBase& image = slot_images[image_id];
775 const auto copies = FullDownloadCopies(image.info);
776 const std::span<u8> image_download_span = download_span.subspan(buffer_offset);
777 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span);
778 buffer_offset += image.unswizzled_size_bytes;
779 }
780 committed_downloads.pop();
781}
+
+template <class P>
+bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
+    bool is_modified = false;
+    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return false;
+        }
+        is_modified = true;
+        return true;
+    });
+    return is_modified;
+}
-
-        // Manage 3D textures
-        if (params.block_depth > 0) {
-            auto surface =
-                Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
-            if (surface) {
-                return *surface;
-            }
-        }
+
+template <class P>
+void TextureCache<P>::RefreshContents(Image& image) {
+    if (False(image.flags & ImageFlagBits::CpuModified)) {
+        // Only upload modified images
+        return;
+    }
+    image.flags &= ~ImageFlagBits::CpuModified;
+    TrackImage(image);
-
-        // Split cases between 1 overlap or many.
-        if (overlaps.size() == 1) {
-            TSurface current_surface = overlaps[0];
-            // First check if the surface is within the overlap. If not, it means one of two
-            // things: either the candidate surface is a supertexture of the overlap, or they
-            // don't match in any known way.
-            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
-                const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
-                if (view) {
-                    return *view;
-                }
-                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                      MatchTopologyResult::FullMatch);
-            }
899 // Now we check if the candidate is a mipmap/layer of the overlap
900 std::optional<TView> view =
901 current_surface->EmplaceView(params, gpu_addr, candidate_size);
902 if (view) {
903 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
904 if (is_mirage) {
905 // On a mirage view, we need to recreate the surface under this new view
906 // and then obtain a view again.
907 SurfaceParams new_params = current_surface->GetSurfaceParams();
908 const u32 wh = SurfaceParams::ConvertWidth(
909 new_params.width, new_params.pixel_format, params.pixel_format);
910 const u32 hh = SurfaceParams::ConvertHeight(
911 new_params.height, new_params.pixel_format, params.pixel_format);
912 new_params.width = wh;
913 new_params.height = hh;
914 new_params.pixel_format = params.pixel_format;
915 std::pair<TSurface, TView> pair =
916 RebuildSurface(current_surface, new_params, is_render);
917 std::optional<TView> mirage_view =
918 pair.first->EmplaceView(params, gpu_addr, candidate_size);
919 if (mirage_view)
920 return {pair.first, *mirage_view};
921 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
922 MatchTopologyResult::FullMatch);
923 }
924 return {current_surface, *view};
925 }
926 } else {
927 // If there are many overlaps, odds are they are subtextures of the candidate
928 // surface. We try to construct a new surface based on the candidate parameters,
929 // using the overlaps. If a single overlap fails, this will fail.
930 std::optional<std::pair<TSurface, TView>> view =
931 TryReconstructSurface(overlaps, params, gpu_addr);
932 if (view) {
933 return *view;
934 }
935 }
936 // We failed all the tests, recycle the overlaps into a new texture.
937 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
938 MatchTopologyResult::FullMatch);
939 }
940
941 /**
942 * Gets the starting address and parameters of a candidate surface and tries to find a
943 * matching surface within the cache that's similar to it. If there are many textures
944 * or the texture found is entirely incompatible, it will fail. If no texture is found, the
945 * blit will be unsuccessful.
946 *
947 * @param gpu_addr The starting address of the candidate surface.
948 * @param params The parameters on the candidate surface.
949 **/
950 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
951 const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
952
953 if (!cpu_addr) {
954 Deduction result{};
955 result.type = DeductionType::DeductionFailed;
956 return result;
957 }
-
-        if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
-            TSurface& current_surface = iter->second;
-            const auto topological_result = current_surface->MatchesTopology(params);
-            if (topological_result != MatchTopologyResult::FullMatch) {
-                Deduction result{};
-                result.type = DeductionType::DeductionFailed;
-                return result;
-            }
-            const auto struct_result = current_surface->MatchesStructure(params);
-            if (struct_result != MatchStructureResult::None &&
-                current_surface->MatchTarget(params.target)) {
-                Deduction result{};
-                result.type = DeductionType::DeductionComplete;
-                result.surface = current_surface;
-                return result;
-            }
-        }
-
-        const std::size_t candidate_size = params.GetGuestSizeInBytes();
-        auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
+
+    if (image.info.num_samples > 1) {
+        LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
+        return;
+    }
+    auto map = runtime.MapUploadBuffer(MapSizeBytes(image));
+    UploadImageContents(image, map, 0);
+    runtime.InsertUploadMemoryBarrier();
+}
+
+template <class P>
+template <typename MapBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset);
818 const GPUVAddr gpu_addr = image.gpu_addr;
819
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
821 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
822 const auto uploads = FullUploadSwizzles(image.info);
823 runtime.AccelerateImageUpload(image, map, buffer_offset, uploads);
824 } else if (True(image.flags & ImageFlagBits::Converted)) {
825 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
826 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
827 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
828 image.UploadMemory(map, buffer_offset, copies);
829 } else if (image.info.type == ImageType::Buffer) {
830 const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)};
831 image.UploadMemory(map, buffer_offset, copies);
832 } else {
833 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
834 image.UploadMemory(map, buffer_offset, copies);
835 }
836}
+
+template <class P>
+ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
+    if (!IsValidAddress(gpu_memory, config)) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    const auto [pair, is_new] = image_views.try_emplace(config);
+    ImageViewId& image_view_id = pair->second;
+    if (is_new) {
+        image_view_id = CreateImageView(config);
+    }
+    return image_view_id;
+}
+
+template <class P>
+ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
+    const ImageInfo info(config);
+    const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride;
+    const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    ImageBase& image = slot_images[image_id];
+    const SubresourceBase base = image.TryFindBase(config.Address()).value();
+    ASSERT(base.level == 0);
+    const ImageViewInfo view_info(config, base.layer);
+    const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
+    ImageViewBase& image_view = slot_image_views[image_view_id];
+    image_view.flags |= ImageViewFlagBits::Strong;
+    image.flags |= ImageFlagBits::Strong;
+    return image_view_id;
+}
-
-        if (overlaps.empty()) {
-            Deduction result{};
-            result.type = DeductionType::DeductionIncomplete;
-            return result;
-        }
-
-        if (overlaps.size() > 1) {
-            Deduction result{};
-            result.type = DeductionType::DeductionFailed;
-            return result;
-        } else {
-            Deduction result{};
-            result.type = DeductionType::DeductionComplete;
-            result.surface = overlaps[0];
-            return result;
-        }
-    }
-
-    /**
-     * Gets a null surface based on a target texture.
-     * @param target The target of the null surface.
-     */
-    TView GetNullSurface(SurfaceTarget target) {
-        const u32 i_target = static_cast<u32>(target);
-        if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) {
-            return it->second->GetMainView();
-        }
-        SurfaceParams params{};
-        params.target = target;
-        params.is_tiled = false;
-        params.srgb_conversion = false;
-        params.is_layered =
-            target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
-            target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
-        params.block_width = 0;
-        params.block_height = 0;
-        params.block_depth = 0;
-        params.tile_width_spacing = 1;
-        params.width = 1;
-        params.height = 1;
-        params.depth = 1;
-        if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
-            params.depth = 6;
-        }
-        params.pitch = 4;
-        params.num_levels = 1;
-        params.emulated_levels = 1;
-        params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM;
-        params.type = VideoCore::Surface::SurfaceType::ColorTexture;
-        auto surface = CreateSurface(0ULL, params);
-        invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
-        surface->UploadTexture(invalid_memory);
-        surface->MarkAsModified(false, Tick());
-        invalid_cache.emplace(i_target, surface);
-        return surface->GetMainView();
-    }
+
+template <class P>
+ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                           RelaxedOptions options) {
+    if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
+        return image_id;
+    }
+    return InsertImage(info, gpu_addr, options);
+}
+
+template <class P>
+ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                   RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        return ImageId{};
+    }
+    ImageId image_id;
+    const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
+            const bool strict_size = False(options & RelaxedOptions::Size) &&
+                                     True(existing_image.flags & ImageFlagBits::Strong);
+            const ImageInfo& existing = existing_image.info;
+            if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
+                existing.pitch == info.pitch &&
+                IsPitchLinearSameSize(existing, info, strict_size) &&
+                IsViewCompatible(existing.format, info.format)) {
+                image_id = existing_image_id;
+                return true;
+            }
+        } else if (IsSubresource(info, existing_image, gpu_addr, options)) {
+            image_id = existing_image_id;
+            return true;
+        }
1036
1037 /**
1038 * Gets the a source and destination starting address and parameters,
1039 * and tries to deduce if they are supposed to be depth textures. If so, their
1040 * parameters are modified and fixed into so.
1041 *
1042 * @param src_params The parameters of the candidate surface.
1043 * @param dst_params The parameters of the destination surface.
1044 * @param src_gpu_addr The starting address of the candidate surface.
1045 * @param dst_gpu_addr The starting address of the destination surface.
1046 **/
1047 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1048 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1049 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1050 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1051 if (deduced_src.Failed() || deduced_dst.Failed()) {
1052 return;
-        }
+        }
+        return false;
+    };
+    ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
+    return image_id;
+}
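FindImage treats RelaxedOptions as a bit set where each bit loosens one matching criterion. A minimal flag-set sketch consistent with the uses visible in this file (Size, Format and Samples are the bits actually exercised here; bit values are illustrative):

enum class RelaxedOptions : u32 {
    Size = 1 << 0,    // accept images whose size does not match exactly
    Format = 1 << 1,  // accept view-compatible instead of identical formats
    Samples = 1 << 2, // accept differing sample counts
};

constexpr RelaxedOptions operator|(RelaxedOptions lhs, RelaxedOptions rhs) {
    return static_cast<RelaxedOptions>(static_cast<u32>(lhs) | static_cast<u32>(rhs));
}

constexpr RelaxedOptions operator&(RelaxedOptions lhs, RelaxedOptions rhs) {
    return static_cast<RelaxedOptions>(static_cast<u32>(lhs) & static_cast<u32>(rhs));
}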
+
+template <class P>
+ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
+                                     RelaxedOptions options) {
+    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+    const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
+    const Image& image = slot_images[image_id];
+    // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
+    const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
+    if (is_new) {
+        it->second = slot_image_allocs.insert();
+    }
+    slot_image_allocs[it->second].images.push_back(image_id);
+    return image_id;
+}
-
-        const bool incomplete_src = deduced_src.Incomplete();
-        const bool incomplete_dst = deduced_dst.Incomplete();
-
-        if (incomplete_src && incomplete_dst) {
-            return;
-        }
-
-        const bool any_incomplete = incomplete_src || incomplete_dst;
-
-        if (!any_incomplete) {
-            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
-                return;
-            }
-        } else {
-            if (incomplete_src && !(deduced_dst.IsDepth())) {
-                return;
-            }
-
-            if (incomplete_dst && !(deduced_src.IsDepth())) {
-                return;
-            }
-        }
-
-        const auto inherit_format = [](SurfaceParams& to, TSurface from) {
-            const SurfaceParams& params = from->GetSurfaceParams();
-            to.pixel_format = params.pixel_format;
-            to.type = params.type;
-        };
-        // Now we handle the cases where one or both surfaces are depth and the other is unknown
-        if (!incomplete_src) {
-            inherit_format(src_params, deduced_src.surface);
-        } else {
-            inherit_format(src_params, deduced_dst.surface);
-        }
-        if (!incomplete_dst) {
-            inherit_format(dst_params, deduced_dst.surface);
-        } else {
-            inherit_format(dst_params, deduced_src.surface);
-        }
-    }
+
+template <class P>
+ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
+    ImageInfo new_info = info;
+    const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+    std::vector<ImageId> overlap_ids;
+    std::vector<ImageId> left_aliased_ids;
+    std::vector<ImageId> right_aliased_ids;
+    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+        if (info.type != overlap.info.type) {
+            return;
+        }
+        if (info.type == ImageType::Linear) {
+            if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
+                // Alias linear images with the same pitch
+                left_aliased_ids.push_back(overlap_id);
+            }
+            return;
+        }
+        const auto solution = ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, true);
+        if (solution) {
+            gpu_addr = solution->gpu_addr;
+            cpu_addr = solution->cpu_addr;
+            new_info.resources = solution->resources;
+            overlap_ids.push_back(overlap_id);
+            return;
+        }
+        static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
+        const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
+        if (IsSubresource(new_info, overlap, gpu_addr, options)) {
+            left_aliased_ids.push_back(overlap_id);
+        } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) {
+            right_aliased_ids.push_back(overlap_id);
+        }
+    });
+    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
+    Image& new_image = slot_images[new_image_id];
+
+    // TODO: Only upload what we need
+    RefreshContents(new_image);
+
+    for (const ImageId overlap_id : overlap_ids) {
+        Image& overlap = slot_images[overlap_id];
+        if (overlap.info.num_samples != new_image.info.num_samples) {
+            LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
+        } else {
+            const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
+            const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
+            runtime.CopyImage(new_image, overlap, copies);
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+    ImageBase& new_image_base = new_image;
+    for (const ImageId aliased_id : right_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
+    }
+    for (const ImageId aliased_id : left_aliased_ids) {
+        ImageBase& aliased = slot_images[aliased_id];
+        AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
+    }
+    RegisterImage(new_image_id);
+    return new_image_id;
+}
+
+template <class P>
+typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
+    const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
+    static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
+    const GPUVAddr dst_addr = dst.Address();
+    const GPUVAddr src_addr = src.Address();
+    ImageInfo dst_info(dst);
+    ImageInfo src_info(src);
+    ImageId dst_id;
+    ImageId src_id;
+    do {
+        has_deleted_images = false;
+        dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
+        src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
+        const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
+        const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
+        DeduceBlitImages(dst_info, src_info, dst_image, src_image);
+        if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
+            continue;
+        }
+        if (!dst_id) {
+            dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
+        }
+        if (!src_id) {
+            src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
+        }
+    } while (has_deleted_images);
+    return BlitImages{
+        .dst_id = dst_id,
+        .src_id = src_id,
+        .dst_format = dst_info.format,
+        .src_format = src_info.format,
+    };
+}
+
+template <class P>
+SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
+    if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
+        return NULL_SAMPLER_ID;
+    }
+    const auto [pair, is_new] = samplers.try_emplace(config);
+    if (is_new) {
+        pair->second = slot_samplers.insert(runtime, config);
+    }
+    return pair->second;
+}
-
-    std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
-                                                 bool preserve_contents) {
-        auto new_surface{GetUncachedSurface(gpu_addr, params)};
-        Register(new_surface);
-        if (preserve_contents) {
-            LoadSurface(new_surface);
-        }
-        return {new_surface, new_surface->GetMainView()};
-    }
-
-    void LoadSurface(const TSurface& surface) {
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->LoadBuffer(gpu_memory, staging_cache);
-        surface->UploadTexture(staging_cache.GetBuffer(0));
-        surface->MarkAsModified(false, Tick());
-    }
+
+template <class P>
+ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (index >= regs.rt_control.count) {
+        return ImageViewId{};
+    }
+    const auto& rt = regs.rt[index];
+    const GPUVAddr gpu_addr = rt.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
+    }
+    if (rt.format == Tegra::RenderTargetFormat::NONE) {
+        return ImageViewId{};
+    }
+    const ImageInfo info(regs, index);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
+    const auto& regs = maxwell3d.regs;
+    if (!regs.zeta_enable) {
+        return ImageViewId{};
+    }
+    const GPUVAddr gpu_addr = regs.zeta.Address();
+    if (gpu_addr == 0) {
+        return ImageViewId{};
-
-    void FlushSurface(const TSurface& surface) {
-        if (!surface->IsModified()) {
-            return;
-        }
-        staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
-        surface->DownloadTexture(staging_cache.GetBuffer(0));
-        surface->FlushBuffer(gpu_memory, staging_cache);
-        surface->MarkAsModified(false, Tick());
-    }
1122
1123 void RegisterInnerCache(TSurface& surface) {
1124 const VAddr cpu_addr = surface->GetCpuAddr();
1125 VAddr start = cpu_addr >> registry_page_bits;
1126 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1127 l1_cache[cpu_addr] = surface;
1128 while (start <= end) {
1129 registry[start].push_back(surface);
1130 start++;
1131 }
-    }
+    }
+    const ImageInfo info(regs);
+    return FindRenderTargetView(info, gpu_addr, is_clear);
+}
+
+template <class P>
+ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
+                                                  bool is_clear) {
+    const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
+    const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
+    if (!image_id) {
+        return NULL_IMAGE_VIEW_ID;
+    }
+    Image& image = slot_images[image_id];
+    const ImageViewType view_type = RenderTargetImageViewType(info);
+    SubresourceBase base;
+    if (image.info.type == ImageType::Linear) {
+        base = SubresourceBase{.level = 0, .layer = 0};
+    } else {
+        base = image.TryFindBase(gpu_addr).value();
+    }
+    const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
+    const SubresourceRange range{
+        .base = base,
+        .extent = {.levels = 1, .layers = layers},
+    };
+    return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
+}
-
-    void UnregisterInnerCache(TSurface& surface) {
-        const VAddr cpu_addr = surface->GetCpuAddr();
-        VAddr start = cpu_addr >> registry_page_bits;
-        const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
-        l1_cache.erase(cpu_addr);
-        while (start <= end) {
-            auto& reg{registry[start]};
-            reg.erase(std::find(reg.begin(), reg.end(), surface));
-            start++;
-        }
-    }
-
-    VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
-        if (size == 0) {
-            return {};
-        }
-        const VAddr cpu_addr_end = cpu_addr + size;
-        const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
-        VectorSurface surfaces;
-        for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
-            const auto it = registry.find(start);
-            if (it == registry.end()) {
-                continue;
-            }
-            for (auto& surface : it->second) {
-                if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
-                    continue;
-                }
-                surface->MarkAsPicked(true);
-                surfaces.push_back(surface);
-            }
-        }
-        for (auto& surface : surfaces) {
-            surface->MarkAsPicked(false);
-        }
-        return surfaces;
-    }
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 32> images;
+    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+        const auto it = page_table.find(page);
+        if (it == page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : it->second) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            if (!image.Overlaps(cpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
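ForEachImageInRegion picks its iteration contract from the callback's return type: a bool-returning callback may stop the walk early (as IsRegionGpuModified does above), while a void callback always visits every image. The detection technique in isolation (a standalone illustrative example):

#include <type_traits>
#include <vector>

template <typename Func>
void ForEach(const std::vector<int>& values, Func&& func) {
    static constexpr bool BOOL_BREAK = std::is_same_v<std::invoke_result_t<Func, int>, bool>;
    for (const int value : values) {
        if constexpr (BOOL_BREAK) {
            if (func(value)) {
                return; // the callback asked to stop early
            }
        } else {
            func(value);
        }
    }
}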
-
-    void ReserveSurface(const SurfaceParams& params, TSurface surface) {
-        surface_reserve[params].push_back(std::move(surface));
-    }
+
+template <class P>
+ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
+    Image& image = slot_images[image_id];
+    if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
+        return image_view_id;
+    }
+    const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+    image.InsertView(info, image_view_id);
+    return image_view_id;
+}
1148
1149template <class P>
1150void TextureCache<P>::RegisterImage(ImageId image_id) {
1151 ImageBase& image = slot_images[image_id];
1152 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1153 "Trying to register an already registered image");
1154 image.flags |= ImageFlagBits::Registered;
1155 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1156 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1157}
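RegisterImage and UnregisterImage index an image under every page it touches; ForEachPage (not shown in this hunk) is, under that assumption, an inclusive walk over the page numbers covered by [addr, addr + size). A sketch with PAGE_SHIFT as the cache's page granularity:

template <typename Func>
void ForEachPage(VAddr addr, size_t size, Func&& func) {
    const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
    for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
        func(page); // e.g. page_table[page].push_back(image_id)
    }
}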
+
+template <class P>
+void TextureCache<P>::UnregisterImage(ImageId image_id) {
+    Image& image = slot_images[image_id];
+    ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
+               "Trying to unregister an image that is not registered");
+    image.flags &= ~ImageFlagBits::Registered;
+    ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
+        const auto page_it = page_table.find(page);
+        if (page_it == page_table.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_SHIFT);
+            return;
+        }
+        std::vector<ImageId>& image_ids = page_it->second;
+        const auto vector_it = std::ranges::find(image_ids, image_id);
+        if (vector_it == image_ids.end()) {
+            UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_SHIFT);
+            return;
+        }
+        image_ids.erase(vector_it);
+    });
+}
-
-    TSurface TryGetReservedSurface(const SurfaceParams& params) {
-        auto search{surface_reserve.find(params)};
-        if (search == surface_reserve.end()) {
-            return {};
-        }
-        for (auto& surface : search->second) {
-            if (!surface->IsRegistered()) {
-                return surface;
-            }
-        }
-        return {};
-    }
1188 1180
1189 /// Try to do an image copy logging when formats are incompatible. 1181template <class P>
1190 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { 1182void TextureCache<P>::TrackImage(ImageBase& image) {
1191 const SurfaceParams& src_params = src->GetSurfaceParams(); 1183 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 const SurfaceParams& dst_params = dst->GetSurfaceParams(); 1184 image.flags |= ImageFlagBits::Tracked;
1193 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { 1185 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1194 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", dst_params.pixel_format, 1186}
1195 src_params.pixel_format); 1187
1196 return; 1188template <class P>
1189void TextureCache<P>::UntrackImage(ImageBase& image) {
1190 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1191 image.flags &= ~ImageFlagBits::Tracked;
1192 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1193}
1194
1195template <class P>
1196void TextureCache<P>::DeleteImage(ImageId image_id) {
1197 ImageBase& image = slot_images[image_id];
1198 const GPUVAddr gpu_addr = image.gpu_addr;
1199 const auto alloc_it = image_allocs_table.find(gpu_addr);
1200 if (alloc_it == image_allocs_table.end()) {
1201 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1202 gpu_addr);
1203 return;
1204 }
1205 const ImageAllocId alloc_id = alloc_it->second;
1206 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1207 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1208 if (alloc_image_it == alloc_images.end()) {
1209 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1210 return;
1211 }
1212 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1213 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1214
1215 // Mark render targets as dirty
1216 auto& dirty = maxwell3d.dirty.flags;
1217 dirty[Dirty::RenderTargets] = true;
1218 dirty[Dirty::ZetaBuffer] = true;
1219 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1220 dirty[Dirty::ColorBuffer0 + rt] = true;
1221 }
1222 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1223 for (const ImageViewId image_view_id : image_view_ids) {
1224 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1225 if (render_targets.depth_buffer_id == image_view_id) {
1226 render_targets.depth_buffer_id = ImageViewId{};
1197 } 1227 }
1198 ImageCopy(src, dst, copy);
1199 } 1228 }
1229 RemoveImageViewReferences(image_view_ids);
1230 RemoveFramebuffers(image_view_ids);
1231
1232 for (const AliasedImage& alias : image.aliased_images) {
1233 ImageBase& other_image = slot_images[alias.id];
1234 [[maybe_unused]] const size_t num_removed_aliases =
1235 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1236 return other_alias.id == image_id;
1237 });
1238 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1239 num_removed_aliases);
1240 }
1241 for (const ImageViewId image_view_id : image_view_ids) {
1242 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1243 slot_image_views.erase(image_view_id);
1244 }
1245 sentenced_images.Push(std::move(slot_images[image_id]));
1246 slot_images.erase(image_id);
1200 1247
1201 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { 1248 alloc_images.erase(alloc_image_it);
1202 return siblings_table[static_cast<std::size_t>(format)]; 1249 if (alloc_images.empty()) {
1250 image_allocs_table.erase(alloc_it);
1203 } 1251 }
1252 if constexpr (ENABLE_VALIDATION) {
1253 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1254 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1255 }
1256 graphics_image_table.Invalidate();
1257 compute_image_table.Invalidate();
1258 has_deleted_images = true;
1259}
1204 1260
1205 /// Returns true the shader sampler entry is compatible with the TIC texture type. 1261template <class P>
1206 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, 1262void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1207 const VideoCommon::Shader::Sampler& entry) { 1263 auto it = image_views.begin();
1208 const auto shader_type = entry.type; 1264 while (it != image_views.end()) {
1209 switch (tic_type) { 1265 const auto found = std::ranges::find(removed_views, it->second);
1210 case Tegra::Texture::TextureType::Texture1D: 1266 if (found != removed_views.end()) {
1211 case Tegra::Texture::TextureType::Texture1DArray: 1267 it = image_views.erase(it);
1212 return shader_type == Tegra::Shader::TextureType::Texture1D; 1268 } else {
1213 case Tegra::Texture::TextureType::Texture1DBuffer: 1269 ++it;
1214 // TODO(Rodrigo): Assume as valid for now
1215 return true;
1216 case Tegra::Texture::TextureType::Texture2D:
1217 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1218 return shader_type == Tegra::Shader::TextureType::Texture2D;
1219 case Tegra::Texture::TextureType::Texture2DArray:
1220 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1221 shader_type == Tegra::Shader::TextureType::TextureCube;
1222 case Tegra::Texture::TextureType::Texture3D:
1223 return shader_type == Tegra::Shader::TextureType::Texture3D;
1224 case Tegra::Texture::TextureType::TextureCubeArray:
1225 case Tegra::Texture::TextureType::TextureCubemap:
1226 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1227 return true;
1228 }
1229 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array;
1230 } 1270 }
1231 UNREACHABLE();
1232 return true;
1233 } 1271 }
1272}
1234 1273
1235 struct FramebufferTargetInfo { 1274template <class P>
1236 TSurface target; 1275void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1237 TView view; 1276 auto it = framebuffers.begin();
1238 }; 1277 while (it != framebuffers.end()) {
1239 1278 if (it->first.Contains(removed_views)) {
1240 void AsyncFlushSurface(TSurface& surface) { 1279 it = framebuffers.erase(it);
1241 if (!uncommitted_flushes) { 1280 } else {
1242 uncommitted_flushes = std::make_shared<std::list<TSurface>>(); 1281 ++it;
1243 } 1282 }
1244 uncommitted_flushes->push_back(surface);
1245 } 1283 }
1284}
1246 1285
1247 VideoCore::RasterizerInterface& rasterizer; 1286template <class P>
1248 Tegra::Engines::Maxwell3D& maxwell3d; 1287void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1249 Tegra::MemoryManager& gpu_memory; 1288 image.flags |= ImageFlagBits::GpuModified;
1250 1289 image.modification_tick = ++modification_tick;
1251 FormatLookupTable format_lookup_table; 1290}
1252 FormatCompatibility format_compatibility;
1253
1254 u64 ticks{};
1255
1256 // Guards the cache for protection conflicts.
1257 bool guard_render_targets{};
1258 bool guard_samplers{};
1259
1260 // The siblings table is for formats that can inter exchange with one another
1261 // without causing issues. This is only valid when a conflict occurs on a non
1262 // rendering use.
1263 std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;
1264
1265 // The internal Cache is different for the Texture Cache. It's based on buckets
1266 // of 1MB. This fits better for the purpose of this cache as textures are normaly
1267 // large in size.
1268 static constexpr u64 registry_page_bits{20};
1269 static constexpr u64 registry_page_size{1 << registry_page_bits};
1270 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1271 1291
1272 static constexpr u32 DEPTH_RT = 8; 1292template <class P>
1273 static constexpr u32 NO_RT = 0xFFFFFFFF; 1293void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1294 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1295 ImageBase& image = slot_images[image_id];
1296 u64 most_recent_tick = image.modification_tick;
1297 for (const AliasedImage& aliased : image.aliased_images) {
1298 ImageBase& aliased_image = slot_images[aliased.id];
1299 if (image.modification_tick < aliased_image.modification_tick) {
1300 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1301 aliased_images.push_back(&aliased);
1302 }
1303 }
1304 if (aliased_images.empty()) {
1305 return;
1306 }
1307 image.modification_tick = most_recent_tick;
1308 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1309 const ImageBase& lhs_image = slot_images[lhs->id];
1310 const ImageBase& rhs_image = slot_images[rhs->id];
1311 return lhs_image.modification_tick < rhs_image.modification_tick;
1312 });
1313 for (const AliasedImage* const aliased : aliased_images) {
1314 CopyImage(image_id, aliased->id, aliased->copies);
1315 }
1316}
1274 1317
1275 // The L1 Cache is used for fast texture lookup before checking the overlaps 1318template <class P>
1276 // This avoids calculating size and other stuffs. 1319void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1277 std::unordered_map<VAddr, TSurface> l1_cache; 1320 Image& image = slot_images[image_id];
1321 if (invalidate) {
1322 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1323 if (False(image.flags & ImageFlagBits::Tracked)) {
1324 TrackImage(image);
1325 }
1326 } else {
1327 RefreshContents(image);
1328 SynchronizeAliases(image_id);
1329 }
1330 if (is_modification) {
1331 MarkModification(image);
1332 }
1333 image.frame_tick = frame_tick;
1334}
1278 1335
1279 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 1336template <class P>
1280 /// previously been used. This is to prevent surfaces from being constantly created and 1337void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1281 /// destroyed when used with different surface parameters. 1338 bool invalidate) {
1282 std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; 1339 if (!image_view_id) {
1283 std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> 1340 return;
1284 render_targets; 1341 }
1285 FramebufferTargetInfo depth_buffer; 1342 const ImageViewBase& image_view = slot_image_views[image_view_id];
1343 PrepareImage(image_view.image_id, is_modification, invalidate);
1344}
1286 1345
1287 std::vector<TSurface> sampled_textures; 1346template <class P>
1347void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1348 Image& dst = slot_images[dst_id];
1349 Image& src = slot_images[src_id];
1350 const auto dst_format_type = GetFormatType(dst.info.format);
1351 const auto src_format_type = GetFormatType(src.info.format);
1352 if (src_format_type == dst_format_type) {
1353 if constexpr (HAS_EMULATED_COPIES) {
1354 if (!runtime.CanImageBeCopied(dst, src)) {
1355 return runtime.EmulateCopyImage(dst, src, copies);
1356 }
1357 }
1358 return runtime.CopyImage(dst, src, copies);
1359 }
1360 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1361 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1362 for (const ImageCopy& copy : copies) {
1363 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1364 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1365 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1366 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1367
1368 const SubresourceBase dst_base{
1369 .level = copy.dst_subresource.base_level,
1370 .layer = copy.dst_subresource.base_layer,
1371 };
1372 const SubresourceBase src_base{
1373 .level = copy.src_subresource.base_level,
1374 .layer = copy.src_subresource.base_layer,
1375 };
1376 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1377 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1378 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1379 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1380 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1381 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1382 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1383 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1384 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1385 ImageView& dst_view = slot_image_views[dst_view_id];
1386 ImageView& src_view = slot_image_views[src_view_id];
1387 [[maybe_unused]] const Extent3D expected_size{
1388 .width = std::min(dst_view.size.width, src_view.size.width),
1389 .height = std::min(dst_view.size.height, src_view.size.height),
1390 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1391 };
1392 UNIMPLEMENTED_IF(copy.extent != expected_size);
1288 1393
1289 /// This cache stores null surfaces in order to be used as a placeholder 1394 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1290 /// for invalid texture calls. 1395 }
1291 std::unordered_map<u32, TSurface> invalid_cache; 1396}
1292 std::vector<u8> invalid_memory;
1293 1397
1294 std::list<TSurface> marked_for_unregister; 1398template <class P>
1399void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1400 if (*old_id == new_id) {
1401 return;
1402 }
1403 if (*old_id) {
1404 const ImageViewBase& old_view = slot_image_views[*old_id];
1405 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1406 uncommitted_downloads.push_back(old_view.image_id);
1407 }
1408 }
1409 *old_id = new_id;
1410}
1295 1411
1296 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; 1412template <class P>
1297 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; 1413std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1414 ImageId image_id, const ImageViewInfo& view_info) {
1415 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1416 const ImageBase& image = slot_images[image_id];
1417 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1418 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1419 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1420 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1421 const u32 num_samples = image.info.num_samples;
1422 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1423 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1424 .color_buffer_ids = {color_view_id},
1425 .depth_buffer_id = depth_view_id,
1426 .size = {extent.width >> samples_x, extent.height >> samples_y},
1427 });
1428 return {framebuffer_id, view_id};
1429}
1298 1430
1299 StagingCache staging_cache; 1431template <class P>
1300 std::recursive_mutex mutex; 1432bool TextureCache<P>::IsFullClear(ImageViewId id) {
1301}; 1433 if (!id) {
1434 return true;
1435 }
1436 const ImageViewBase& image_view = slot_image_views[id];
1437 const ImageBase& image = slot_images[image_view.image_id];
1438 const Extent3D size = image_view.size;
1439 const auto& regs = maxwell3d.regs;
1440 const auto& scissor = regs.scissor_test[0];
1441 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1442 // Images with multiple resources can't be cleared in a single call
1443 return false;
1444 }
1445 if (regs.clear_flags.scissor == 0) {
1446 // If scissor testing is disabled, the clear is always full
1447 return true;
1448 }
1449 // Make sure the clear covers all texels in the subresource
1450 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1451 scissor.max_y >= size.height;
1452}
1302 1453
1303} // namespace VideoCommon 1454} // namespace VideoCommon
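The RegisterImage/UnregisterImage pair above keeps an inverted index from guest CPU pages to images: each image is appended to the bucket of every page its range touches, and ForEachImageInRegion walks only the buckets a query range covers, using the Picked flag so an image spanning several buckets is visited once. A minimal sketch of that bookkeeping, assuming for illustration a PAGE_SHIFT of 20 bits (1 MiB buckets, matching the old registry_page_bits; the real constant is defined elsewhere in this header):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using u64 = std::uint64_t;
using VAddr = u64;
using ImageId = u64; // stand-in for the cache's slot-vector id

// PAGE_SHIFT = 20 is an assumption for this sketch, not a value
// confirmed by this diff.
constexpr u64 PAGE_SHIFT = 20;

std::unordered_map<u64, std::vector<ImageId>> page_table;

// Invoke func(page) for every bucket overlapped by [addr, addr + size).
template <typename Func>
void ForEachPage(VAddr addr, std::size_t size, Func&& func) {
    const u64 page_end = (addr + size - 1) >> PAGE_SHIFT;
    for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
        func(page);
    }
}

void RegisterImage(ImageId image_id, VAddr addr, std::size_t size) {
    ForEachPage(addr, size, [&](u64 page) { page_table[page].push_back(image_id); });
}

Unregistration mirrors this by erasing the id from each bucket, exactly as the diff's UnregisterImage does. Note also how ForEachImageInRegion dispatches on the functor's return type with if constexpr (BOOL_BREAK), letting one traversal support both early-exit (bool-returning) and plain (void-returning) visitors.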
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h
new file mode 100644
index 000000000..2ad2d72a6
--- /dev/null
+++ b/src/video_core/texture_cache/types.h
@@ -0,0 +1,140 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9#include "video_core/texture_cache/slot_vector.h"
10
11namespace VideoCommon {
12
13constexpr size_t NUM_RT = 8;
14constexpr size_t MAX_MIP_LEVELS = 14;
15
16constexpr SlotId CORRUPT_ID{0xfffffffe};
17
18using ImageId = SlotId;
19using ImageViewId = SlotId;
20using ImageAllocId = SlotId;
21using SamplerId = SlotId;
22using FramebufferId = SlotId;
23
24enum class ImageType : u32 {
25 e1D,
26 e2D,
27 e3D,
28 Linear,
29 Buffer,
30};
31
32enum class ImageViewType : u32 {
33 e1D,
34 e2D,
35 Cube,
36 e3D,
37 e1DArray,
38 e2DArray,
39 CubeArray,
40 Rect,
41 Buffer,
42};
43constexpr size_t NUM_IMAGE_VIEW_TYPES = 9;
44
45enum class RelaxedOptions : u32 {
46 Size = 1 << 0,
47 Format = 1 << 1,
48 Samples = 1 << 2,
49};
50DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions)
51
52struct Offset2D {
53 constexpr auto operator<=>(const Offset2D&) const noexcept = default;
54
55 s32 x;
56 s32 y;
57};
58
59struct Offset3D {
60 constexpr auto operator<=>(const Offset3D&) const noexcept = default;
61
62 s32 x;
63 s32 y;
64 s32 z;
65};
66
67struct Extent2D {
68 constexpr auto operator<=>(const Extent2D&) const noexcept = default;
69
70 u32 width;
71 u32 height;
72};
73
74struct Extent3D {
75 constexpr auto operator<=>(const Extent3D&) const noexcept = default;
76
77 u32 width;
78 u32 height;
79 u32 depth;
80};
81
82struct SubresourceLayers {
83 s32 base_level = 0;
84 s32 base_layer = 0;
85 s32 num_layers = 1;
86};
87
88struct SubresourceBase {
89 constexpr auto operator<=>(const SubresourceBase&) const noexcept = default;
90
91 s32 level = 0;
92 s32 layer = 0;
93};
94
95struct SubresourceExtent {
96 constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default;
97
98 s32 levels = 1;
99 s32 layers = 1;
100};
101
102struct SubresourceRange {
103 constexpr auto operator<=>(const SubresourceRange&) const noexcept = default;
104
105 SubresourceBase base;
106 SubresourceExtent extent;
107};
108
109struct ImageCopy {
110 SubresourceLayers src_subresource;
111 SubresourceLayers dst_subresource;
112 Offset3D src_offset;
113 Offset3D dst_offset;
114 Extent3D extent;
115};
116
117struct BufferImageCopy {
118 size_t buffer_offset;
119 size_t buffer_size;
120 u32 buffer_row_length;
121 u32 buffer_image_height;
122 SubresourceLayers image_subresource;
123 Offset3D image_offset;
124 Extent3D image_extent;
125};
126
127struct BufferCopy {
128 size_t src_offset;
129 size_t dst_offset;
130 size_t size;
131};
132
133struct SwizzleParameters {
134 Extent3D num_tiles;
135 Extent3D block;
136 size_t buffer_offset;
137 s32 level;
138};
139
140} // namespace VideoCommon
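These are plain value types whose defaulted operator<=> also gives each struct an implicit memberwise operator==; that is what lets the cache code above compare aggregates directly, as in copy.src_offset != Offset3D{} and copy.extent != expected_size inside TextureCache<P>::CopyImage. A condensed, self-contained illustration of the idiom (names mirror the structs above):

#include <compare>
#include <cstdint>

using s32 = std::int32_t;
using u32 = std::uint32_t;

struct Offset3D {
    constexpr auto operator<=>(const Offset3D&) const noexcept = default;
    s32 x;
    s32 y;
    s32 z;
};

struct Extent3D {
    constexpr auto operator<=>(const Extent3D&) const noexcept = default;
    u32 width;
    u32 height;
    u32 depth;
};

// A defaulted operator<=> implicitly declares a defaulted operator==,
// so these aggregates compare memberwise with no hand-written boilerplate.
static_assert(Offset3D{.x = 0, .y = 0, .z = 0} == Offset3D{});
static_assert(Extent3D{.width = 16, .height = 16, .depth = 1} !=
              Extent3D{.width = 16, .height = 8, .depth = 1});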
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
new file mode 100644
index 000000000..9ed1fc007
--- /dev/null
+++ b/src/video_core/texture_cache/util.cpp
@@ -0,0 +1,1232 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// This file contains code from Ryujinx
6// A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx
7// The sections using code from Ryujinx are marked with a link to the original version
8
9// MIT License
10//
11// Copyright (c) Ryujinx Team and Contributors
12//
13// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
14// associated documentation files (the "Software"), to deal in the Software without restriction,
15// including without limitation the rights to use, copy, modify, merge, publish, distribute,
16// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
17// furnished to do so, subject to the following conditions:
18//
19// The above copyright notice and this permission notice shall be included in all copies or
20// substantial portions of the Software.
21//
22// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
23// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
25// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27//
28
29#include <algorithm>
30#include <array>
31#include <numeric>
32#include <optional>
33#include <span>
34#include <vector>
35
36#include "common/alignment.h"
37#include "common/assert.h"
38#include "common/bit_util.h"
39#include "common/common_types.h"
40#include "common/div_ceil.h"
41#include "video_core/compatible_formats.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/surface.h"
45#include "video_core/texture_cache/decode_bc4.h"
46#include "video_core/texture_cache/format_lookup_table.h"
47#include "video_core/texture_cache/formatter.h"
48#include "video_core/texture_cache/samples_helper.h"
49#include "video_core/texture_cache/util.h"
50#include "video_core/textures/astc.h"
51#include "video_core/textures/decoders.h"
52
53namespace VideoCommon {
54
55namespace {
56
57using Tegra::Texture::GOB_SIZE;
58using Tegra::Texture::GOB_SIZE_SHIFT;
59using Tegra::Texture::GOB_SIZE_X;
60using Tegra::Texture::GOB_SIZE_X_SHIFT;
61using Tegra::Texture::GOB_SIZE_Y;
62using Tegra::Texture::GOB_SIZE_Y_SHIFT;
63using Tegra::Texture::GOB_SIZE_Z;
64using Tegra::Texture::GOB_SIZE_Z_SHIFT;
65using Tegra::Texture::MsaaMode;
66using Tegra::Texture::SwizzleTexture;
67using Tegra::Texture::TextureFormat;
68using Tegra::Texture::TextureType;
69using Tegra::Texture::TICEntry;
70using Tegra::Texture::UnswizzleTexture;
71using VideoCore::Surface::BytesPerBlock;
72using VideoCore::Surface::DefaultBlockHeight;
73using VideoCore::Surface::DefaultBlockWidth;
74using VideoCore::Surface::IsCopyCompatible;
75using VideoCore::Surface::IsPixelFormatASTC;
76using VideoCore::Surface::IsViewCompatible;
77using VideoCore::Surface::PixelFormatFromDepthFormat;
78using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
79using VideoCore::Surface::SurfaceType;
80
81constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
82
83struct LevelInfo {
84 Extent3D size;
85 Extent3D block;
86 Extent2D tile_size;
87 u32 bpp_log2;
88 u32 tile_width_spacing;
89};
90
91[[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) {
92 if (shift == 0) {
93 return 0;
94 }
95 u32 x = unit_factor << (shift - 1);
96 if (x >= dimension) {
97 while (--shift) {
98 x >>= 1;
99 if (x < dimension) {
100 break;
101 }
102 }
103 }
104 return shift;
105}
106
107[[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) {
108 return std::max<u32>(size >> level, 1);
109}
110
111[[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) {
112 return Extent3D{
113 .width = AdjustMipSize(size.width, level),
114 .height = AdjustMipSize(size.height, level),
115 .depth = AdjustMipSize(size.depth, level),
116 };
117}
118
119[[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) {
120 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
121 return Extent3D{
122 .width = size.width >> samples_x,
123 .height = size.height >> samples_y,
124 .depth = size.depth,
125 };
126}
127
128template <u32 GOB_EXTENT>
129[[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) {
130 do {
131 while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) {
132 --block_size;
133 }
134 } while (level--);
135 return block_size;
136}
137
138[[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size,
139 u32 level) {
140 return {
141 .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level),
142 .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level),
143 .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level),
144 };
145}
146
147[[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) {
148 return {
149 .width = Common::DivCeil(size.width, tile_size.width),
150 .height = Common::DivCeil(size.height, tile_size.height),
151 .depth = size.depth,
152 };
153}
154
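// Note: std::countl_zero(x) ^ 0x1F is floor(log2(x)) for nonzero 32-bit x;
// XOR with 31 turns the leading-zero count into the index of the highest set
// bit, so the helper below maps a power-of-two block size to its shift amount.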
155[[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) {
156 return std::countl_zero(bytes_per_block) ^ 0x1F;
157}
158
159[[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) {
160 return BytesPerBlockLog2(BytesPerBlock(format));
161}
162
163[[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) {
164 const Extent3D num_blocks = AdjustTileSize(size, tile_size);
165 return num_blocks.width * num_blocks.height * num_blocks.depth;
166}
167
168[[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) {
169 return Common::DivCeil(AdjustMipSize(size, level), block_size);
170}
171
172[[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) {
173 return config.Width() * config.Height() * BytesPerBlock(format);
174}
175
176[[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) {
177 switch (type) {
178 case TextureType::Texture2D:
179 case TextureType::Texture2DArray:
180 case TextureType::Texture2DNoMipmap:
181 case TextureType::Texture3D:
182 case TextureType::TextureCubeArray:
183 case TextureType::TextureCubemap:
184 return true;
185 case TextureType::Texture1D:
186 case TextureType::Texture1DArray:
187 case TextureType::Texture1DBuffer:
188 return false;
189 }
190 return false;
191}
192
193[[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) {
194 switch (type) {
195 case ImageType::e2D:
196 case ImageType::e3D:
197 case ImageType::Linear:
198 return true;
199 case ImageType::e1D:
200 case ImageType::Buffer:
201 return false;
202 }
203 UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type));
204}
205
206[[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) {
207 switch (num_samples) {
208 case 1:
209 return {1, 1};
210 case 2:
211 return {2, 1};
212 case 4:
213 return {2, 2};
214 case 8:
215 return {4, 2};
216 case 16:
217 return {4, 4};
218 }
219 UNREACHABLE_MSG("Invalid number of samples={}", num_samples);
220 return {1, 1};
221}
222
223[[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) {
224 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
225}
226
227[[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) {
228 return Extent3D{
229 .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2,
230 .height = AdjustSize(info.size.height, level, info.tile_size.height),
231 .depth = AdjustMipSize(info.size.depth, level),
232 };
233}
234
235[[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) {
236 const Extent3D blocks = NumLevelBlocks(info, level);
237 return Extent3D{
238 .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width),
239 .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height),
240 .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth),
241 };
242}
243
244[[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) {
245 return Extent2D{
246 .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing,
247 .height = GOB_SIZE_Y_SHIFT + block_height,
248 };
249}
250
251[[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob,
252 u32 block_depth) {
253 return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) ||
254 num_tiles.depth < (1U << block_depth);
255}
256
257[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob,
258 u32 bpp_log2) {
259 if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) {
260 return GOB_SIZE_X_SHIFT - bpp_log2;
261 } else {
262 return gob.width;
263 }
264}
265
266[[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2,
267 u32 tile_width_spacing) {
268 const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing);
269 return StrideAlignment(num_tiles, block, gob, bpp_log2);
270}
271
272[[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) {
273 const Extent3D blocks = NumLevelBlocks(info, level);
274 const Extent2D gobs{
275 .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT),
276 .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT),
277 };
278 const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing);
279 const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth);
280 const u32 alignment = is_small ? 0 : info.tile_width_spacing;
281 return Extent2D{
282 .width = Common::AlignBits(gobs.width, alignment),
283 .height = gobs.height,
284 };
285}
286
287[[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) {
288 const Extent3D blocks = NumLevelBlocks(info, level);
289 const Extent3D tile_shift = TileShift(info, level);
290 const Extent2D gobs = NumGobs(info, level);
291 return Extent3D{
292 .width = Common::DivCeilLog2(gobs.width, tile_shift.width),
293 .height = Common::DivCeilLog2(gobs.height, tile_shift.height),
294 .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth),
295 };
296}
297
298[[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) {
299 const Extent3D tile_shift = TileShift(info, level);
300 const Extent3D tiles = LevelTiles(info, level);
301 const u32 num_tiles = tiles.width * tiles.height * tiles.depth;
302 const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth;
303 return num_tiles << shift;
304}
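// For reference: a GOB ("group of bytes") is a 512-byte 64x8 tile
// (GOB_SIZE_SHIFT = 9), and a block-linear tile here is a block of
// 2^width x 2^height x 2^depth GOBs, so the level's byte size above is the
// tile count shifted left by GOB_SIZE_SHIFT plus the three tile_shift terms.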
305
306[[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info,
307 u32 num_levels) {
308 ASSERT(num_levels <= MAX_MIP_LEVELS);
309 std::array<u32, MAX_MIP_LEVELS> sizes{};
310 for (u32 level = 0; level < num_levels; ++level) {
311 sizes[level] = CalculateLevelSize(info, level);
312 }
313 return sizes;
314}
315
316[[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block,
317 u32 num_samples, u32 tile_width_spacing) {
318 const auto [samples_x, samples_y] = Samples(num_samples);
319 const u32 bytes_per_block = BytesPerBlock(format);
320 return {
321 .size =
322 {
323 .width = size.width * samples_x,
324 .height = size.height * samples_y,
325 .depth = size.depth,
326 },
327 .block = block,
328 .tile_size = DefaultBlockSize(format),
329 .bpp_log2 = BytesPerBlockLog2(bytes_per_block),
330 .tile_width_spacing = tile_width_spacing,
331 };
332}
333
334[[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) {
335 return MakeLevelInfo(info.format, info.size, info.block, info.num_samples,
336 info.tile_width_spacing);
337}
338
339[[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block,
340 u32 num_samples, u32 tile_width_spacing,
341 u32 level) {
342 const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing);
343 u32 offset = 0;
344 for (u32 current_level = 0; current_level < level; ++current_level) {
345 offset += CalculateLevelSize(info, current_level);
346 }
347 return offset;
348}
349
350[[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block,
351 u32 tile_size_y, u32 tile_width_spacing) {
352 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134
353 if (tile_width_spacing > 0) {
354 const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth;
355 return Common::AlignBits(size_bytes, alignment_log2);
356 }
357 const u32 aligned_height = Common::AlignUp(size.height, tile_size_y);
358 while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) {
359 --block.height;
360 }
361 while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) {
362 --block.depth;
363 }
364 const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth;
365 const u32 num_blocks = size_bytes >> block_shift;
366 if (size_bytes != num_blocks << block_shift) {
367 return (num_blocks + 1) << block_shift;
368 }
369 return size_bytes;
370}
371
372[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info,
373 const ImageBase& overlap,
374 bool strict_size) {
375 const ImageInfo& info = overlap.info;
376 if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) {
377 return std::nullopt;
378 }
379 if (new_info.block != info.block) {
380 return std::nullopt;
381 }
382 const SubresourceExtent resources = new_info.resources;
383 return SubresourceExtent{
384 .levels = std::max(resources.levels, info.resources.levels),
385 .layers = std::max(resources.layers, info.resources.layers),
386 };
387}
388
389[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
390 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
391 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info);
392 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
393 const auto it = std::ranges::find(slice_offsets, diff);
394 if (it == slice_offsets.end()) {
395 return std::nullopt;
396 }
397 const std::vector subresources = CalculateSliceSubresources(new_info);
398 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
399 const ImageInfo& info = overlap.info;
400 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
401 return std::nullopt;
402 }
403 const u32 mip_depth = std::max(1U, new_info.size.depth << base.level);
404 if (mip_depth < info.size.depth + base.layer) {
405 return std::nullopt;
406 }
407 if (MipBlockSize(new_info, base.level) != info.block) {
408 return std::nullopt;
409 }
410 return SubresourceExtent{
411 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
412 .layers = 1,
413 };
414}
415
416[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
417 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
418 const u32 layer_stride = new_info.layer_stride;
419 const s32 new_size = layer_stride * new_info.resources.layers;
420 const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
421 if (diff > new_size) {
422 return std::nullopt;
423 }
424 const s32 base_layer = diff / layer_stride;
425 const s32 mip_offset = diff % layer_stride;
426 const std::array offsets = CalculateMipLevelOffsets(new_info);
427 const auto end = offsets.begin() + new_info.resources.levels;
428 const auto it = std::find(offsets.begin(), end, mip_offset);
429 if (it == end) {
430 // Mipmap is not aligned to any valid size
431 return std::nullopt;
432 }
433 const SubresourceBase base{
434 .level = static_cast<s32>(std::distance(offsets.begin(), it)),
435 .layer = base_layer,
436 };
437 const ImageInfo& info = overlap.info;
438 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
439 return std::nullopt;
440 }
441 if (MipBlockSize(new_info, base.level) != info.block) {
442 return std::nullopt;
443 }
444 return SubresourceExtent{
445 .levels = std::max(new_info.resources.levels, info.resources.levels + base.level),
446 .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer),
447 };
448}
449
450[[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info,
451 GPUVAddr gpu_addr,
452 VAddr cpu_addr,
453 const ImageBase& overlap,
454 bool strict_size) {
455 std::optional<SubresourceExtent> resources;
456 if (new_info.type != ImageType::e3D) {
457 resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size);
458 } else {
459 resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size);
460 }
461 if (!resources) {
462 return std::nullopt;
463 }
464 return OverlapResult{
465 .gpu_addr = gpu_addr,
466 .cpu_addr = cpu_addr,
467 .resources = *resources,
468 };
469}
470
471[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
472 GPUVAddr gpu_addr,
473 VAddr cpu_addr,
474 const ImageBase& overlap,
475 bool strict_size) {
476 const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
477 if (!base) {
478 return std::nullopt;
479 }
480 const ImageInfo& info = overlap.info;
481 if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
482 return std::nullopt;
483 }
484 if (new_info.block != MipBlockSize(info, base->level)) {
485 return std::nullopt;
486 }
487 const SubresourceExtent resources = new_info.resources;
488 s32 layers = 1;
489 if (info.type != ImageType::e3D) {
490 layers = std::max(resources.layers, info.resources.layers + base->layer);
491 }
492 return OverlapResult{
493 .gpu_addr = overlap.gpu_addr,
494 .cpu_addr = overlap.cpu_addr,
495 .resources =
496 {
497 .levels = std::max(resources.levels + base->level, info.resources.levels),
498 .layers = layers,
499 },
500 };
501}
502
503[[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) {
504 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212
505 static constexpr u32 STRIDE_ALIGNMENT = 32;
506 ASSERT(info.type == ImageType::Linear);
507 const Extent2D num_tiles{
508 .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)),
509 .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)),
510 };
511 const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format);
512 return Extent2D{
513 .width = Common::AlignUp(num_tiles.width, width_alignment),
514 .height = num_tiles.height,
515 };
516}
517
518[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
519 // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
520 ASSERT(info.type != ImageType::Linear);
521 const Extent3D size = AdjustMipSize(info.size, level);
522 const Extent3D num_tiles{
523 .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
524 .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
525 .depth = size.depth,
526 };
527 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
528 const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
529 const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
530 return Extent3D{
531 .width = Common::AlignBits(num_tiles.width, alignment),
532 .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
533 .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
534 };
535}
536
537[[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept {
538 u32 num_blocks = 0;
539 for (s32 level = 0; level < info.resources.levels; ++level) {
540 const Extent3D mip_size = AdjustMipSize(info.size, level);
541 num_blocks += NumBlocks(mip_size, tile_size);
542 }
543 return num_blocks;
544}
545
546[[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept {
547 ASSERT(info.type == ImageType::e3D);
548 u32 num_slices = 0;
549 for (s32 level = 0; level < info.resources.levels; ++level) {
550 num_slices += AdjustMipSize(info.size.depth, level);
551 }
552 return num_slices;
553}
554
555void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
556 const ImageInfo& info, const BufferImageCopy& copy,
557 std::span<const u8> memory) {
558 ASSERT(copy.image_offset.z == 0);
559 ASSERT(copy.image_extent.depth == 1);
560 ASSERT(copy.image_subresource.base_level == 0);
561 ASSERT(copy.image_subresource.base_layer == 0);
562 ASSERT(copy.image_subresource.num_layers == 1);
563
564 const u32 bytes_per_block = BytesPerBlock(info.format);
565 const u32 row_length = copy.image_extent.width * bytes_per_block;
566 const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;
567
568 for (u32 line = 0; line < copy.image_extent.height; ++line) {
569 const u32 host_offset_y = line * info.pitch;
570 const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
571 const u32 guest_offset = guest_offset_x + guest_offset_y;
572 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
573 row_length);
574 }
575}
576
577void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
578 const ImageInfo& info, const BufferImageCopy& copy,
579 std::span<const u8> input) {
580 const Extent3D size = info.size;
581 const LevelInfo level_info = MakeLevelInfo(info);
582 const Extent2D tile_size = DefaultBlockSize(info.format);
583 const u32 bytes_per_block = BytesPerBlock(info.format);
584
585 const s32 level = copy.image_subresource.base_level;
586 const Extent3D level_size = AdjustMipSize(size, level);
587 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
588 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
589
590 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
591
592 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
593 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
594 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
595 UNIMPLEMENTED_IF(copy.image_extent != level_size);
596
597 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
598 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
599
600 size_t host_offset = copy.buffer_offset;
601
602 const u32 num_levels = info.resources.levels;
603 const std::array sizes = CalculateLevelSizes(level_info, num_levels);
604 size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
605 const size_t layer_stride =
606 AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
607 level_info.block, tile_size.height, info.tile_width_spacing);
608 const size_t subresource_size = sizes[level];
609
610 const auto dst_data = std::make_unique<u8[]>(subresource_size);
611 const std::span<u8> dst(dst_data.get(), subresource_size);
612
613 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
614 const std::span<const u8> src = input.subspan(host_offset);
615 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
616 num_tiles.depth, block.height, block.depth);
617
618 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
619
620 host_offset += host_bytes_per_layer;
621 guest_offset += layer_stride;
622 }
623 ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
624}
625
626} // Anonymous namespace
627
628u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept {
629 if (info.type == ImageType::Buffer) {
630 return info.size.width * BytesPerBlock(info.format);
631 }
632 if (info.type == ImageType::Linear) {
633 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
634 }
635 if (info.resources.layers > 1) {
636 ASSERT(info.layer_stride != 0);
637 return info.layer_stride * info.resources.layers;
638 } else {
639 return CalculateLayerSize(info);
640 }
641}
642
643u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept {
644 if (info.type == ImageType::Buffer) {
645 return info.size.width * BytesPerBlock(info.format);
646 }
647 if (info.num_samples > 1) {
648 // Multisample images can't be uploaded to or downloaded from the host
649 return 0;
650 }
651 if (info.type == ImageType::Linear) {
652 return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format));
653 }
654 const Extent2D tile_size = DefaultBlockSize(info.format);
655 return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format);
656}
657
658u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
659 if (info.type == ImageType::Buffer) {
660 return info.size.width * BytesPerBlock(info.format);
661 }
662 static constexpr Extent2D TILE_SIZE{1, 1};
663 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
664}
665
666u32 CalculateLayerStride(const ImageInfo& info) noexcept {
667 ASSERT(info.type != ImageType::Linear);
668 const u32 layer_size = CalculateLayerSize(info);
669 const Extent3D size = info.size;
670 const Extent3D block = info.block;
671 const u32 tile_size_y = DefaultBlockHeight(info.format);
672 return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing);
673}
674
675u32 CalculateLayerSize(const ImageInfo& info) noexcept {
676 ASSERT(info.type != ImageType::Linear);
677 return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
678 info.tile_width_spacing, info.resources.levels);
679}
680
681std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
682 ASSERT(info.resources.levels <= MAX_MIP_LEVELS);
683 const LevelInfo level_info = MakeLevelInfo(info);
684 std::array<u32, MAX_MIP_LEVELS> offsets{};
685 u32 offset = 0;
686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 offsets[level] = offset;
688 offset += CalculateLevelSize(level_info, level);
689 }
690 return offsets;
691}
692
693std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
694 ASSERT(info.type == ImageType::e3D);
695 std::vector<u32> offsets;
696 offsets.reserve(NumSlices(info));
697
698 const LevelInfo level_info = MakeLevelInfo(info);
699 u32 mip_offset = 0;
700 for (s32 level = 0; level < info.resources.levels; ++level) {
701 const Extent3D tile_shift = TileShift(level_info, level);
702 const Extent3D tiles = LevelTiles(level_info, level);
703 const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
704 const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
705 const u32 z_mask = (1U << tile_shift.depth) - 1;
706 const u32 depth = AdjustMipSize(info.size.depth, level);
707 for (u32 slice = 0; slice < depth; ++slice) {
708 const u32 z_low = slice & z_mask;
709 const u32 z_high = slice & ~z_mask;
710 offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
711 }
712 mip_offset += CalculateLevelSize(level_info, level);
713 }
714 return offsets;
715}
716
717std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) {
718 ASSERT(info.type == ImageType::e3D);
719 std::vector<SubresourceBase> subresources;
720 subresources.reserve(NumSlices(info));
721 for (s32 level = 0; level < info.resources.levels; ++level) {
722 const s32 depth = AdjustMipSize(info.size.depth, level);
723 for (s32 slice = 0; slice < depth; ++slice) {
724 subresources.emplace_back(SubresourceBase{
725 .level = level,
726 .layer = slice,
727 });
728 }
729 }
730 return subresources;
731}
732
733u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) {
734 const Extent2D tile_size = DefaultBlockSize(info.format);
735 const Extent3D level_size = AdjustMipSize(info.size, level);
736 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
737 const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level);
738 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
739 return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing);
740}
741
742PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
743 return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
744 config.a_type, config.srgb_conversion);
745}
746
747ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
748 switch (info.type) {
749 case ImageType::e2D:
750 return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D;
751 case ImageType::e3D:
752 return ImageViewType::e2DArray;
753 case ImageType::Linear:
754 return ImageViewType::e2D;
755 default:
756 UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type));
757 return ImageViewType{};
758 }
759}
760
761std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
762 SubresourceBase base) {
763 ASSERT(dst.resources.levels >= src.resources.levels);
764 ASSERT(dst.num_samples == src.num_samples);
765
766 const bool is_dst_3d = dst.type == ImageType::e3D;
767 if (is_dst_3d) {
768 ASSERT(src.type == ImageType::e3D);
769 ASSERT(src.resources.levels == 1);
770 }
771
772 std::vector<ImageCopy> copies;
773 copies.reserve(src.resources.levels);
774 for (s32 level = 0; level < src.resources.levels; ++level) {
775 ImageCopy& copy = copies.emplace_back();
776 copy.src_subresource = SubresourceLayers{
777 .base_level = level,
778 .base_layer = 0,
779 .num_layers = src.resources.layers,
780 };
781 copy.dst_subresource = SubresourceLayers{
782 .base_level = base.level + level,
783 .base_layer = is_dst_3d ? 0 : base.layer,
784 .num_layers = is_dst_3d ? 1 : src.resources.layers,
785 };
786 copy.src_offset = Offset3D{
787 .x = 0,
788 .y = 0,
789 .z = 0,
790 };
791 copy.dst_offset = Offset3D{
792 .x = 0,
793 .y = 0,
794 .z = is_dst_3d ? base.layer : 0,
795 };
796 const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
797 copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
798 if (is_dst_3d) {
799 copy.extent.depth = src.size.depth;
800 }
801 }
802 return copies;
803}
804
805bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
806 if (config.Address() == 0) {
807 return false;
808 }
809 if (config.Address() > (u64(1) << 48)) {
810 return false;
811 }
812 return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
813}
814
815std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
816 const ImageInfo& info, std::span<u8> output) {
817 const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
818 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
819 const Extent3D size = info.size;
820
821 if (info.type == ImageType::Linear) {
822 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
823
824 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
825 return {{
826 .buffer_offset = 0,
827 .buffer_size = guest_size_bytes,
828 .buffer_row_length = info.pitch >> bpp_log2,
829 .buffer_image_height = size.height,
830 .image_subresource =
831 {
832 .base_level = 0,
833 .base_layer = 0,
834 .num_layers = 1,
835 },
836 .image_offset = {0, 0, 0},
837 .image_extent = size,
838 }};
839 }
840 const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
841 gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
842 const std::span<const u8> input(input_data.get(), guest_size_bytes);
843
844 const LevelInfo level_info = MakeLevelInfo(info);
845 const s32 num_layers = info.resources.layers;
846 const s32 num_levels = info.resources.levels;
847 const Extent2D tile_size = DefaultBlockSize(info.format);
848 const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
849 const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
850 const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
851 const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
852 info.tile_width_spacing);
853 size_t guest_offset = 0;
854 u32 host_offset = 0;
855 std::vector<BufferImageCopy> copies(num_levels);
856
857 for (s32 level = 0; level < num_levels; ++level) {
858 const Extent3D level_size = AdjustMipSize(size, level);
859 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
860 const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
861 copies[level] = BufferImageCopy{
862 .buffer_offset = host_offset,
863 .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
864 .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
865 .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
866 .image_subresource =
867 {
868 .base_level = level,
869 .base_layer = 0,
870 .num_layers = info.resources.layers,
871 },
872 .image_offset = {0, 0, 0},
873 .image_extent = level_size,
874 };
875 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
876 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
877 const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
878 size_t guest_layer_offset = 0;
879
880 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
881 const std::span<u8> dst = output.subspan(host_offset);
882 const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
883 UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
884 num_tiles.depth, block.height, block.depth, stride_alignment);
885 guest_layer_offset += layer_stride;
886 host_offset += host_bytes_per_layer;
887 }
888 guest_offset += level_sizes[level];
889 }
890 return copies;
891}
892
893BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
894 const ImageBase& image, std::span<u8> output) {
895 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
896 return BufferCopy{
897 .src_offset = 0,
898 .dst_offset = 0,
899 .size = image.guest_size_bytes,
900 };
901}
902
903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
904 std::span<BufferImageCopy> copies) {
905 u32 output_offset = 0;
906
907 const Extent2D tile_size = DefaultBlockSize(info.format);
908 for (BufferImageCopy& copy : copies) {
909 const u32 level = copy.image_subresource.base_level;
910 const Extent3D mip_size = AdjustMipSize(info.size, level);
911 ASSERT(copy.image_offset == Offset3D{});
912 ASSERT(copy.image_subresource.base_layer == 0);
913 ASSERT(copy.image_extent == mip_size);
914 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
915 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
916
917 if (IsPixelFormatASTC(info.format)) {
918 ASSERT(copy.image_extent.depth == 1);
919 Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
920 copy.image_extent.width, copy.image_extent.height,
921 copy.image_subresource.num_layers, tile_size.width,
922 tile_size.height, output.subspan(output_offset));
923 } else {
924 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
925 output.subspan(output_offset));
926 }
927 copy.buffer_offset = output_offset;
928 copy.buffer_row_length = mip_size.width;
929 copy.buffer_image_height = mip_size.height;
930
931 output_offset += copy.image_extent.width * copy.image_extent.height *
932 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
933 }
934}
935
936std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
937 const Extent3D size = info.size;
938 const u32 bytes_per_block = BytesPerBlock(info.format);
939 if (info.type == ImageType::Linear) {
940 ASSERT(info.pitch % bytes_per_block == 0);
941 return {{
942 .buffer_offset = 0,
943 .buffer_size = static_cast<size_t>(info.pitch) * size.height,
944 .buffer_row_length = info.pitch / bytes_per_block,
945 .buffer_image_height = size.height,
946 .image_subresource =
947 {
948 .base_level = 0,
949 .base_layer = 0,
950 .num_layers = 1,
951 },
952 .image_offset = {0, 0, 0},
953 .image_extent = size,
954 }};
955 }
956 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
957
958 const s32 num_layers = info.resources.layers;
959 const s32 num_levels = info.resources.levels;
960 const Extent2D tile_size = DefaultBlockSize(info.format);
961
962 u32 host_offset = 0;
963
964 std::vector<BufferImageCopy> copies(num_levels);
965 for (s32 level = 0; level < num_levels; ++level) {
966 const Extent3D level_size = AdjustMipSize(size, level);
967 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
968 const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
969 copies[level] = BufferImageCopy{
970 .buffer_offset = host_offset,
971 .buffer_size = host_bytes_per_level,
972 .buffer_row_length = level_size.width,
973 .buffer_image_height = level_size.height,
974 .image_subresource =
975 {
976 .base_level = level,
977 .base_layer = 0,
978 .num_layers = info.resources.layers,
979 },
980 .image_offset = {0, 0, 0},
981 .image_extent = level_size,
982 };
983 host_offset += host_bytes_per_level;
984 }
985 return copies;
986}
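// The implied host layout is level-major: each mip level stores all of its
// layers contiguously, matching how a buffer/image copy with num_layers > 1 is
// addressed by the host graphics APIs.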
987
988Extent3D MipSize(Extent3D size, u32 level) {
989 return AdjustMipSize(size, level);
990}
991
992Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
993 const LevelInfo level_info = MakeLevelInfo(info);
994 const Extent2D tile_size = DefaultBlockSize(info.format);
995 const Extent3D level_size = AdjustMipSize(info.size, level);
996 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
997 return AdjustMipBlockSize(num_tiles, level_info.block, level);
998}
999
1000std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1001 const Extent2D tile_size = DefaultBlockSize(info.format);
1002 if (info.type == ImageType::Linear) {
1003 return std::vector{SwizzleParameters{
1004 .num_tiles = AdjustTileSize(info.size, tile_size),
1005 .block = {},
1006 .buffer_offset = 0,
1007 .level = 0,
1008 }};
1009 }
1010 const LevelInfo level_info = MakeLevelInfo(info);
1011 const Extent3D size = info.size;
1012 const s32 num_levels = info.resources.levels;
1013
1014 u32 guest_offset = 0;
1015 std::vector<SwizzleParameters> params(num_levels);
1016 for (s32 level = 0; level < num_levels; ++level) {
1017 const Extent3D level_size = AdjustMipSize(size, level);
1018 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
1019 const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
1020 params[level] = SwizzleParameters{
1021 .num_tiles = num_tiles,
1022 .block = block,
1023 .buffer_offset = guest_offset,
1024 .level = level,
1025 };
1026 guest_offset += CalculateLevelSize(level_info, level);
1027 }
1028 return params;
1029}
1030
1031void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
1032 std::span<const BufferImageCopy> copies, std::span<const u8> memory) {
1033 const bool is_pitch_linear = info.type == ImageType::Linear;
1034 for (const BufferImageCopy& copy : copies) {
1035 if (is_pitch_linear) {
1036 SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1037 } else {
1038 SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory);
1039 }
1040 }
1041}
1042
1043bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level,
1044 u32 rhs_level, bool strict_size) noexcept {
1045 ASSERT(lhs.type != ImageType::Linear);
1046 ASSERT(rhs.type != ImageType::Linear);
1047 if (strict_size) {
1048 const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level);
1049 const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level);
1050 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1051 } else {
1052 const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level);
1053 const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level);
1054 return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height;
1055 }
1056}
1057
1058bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept {
1059 ASSERT(lhs.type == ImageType::Linear);
1060 ASSERT(rhs.type == ImageType::Linear);
1061 if (strict_size) {
1062 return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height;
1063 } else {
1064 const Extent2D lhs_size = PitchLinearAlignedSize(lhs);
1065 const Extent2D rhs_size = PitchLinearAlignedSize(rhs);
1066 return lhs_size == rhs_size;
1067 }
1068}
1069
1070std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
1071 VAddr cpu_addr, const ImageBase& overlap,
1072 bool strict_size) {
1073 ASSERT(new_info.type != ImageType::Linear);
1074 ASSERT(overlap.info.type != ImageType::Linear);
1075 if (!IsLayerStrideCompatible(new_info, overlap.info)) {
1076 return std::nullopt;
1077 }
1078 if (!IsViewCompatible(overlap.info.format, new_info.format)) {
1079 return std::nullopt;
1080 }
1081 if (gpu_addr == overlap.gpu_addr) {
1082 const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
1083 if (!solution) {
1084 return std::nullopt;
1085 }
1086 return OverlapResult{
1087 .gpu_addr = gpu_addr,
1088 .cpu_addr = cpu_addr,
1089 .resources = *solution,
1090 };
1091 }
1092 if (overlap.gpu_addr > gpu_addr) {
1093 return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1094 }
1095 // Otherwise, overlap.gpu_addr < gpu_addr
1096 return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
1097}
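// The three branches above split overlap resolution by base address: images at
// the same address can only differ in their mip and layer counts, an overlap
// starting at a higher address may be absorbed as later mips or layers of the
// new image, and one starting at a lower address may instead absorb the new
// image, extending the result downwards.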
1098
1099bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
1100 // If either of the layer strides is zero, we can assume they are compatible
1101 // These images generally come from render targets
1102 if (lhs.layer_stride == 0) {
1103 return true;
1104 }
1105 if (rhs.layer_stride == 0) {
1106 return true;
1107 }
1108 // It's definitely compatible if the layer stride matches
1109 if (lhs.layer_stride == rhs.layer_stride) {
1110 return true;
1111 }
1112 // We also have to compare the unaligned layer strides, which can differ
1113 // when an image has no layers and its stride was never aligned
1114 if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) {
1115 return true;
1116 }
1117 return false;
1118}
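// Worked example with hypothetical values: a render target created without
// layered rendering reports layer_stride == 0 and passes through the early
// returns against any stride, while a single-layer texture whose stride was
// never aligned can still match a layered image through the
// maybe_unaligned_layer_stride comparison.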
1119
1120std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
1121 GPUVAddr candidate_addr, RelaxedOptions options) {
1122 const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
1123 if (!base) {
1124 return std::nullopt;
1125 }
1126 const ImageInfo& existing = image.info;
1127 if (False(options & RelaxedOptions::Format)) {
1128 if (!IsViewCompatible(existing.format, candidate.format)) {
1129 return std::nullopt;
1130 }
1131 }
1132 if (!IsLayerStrideCompatible(existing, candidate)) {
1133 return std::nullopt;
1134 }
1135 if (existing.type != candidate.type) {
1136 return std::nullopt;
1137 }
1138 if (False(options & RelaxedOptions::Samples)) {
1139 if (existing.num_samples != candidate.num_samples) {
1140 return std::nullopt;
1141 }
1142 }
1143 if (existing.resources.levels < candidate.resources.levels + base->level) {
1144 return std::nullopt;
1145 }
1146 if (existing.type == ImageType::e3D) {
1147 const u32 mip_depth = std::max(1U, existing.size.depth >> base->level);
1148 if (mip_depth < candidate.size.depth + base->layer) {
1149 return std::nullopt;
1150 }
1151 } else {
1152 if (existing.resources.layers < candidate.resources.layers + base->layer) {
1153 return std::nullopt;
1154 }
1155 }
1156 const bool strict_size = False(options & RelaxedOptions::Size);
1157 if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
1158 return std::nullopt;
1159 }
1160 // TODO: compare block sizes
1161 return base;
1162}
1163
1164bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
1165 RelaxedOptions options) {
1166 return FindSubresource(candidate, image, candidate_addr, options).has_value();
1167}
1168
1169void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
1170 const ImageBase* src) {
1171 if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1172 src_info.format = src->info.format;
1173 }
1174 if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1175 dst_info.format = dst->info.format;
1176 }
1177 if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) {
1178 dst_info.format = src->info.format;
1179 }
1180 if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) {
1181 src_info.format = dst->info.format;
1182 }
1183}
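// The blit engine registers only describe color formats, so when an existing
// image on either side is known to be depth/stencil, its real format is
// propagated here (and mirrored to a missing opposite side) to keep the source
// and destination formats in agreement.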
1184
1185u32 MapSizeBytes(const ImageBase& image) {
1186 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
1187 return image.guest_size_bytes;
1188 } else if (True(image.flags & ImageFlagBits::Converted)) {
1189 return image.converted_size_bytes;
1190 } else {
1191 return image.unswizzled_size_bytes;
1192 }
1193}
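// The three cases above mirror the staging buffer contents: accelerated
// uploads map the raw guest bytes, converted images map the decompressed
// linear data, and everything else maps the deswizzled native-format texels.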
1194
1195using P = PixelFormat;
1196
1197static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
1198static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);
1199
1200static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
1201static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
1202 0x50d200);
1203
1204static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
1205static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
1206static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
1207static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
1208static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
1209static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
1210static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
1211static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
1212static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
1213static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
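// Reading the A8B8G8R8 1024x1024 offsets above: level 1 starts at 0x400000
// (1024 * 1024 * 4 bytes for level 0) and each later level adds a quarter of
// the previous one, until the smallest mips are clamped to the 512-byte GOB
// granularity (the 0x200 steps in the last offsets).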
1214
1215constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height,
1216 u32 tile_width_spacing, u32 level) {
1217 const Extent3D size{width, height, 1};
1218 const Extent3D block{0, block_height, 0};
1219 const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level);
1220 return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing);
1221}
1222
1223static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
1224static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
1225static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);
1226
1227static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
1228 "Tile width spacing is not working");
1229static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
1230 "Compressed tile width spacing is not working");
1231
1232} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
new file mode 100644
index 000000000..dbbbd33cd
--- /dev/null
+++ b/src/video_core/texture_cache/util.h
@@ -0,0 +1,107 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <span>
9
10#include "common/common_types.h"
11
12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/surface.h"
14#include "video_core/texture_cache/image_base.h"
15#include "video_core/texture_cache/image_view_base.h"
16#include "video_core/texture_cache/types.h"
17#include "video_core/textures/texture.h"
18
19namespace VideoCommon {
20
21using Tegra::Texture::TICEntry;
22
23struct OverlapResult {
24 GPUVAddr gpu_addr;
25 VAddr cpu_addr;
26 SubresourceExtent resources;
27};
28
29[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;
30
31[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;
32
33[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;
34
35[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;
36
37[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;
38
39[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
40 const ImageInfo& info) noexcept;
41
42[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
43
44[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
45
46[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
47
48[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
49 const Tegra::Texture::TICEntry& config) noexcept;
50
51[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
52
53[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
54 const ImageInfo& src,
55 SubresourceBase base);
56
57[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
58
59[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
60 GPUVAddr gpu_addr, const ImageInfo& info,
61 std::span<u8> output);
62
63[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
64 const ImageBase& image, std::span<u8> output);
65
66void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
67 std::span<BufferImageCopy> copies);
68
69[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
70
71[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
72
73[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
74
75[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);
76
77void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
78 std::span<const BufferImageCopy> copies, std::span<const u8> memory);
79
80[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
81 const ImageInfo& overlap_info, u32 new_level,
82 u32 overlap_level, bool strict_size) noexcept;
83
84[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
85 bool strict_size) noexcept;
86
87[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
88 GPUVAddr gpu_addr, VAddr cpu_addr,
89 const ImageBase& overlap,
90 bool strict_size);
91
92[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
93
94[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
95 const ImageBase& image,
96 GPUVAddr candidate_addr,
97 RelaxedOptions options);
98
99[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
100 GPUVAddr candidate_addr, RelaxedOptions options);
101
102void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
103 const ImageBase* src);
104
105[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);
106
107} // namespace VideoCommon