diff options
| author | 2021-01-10 22:09:56 -0700 | |
|---|---|---|
| committer | 2021-01-10 22:09:56 -0700 | |
| commit | 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch) | |
| tree | 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/texture_cache | |
| parent | More forgetting... duh (diff) | |
| parent | Merge pull request #5229 from Morph1984/fullscreen-opt (diff) | |
| download | yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip | |
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/texture_cache')
31 files changed, 4644 insertions, 2849 deletions
diff --git a/src/video_core/texture_cache/accelerated_swizzle.cpp b/src/video_core/texture_cache/accelerated_swizzle.cpp new file mode 100644 index 000000000..a4fc1184b --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/alignment.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/div_ceil.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/accelerated_swizzle.h" | ||
| 13 | #include "video_core/texture_cache/util.h" | ||
| 14 | #include "video_core/textures/decoders.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Accelerated { | ||
| 17 | |||
| 18 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 19 | using Tegra::Texture::GOB_SIZE_X; | ||
| 20 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 21 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 22 | using VideoCore::Surface::BytesPerBlock; | ||
| 23 | |||
| 24 | BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams(const SwizzleParameters& swizzle, | ||
| 25 | const ImageInfo& info) { | ||
| 26 | const Extent3D block = swizzle.block; | ||
| 27 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 28 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 29 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 30 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 31 | const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); | ||
| 32 | return BlockLinearSwizzle2DParams{ | ||
| 33 | .origin{0, 0, 0}, | ||
| 34 | .destination{0, 0, 0}, | ||
| 35 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 36 | .layer_stride = info.layer_stride, | ||
| 37 | .block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth), | ||
| 38 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 39 | .block_height = block.height, | ||
| 40 | .block_height_mask = (1U << block.height) - 1, | ||
| 41 | }; | ||
| 42 | } | ||
| 43 | |||
| 44 | BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams(const SwizzleParameters& swizzle, | ||
| 45 | const ImageInfo& info) { | ||
| 46 | const Extent3D block = swizzle.block; | ||
| 47 | const Extent3D num_tiles = swizzle.num_tiles; | ||
| 48 | const u32 bytes_per_block = BytesPerBlock(info.format); | ||
| 49 | const u32 stride_alignment = CalculateLevelStrideAlignment(info, swizzle.level); | ||
| 50 | const u32 stride = Common::AlignBits(num_tiles.width, stride_alignment) * bytes_per_block; | ||
| 51 | |||
| 52 | const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) >> GOB_SIZE_X_SHIFT; | ||
| 53 | const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block.height + block.depth); | ||
| 54 | const u32 slice_size = | ||
| 55 | Common::DivCeilLog2(num_tiles.height, block.height + GOB_SIZE_Y_SHIFT) * block_size; | ||
| 56 | return BlockLinearSwizzle3DParams{ | ||
| 57 | .origin{0, 0, 0}, | ||
| 58 | .destination{0, 0, 0}, | ||
| 59 | .bytes_per_block_log2 = static_cast<u32>(std::countr_zero(bytes_per_block)), | ||
| 60 | .slice_size = slice_size, | ||
| 61 | .block_size = block_size, | ||
| 62 | .x_shift = GOB_SIZE_SHIFT + block.height + block.depth, | ||
| 63 | .block_height = block.height, | ||
| 64 | .block_height_mask = (1U << block.height) - 1, | ||
| 65 | .block_depth = block.depth, | ||
| 66 | .block_depth_mask = (1U << block.depth) - 1, | ||
| 67 | }; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Accelerated \ No newline at end of file | ||
diff --git a/src/video_core/texture_cache/accelerated_swizzle.h b/src/video_core/texture_cache/accelerated_swizzle.h new file mode 100644 index 000000000..6ec5c78c4 --- /dev/null +++ b/src/video_core/texture_cache/accelerated_swizzle.h | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Accelerated { | ||
| 14 | |||
| 15 | struct BlockLinearSwizzle2DParams { | ||
| 16 | std::array<u32, 3> origin; | ||
| 17 | std::array<s32, 3> destination; | ||
| 18 | u32 bytes_per_block_log2; | ||
| 19 | u32 layer_stride; | ||
| 20 | u32 block_size; | ||
| 21 | u32 x_shift; | ||
| 22 | u32 block_height; | ||
| 23 | u32 block_height_mask; | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct BlockLinearSwizzle3DParams { | ||
| 27 | std::array<u32, 3> origin; | ||
| 28 | std::array<s32, 3> destination; | ||
| 29 | u32 bytes_per_block_log2; | ||
| 30 | u32 slice_size; | ||
| 31 | u32 block_size; | ||
| 32 | u32 x_shift; | ||
| 33 | u32 block_height; | ||
| 34 | u32 block_height_mask; | ||
| 35 | u32 block_depth; | ||
| 36 | u32 block_depth_mask; | ||
| 37 | }; | ||
| 38 | |||
| 39 | [[nodiscard]] BlockLinearSwizzle2DParams MakeBlockLinearSwizzle2DParams( | ||
| 40 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 41 | |||
| 42 | [[nodiscard]] BlockLinearSwizzle3DParams MakeBlockLinearSwizzle3DParams( | ||
| 43 | const SwizzleParameters& swizzle, const ImageInfo& info); | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Accelerated | ||
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h deleted file mode 100644 index 9c21a0649..000000000 --- a/src/video_core/texture_cache/copy_params.h +++ /dev/null | |||
| @@ -1,36 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | struct CopyParams { | ||
| 12 | constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, | ||
| 13 | u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, | ||
| 14 | u32 depth) | ||
| 15 | : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, | ||
| 16 | dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, | ||
| 17 | dest_level{dest_level}, width{width}, height{height}, depth{depth} {} | ||
| 18 | |||
| 19 | constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) | ||
| 20 | : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, | ||
| 21 | dest_level{level}, width{width}, height{height}, depth{depth} {} | ||
| 22 | |||
| 23 | u32 source_x; | ||
| 24 | u32 source_y; | ||
| 25 | u32 source_z; | ||
| 26 | u32 dest_x; | ||
| 27 | u32 dest_y; | ||
| 28 | u32 dest_z; | ||
| 29 | u32 source_level; | ||
| 30 | u32 dest_level; | ||
| 31 | u32 width; | ||
| 32 | u32 height; | ||
| 33 | u32 depth; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp new file mode 100644 index 000000000..017327975 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.cpp | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
#include <algorithm>
#include <array>
#include <cstring>
#include <span>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/texture_cache/decode_bc4.h"
#include "video_core/texture_cache/types.h"
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 17 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 18 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 19 | const u32 code = (bits >> code_offset) & 7; | ||
| 20 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 21 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 22 | if (red0 > red1) { | ||
| 23 | switch (code) { | ||
| 24 | case 0: | ||
| 25 | return red0; | ||
| 26 | case 1: | ||
| 27 | return red1; | ||
| 28 | case 2: | ||
| 29 | return (6 * red0 + 1 * red1) / 7; | ||
| 30 | case 3: | ||
| 31 | return (5 * red0 + 2 * red1) / 7; | ||
| 32 | case 4: | ||
| 33 | return (4 * red0 + 3 * red1) / 7; | ||
| 34 | case 5: | ||
| 35 | return (3 * red0 + 4 * red1) / 7; | ||
| 36 | case 6: | ||
| 37 | return (2 * red0 + 5 * red1) / 7; | ||
| 38 | case 7: | ||
| 39 | return (1 * red0 + 6 * red1) / 7; | ||
| 40 | } | ||
| 41 | } else { | ||
| 42 | switch (code) { | ||
| 43 | case 0: | ||
| 44 | return red0; | ||
| 45 | case 1: | ||
| 46 | return red1; | ||
| 47 | case 2: | ||
| 48 | return (4 * red0 + 1 * red1) / 5; | ||
| 49 | case 3: | ||
| 50 | return (3 * red0 + 2 * red1) / 5; | ||
| 51 | case 4: | ||
| 52 | return (2 * red0 + 3 * red1) / 5; | ||
| 53 | case 5: | ||
| 54 | return (1 * red0 + 4 * red1) / 5; | ||
| 55 | case 6: | ||
| 56 | return 0; | ||
| 57 | case 7: | ||
| 58 | return 0xff; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | return 0; | ||
| 62 | } | ||
| 63 | |||
| 64 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 66 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 67 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 68 | size_t input_offset = 0; | ||
| 69 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 70 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 71 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 72 | u64 bits; | ||
| 73 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 74 | input_offset += sizeof(bits); | ||
| 75 | |||
| 76 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 77 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 78 | const u32 linear_z = slice; | ||
| 79 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 80 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 81 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 82 | const u32 offset_y = linear_y * extent.width; | ||
| 83 | const u32 offset_x = linear_x; | ||
| 84 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 85 | const u32 color = DecompressBlock(bits, x, y); | ||
| 86 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 87 | output[output_offset + 1] = 0; | ||
| 88 | output[output_offset + 2] = 0; | ||
| 89 | output[output_offset + 3] = 0xff; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc4.h new file mode 100644 index 000000000..63fb23508 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc4.h | |||
| @@ -0,0 +1,16 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | ||
| 15 | |||
| 16 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h new file mode 100644 index 000000000..3a03b786f --- /dev/null +++ b/src/video_core/texture_cache/descriptor_table.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/div_ceil.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/rasterizer_interface.h" | ||
| 15 | |||
| 16 | namespace VideoCommon { | ||
| 17 | |||
| 18 | template <typename Descriptor> | ||
| 19 | class DescriptorTable { | ||
| 20 | public: | ||
| 21 | explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} | ||
| 22 | |||
| 23 | [[nodiscard]] bool Synchornize(GPUVAddr gpu_addr, u32 limit) { | ||
| 24 | [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { | ||
| 25 | return false; | ||
| 26 | } | ||
| 27 | Refresh(gpu_addr, limit); | ||
| 28 | return true; | ||
| 29 | } | ||
| 30 | |||
| 31 | void Invalidate() noexcept { | ||
| 32 | std::ranges::fill(read_descriptors, 0); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) { | ||
| 36 | DEBUG_ASSERT(index <= current_limit); | ||
| 37 | const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); | ||
| 38 | std::pair<Descriptor, bool> result; | ||
| 39 | gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); | ||
| 40 | if (IsDescriptorRead(index)) { | ||
| 41 | result.second = result.first != descriptors[index]; | ||
| 42 | } else { | ||
| 43 | MarkDescriptorAsRead(index); | ||
| 44 | result.second = true; | ||
| 45 | } | ||
| 46 | if (result.second) { | ||
| 47 | descriptors[index] = result.first; | ||
| 48 | } | ||
| 49 | return result; | ||
| 50 | } | ||
| 51 | |||
| 52 | [[nodiscard]] u32 Limit() const noexcept { | ||
| 53 | return current_limit; | ||
| 54 | } | ||
| 55 | |||
| 56 | private: | ||
| 57 | void Refresh(GPUVAddr gpu_addr, u32 limit) { | ||
| 58 | current_gpu_addr = gpu_addr; | ||
| 59 | current_limit = limit; | ||
| 60 | |||
| 61 | const size_t num_descriptors = static_cast<size_t>(limit) + 1; | ||
| 62 | read_descriptors.clear(); | ||
| 63 | read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); | ||
| 64 | descriptors.resize(num_descriptors); | ||
| 65 | } | ||
| 66 | |||
| 67 | void MarkDescriptorAsRead(u32 index) noexcept { | ||
| 68 | read_descriptors[index / 64] |= 1ULL << (index % 64); | ||
| 69 | } | ||
| 70 | |||
| 71 | [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { | ||
| 72 | return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | Tegra::MemoryManager& gpu_memory; | ||
| 76 | GPUVAddr current_gpu_addr{}; | ||
| 77 | u32 current_limit{}; | ||
| 78 | std::vector<u64> read_descriptors; | ||
| 79 | std::vector<Descriptor> descriptors; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7d5a75648..ddfb726fe 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 7 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 8 | #include "video_core/texture_cache/format_lookup_table.h" | 7 | #include "video_core/texture_cache/format_lookup_table.h" |
| @@ -20,198 +19,207 @@ constexpr auto UNORM = ComponentType::UNORM; | |||
| 20 | constexpr auto SINT = ComponentType::SINT; | 19 | constexpr auto SINT = ComponentType::SINT; |
| 21 | constexpr auto UINT = ComponentType::UINT; | 20 | constexpr auto UINT = ComponentType::UINT; |
| 22 | constexpr auto FLOAT = ComponentType::FLOAT; | 21 | constexpr auto FLOAT = ComponentType::FLOAT; |
| 23 | constexpr bool C = false; // Normal color | 22 | constexpr bool LINEAR = false; |
| 24 | constexpr bool S = true; // Srgb | 23 | constexpr bool SRGB = true; |
| 25 | 24 | ||
| 26 | struct Table { | 25 | constexpr u32 Hash(TextureFormat format, ComponentType red_component, ComponentType green_component, |
| 27 | constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component, | 26 | ComponentType blue_component, ComponentType alpha_component, bool is_srgb) { |
| 28 | ComponentType green_component, ComponentType blue_component, | 27 | u32 hash = is_srgb ? 1 : 0; |
| 29 | ComponentType alpha_component, PixelFormat pixel_format) | 28 | hash |= static_cast<u32>(red_component) << 1; |
| 30 | : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component}, | 29 | hash |= static_cast<u32>(green_component) << 4; |
| 31 | green_component{green_component}, blue_component{blue_component}, | 30 | hash |= static_cast<u32>(blue_component) << 7; |
| 32 | alpha_component{alpha_component}, is_srgb{is_srgb} {} | 31 | hash |= static_cast<u32>(alpha_component) << 10; |
| 33 | 32 | hash |= static_cast<u32>(format) << 13; | |
| 34 | TextureFormat texture_format; | 33 | return hash; |
| 35 | PixelFormat pixel_format; | 34 | } |
| 36 | ComponentType red_component; | ||
| 37 | ComponentType green_component; | ||
| 38 | ComponentType blue_component; | ||
| 39 | ComponentType alpha_component; | ||
| 40 | bool is_srgb; | ||
| 41 | }; | ||
| 42 | constexpr std::array<Table, 86> DefinitionTable = {{ | ||
| 43 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_UNORM}, | ||
| 44 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::A8B8G8R8_SNORM}, | ||
| 45 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::A8B8G8R8_UINT}, | ||
| 46 | {TextureFormat::A8R8G8B8, C, SINT, SINT, SINT, SINT, PixelFormat::A8B8G8R8_SINT}, | ||
| 47 | {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::A8B8G8R8_SRGB}, | ||
| 48 | |||
| 49 | {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5_UNORM}, | ||
| 50 | |||
| 51 | {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10_UNORM}, | ||
| 52 | {TextureFormat::A2B10G10R10, C, UINT, UINT, UINT, UINT, PixelFormat::A2B10G10R10_UINT}, | ||
| 53 | |||
| 54 | {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5_UNORM}, | ||
| 55 | |||
| 56 | {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A4B4G4R4_UNORM}, | ||
| 57 | |||
| 58 | {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8_UNORM}, | ||
| 59 | {TextureFormat::R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8_SNORM}, | ||
| 60 | {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8_UINT}, | ||
| 61 | {TextureFormat::R8, C, SINT, SINT, SINT, SINT, PixelFormat::R8_SINT}, | ||
| 62 | |||
| 63 | {TextureFormat::R8G8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8G8_UNORM}, | ||
| 64 | {TextureFormat::R8G8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R8G8_SNORM}, | ||
| 65 | {TextureFormat::R8G8, C, UINT, UINT, UINT, UINT, PixelFormat::R8G8_UINT}, | ||
| 66 | {TextureFormat::R8G8, C, SINT, SINT, SINT, SINT, PixelFormat::R8G8_SINT}, | ||
| 67 | |||
| 68 | {TextureFormat::R16G16B16A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16B16A16_SNORM}, | ||
| 69 | {TextureFormat::R16G16B16A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16B16A16_UNORM}, | ||
| 70 | {TextureFormat::R16G16B16A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16B16A16_FLOAT}, | ||
| 71 | {TextureFormat::R16G16B16A16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16B16A16_UINT}, | ||
| 72 | {TextureFormat::R16G16B16A16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16B16A16_SINT}, | ||
| 73 | |||
| 74 | {TextureFormat::R16G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16G16_FLOAT}, | ||
| 75 | {TextureFormat::R16G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16G16_UNORM}, | ||
| 76 | {TextureFormat::R16G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16G16_SNORM}, | ||
| 77 | {TextureFormat::R16G16, C, UINT, UINT, UINT, UINT, PixelFormat::R16G16_UINT}, | ||
| 78 | {TextureFormat::R16G16, C, SINT, SINT, SINT, SINT, PixelFormat::R16G16_SINT}, | ||
| 79 | |||
| 80 | {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16_FLOAT}, | ||
| 81 | {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16_UNORM}, | ||
| 82 | {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16_SNORM}, | ||
| 83 | {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16_UINT}, | ||
| 84 | {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16_SINT}, | ||
| 85 | |||
| 86 | {TextureFormat::B10G11R11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::B10G11R11_FLOAT}, | ||
| 87 | |||
| 88 | {TextureFormat::R32G32B32A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32A32_FLOAT}, | ||
| 89 | {TextureFormat::R32G32B32A32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32B32A32_UINT}, | ||
| 90 | {TextureFormat::R32G32B32A32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32B32A32_SINT}, | ||
| 91 | |||
| 92 | {TextureFormat::R32G32B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32B32_FLOAT}, | ||
| 93 | |||
| 94 | {TextureFormat::R32G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32G32_FLOAT}, | ||
| 95 | {TextureFormat::R32G32, C, UINT, UINT, UINT, UINT, PixelFormat::R32G32_UINT}, | ||
| 96 | {TextureFormat::R32G32, C, SINT, SINT, SINT, SINT, PixelFormat::R32G32_SINT}, | ||
| 97 | |||
| 98 | {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32_FLOAT}, | ||
| 99 | {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32_UINT}, | ||
| 100 | {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32_SINT}, | ||
| 101 | |||
| 102 | {TextureFormat::E5B9G9R9, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9_FLOAT}, | ||
| 103 | |||
| 104 | {TextureFormat::D32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::D32_FLOAT}, | ||
| 105 | {TextureFormat::D16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::D16_UNORM}, | ||
| 106 | {TextureFormat::S8D24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 107 | {TextureFormat::R8G24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8_UINT_D24_UNORM}, | ||
| 108 | {TextureFormat::D32S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::D32_FLOAT_S8_UINT}, | ||
| 109 | |||
| 110 | {TextureFormat::BC1_RGBA, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_UNORM}, | ||
| 111 | {TextureFormat::BC1_RGBA, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC1_RGBA_SRGB}, | ||
| 112 | |||
| 113 | {TextureFormat::BC2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_UNORM}, | ||
| 114 | {TextureFormat::BC2, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC2_SRGB}, | ||
| 115 | |||
| 116 | {TextureFormat::BC3, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_UNORM}, | ||
| 117 | {TextureFormat::BC3, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC3_SRGB}, | ||
| 118 | |||
| 119 | {TextureFormat::BC4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC4_UNORM}, | ||
| 120 | {TextureFormat::BC4, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC4_SNORM}, | ||
| 121 | |||
| 122 | {TextureFormat::BC5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC5_UNORM}, | ||
| 123 | {TextureFormat::BC5, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::BC5_SNORM}, | ||
| 124 | |||
| 125 | {TextureFormat::BC7, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_UNORM}, | ||
| 126 | {TextureFormat::BC7, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7_SRGB}, | ||
| 127 | |||
| 128 | {TextureFormat::BC6H_SFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SFLOAT}, | ||
| 129 | {TextureFormat::BC6H_UFLOAT, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UFLOAT}, | ||
| 130 | |||
| 131 | {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_UNORM}, | ||
| 132 | {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB}, | ||
| 133 | |||
| 134 | {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_UNORM}, | ||
| 135 | {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB}, | ||
| 136 | |||
| 137 | {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_UNORM}, | ||
| 138 | {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB}, | ||
| 139 | |||
| 140 | {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_UNORM}, | ||
| 141 | {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB}, | ||
| 142 | |||
| 143 | {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_UNORM}, | ||
| 144 | {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB}, | ||
| 145 | |||
| 146 | {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_UNORM}, | ||
| 147 | {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB}, | ||
| 148 | |||
| 149 | {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_UNORM}, | ||
| 150 | {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB}, | ||
| 151 | |||
| 152 | {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_UNORM}, | ||
| 153 | {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB}, | ||
| 154 | |||
| 155 | {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_UNORM}, | ||
| 156 | {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB}, | ||
| 157 | |||
| 158 | {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_UNORM}, | ||
| 159 | {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB}, | ||
| 160 | 35 | ||
| 161 | {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_UNORM}, | 36 | constexpr u32 Hash(TextureFormat format, ComponentType component, bool is_srgb = LINEAR) { |
| 162 | {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB}, | 37 | return Hash(format, component, component, component, component, is_srgb); |
| 163 | }}; | 38 | } |
| 164 | 39 | ||
| 165 | } // Anonymous namespace | 40 | } // Anonymous namespace |
| 166 | 41 | ||
| 167 | FormatLookupTable::FormatLookupTable() { | 42 | PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, ComponentType green, |
| 168 | table.fill(static_cast<u8>(PixelFormat::Invalid)); | 43 | ComponentType blue, ComponentType alpha, |
| 169 | 44 | bool is_srgb) noexcept { | |
| 170 | for (const auto& entry : DefinitionTable) { | 45 | switch (Hash(format, red, green, blue, alpha, is_srgb)) { |
| 171 | table[CalculateIndex(entry.texture_format, entry.is_srgb != 0, entry.red_component, | 46 | case Hash(TextureFormat::A8R8G8B8, UNORM): |
| 172 | entry.green_component, entry.blue_component, entry.alpha_component)] = | 47 | return PixelFormat::A8B8G8R8_UNORM; |
| 173 | static_cast<u8>(entry.pixel_format); | 48 | case Hash(TextureFormat::A8R8G8B8, SNORM): |
| 174 | } | 49 | return PixelFormat::A8B8G8R8_SNORM; |
| 175 | } | 50 | case Hash(TextureFormat::A8R8G8B8, UINT): |
| 176 | 51 | return PixelFormat::A8B8G8R8_UINT; | |
| 177 | PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb, | 52 | case Hash(TextureFormat::A8R8G8B8, SINT): |
| 178 | ComponentType red_component, | 53 | return PixelFormat::A8B8G8R8_SINT; |
| 179 | ComponentType green_component, | 54 | case Hash(TextureFormat::A8R8G8B8, UNORM, SRGB): |
| 180 | ComponentType blue_component, | 55 | return PixelFormat::A8B8G8R8_SRGB; |
| 181 | ComponentType alpha_component) const noexcept { | 56 | case Hash(TextureFormat::B5G6R5, UNORM): |
| 182 | const auto pixel_format = static_cast<PixelFormat>(table[CalculateIndex( | 57 | return PixelFormat::B5G6R5_UNORM; |
| 183 | format, is_srgb, red_component, green_component, blue_component, alpha_component)]); | 58 | case Hash(TextureFormat::A2B10G10R10, UNORM): |
| 184 | // [[likely]] | 59 | return PixelFormat::A2B10G10R10_UNORM; |
| 185 | if (pixel_format != PixelFormat::Invalid) { | 60 | case Hash(TextureFormat::A2B10G10R10, UINT): |
| 186 | return pixel_format; | 61 | return PixelFormat::A2B10G10R10_UINT; |
| 62 | case Hash(TextureFormat::A1B5G5R5, UNORM): | ||
| 63 | return PixelFormat::A1B5G5R5_UNORM; | ||
| 64 | case Hash(TextureFormat::A4B4G4R4, UNORM): | ||
| 65 | return PixelFormat::A4B4G4R4_UNORM; | ||
| 66 | case Hash(TextureFormat::R8, UNORM): | ||
| 67 | return PixelFormat::R8_UNORM; | ||
| 68 | case Hash(TextureFormat::R8, SNORM): | ||
| 69 | return PixelFormat::R8_SNORM; | ||
| 70 | case Hash(TextureFormat::R8, UINT): | ||
| 71 | return PixelFormat::R8_UINT; | ||
| 72 | case Hash(TextureFormat::R8, SINT): | ||
| 73 | return PixelFormat::R8_SINT; | ||
| 74 | case Hash(TextureFormat::R8G8, UNORM): | ||
| 75 | return PixelFormat::R8G8_UNORM; | ||
| 76 | case Hash(TextureFormat::R8G8, SNORM): | ||
| 77 | return PixelFormat::R8G8_SNORM; | ||
| 78 | case Hash(TextureFormat::R8G8, UINT): | ||
| 79 | return PixelFormat::R8G8_UINT; | ||
| 80 | case Hash(TextureFormat::R8G8, SINT): | ||
| 81 | return PixelFormat::R8G8_SINT; | ||
| 82 | case Hash(TextureFormat::R16G16B16A16, FLOAT): | ||
| 83 | return PixelFormat::R16G16B16A16_FLOAT; | ||
| 84 | case Hash(TextureFormat::R16G16B16A16, UNORM): | ||
| 85 | return PixelFormat::R16G16B16A16_UNORM; | ||
| 86 | case Hash(TextureFormat::R16G16B16A16, SNORM): | ||
| 87 | return PixelFormat::R16G16B16A16_SNORM; | ||
| 88 | case Hash(TextureFormat::R16G16B16A16, UINT): | ||
| 89 | return PixelFormat::R16G16B16A16_UINT; | ||
| 90 | case Hash(TextureFormat::R16G16B16A16, SINT): | ||
| 91 | return PixelFormat::R16G16B16A16_SINT; | ||
| 92 | case Hash(TextureFormat::R16G16, FLOAT): | ||
| 93 | return PixelFormat::R16G16_FLOAT; | ||
| 94 | case Hash(TextureFormat::R16G16, UNORM): | ||
| 95 | return PixelFormat::R16G16_UNORM; | ||
| 96 | case Hash(TextureFormat::R16G16, SNORM): | ||
| 97 | return PixelFormat::R16G16_SNORM; | ||
| 98 | case Hash(TextureFormat::R16G16, UINT): | ||
| 99 | return PixelFormat::R16G16_UINT; | ||
| 100 | case Hash(TextureFormat::R16G16, SINT): | ||
| 101 | return PixelFormat::R16G16_SINT; | ||
| 102 | case Hash(TextureFormat::R16, FLOAT): | ||
| 103 | return PixelFormat::R16_FLOAT; | ||
| 104 | case Hash(TextureFormat::R16, UNORM): | ||
| 105 | return PixelFormat::R16_UNORM; | ||
| 106 | case Hash(TextureFormat::R16, SNORM): | ||
| 107 | return PixelFormat::R16_SNORM; | ||
| 108 | case Hash(TextureFormat::R16, UINT): | ||
| 109 | return PixelFormat::R16_UINT; | ||
| 110 | case Hash(TextureFormat::R16, SINT): | ||
| 111 | return PixelFormat::R16_SINT; | ||
| 112 | case Hash(TextureFormat::B10G11R11, FLOAT): | ||
| 113 | return PixelFormat::B10G11R11_FLOAT; | ||
| 114 | case Hash(TextureFormat::R32G32B32A32, FLOAT): | ||
| 115 | return PixelFormat::R32G32B32A32_FLOAT; | ||
| 116 | case Hash(TextureFormat::R32G32B32A32, UINT): | ||
| 117 | return PixelFormat::R32G32B32A32_UINT; | ||
| 118 | case Hash(TextureFormat::R32G32B32A32, SINT): | ||
| 119 | return PixelFormat::R32G32B32A32_SINT; | ||
| 120 | case Hash(TextureFormat::R32G32B32, FLOAT): | ||
| 121 | return PixelFormat::R32G32B32_FLOAT; | ||
| 122 | case Hash(TextureFormat::R32G32, FLOAT): | ||
| 123 | return PixelFormat::R32G32_FLOAT; | ||
| 124 | case Hash(TextureFormat::R32G32, UINT): | ||
| 125 | return PixelFormat::R32G32_UINT; | ||
| 126 | case Hash(TextureFormat::R32G32, SINT): | ||
| 127 | return PixelFormat::R32G32_SINT; | ||
| 128 | case Hash(TextureFormat::R32, FLOAT): | ||
| 129 | return PixelFormat::R32_FLOAT; | ||
| 130 | case Hash(TextureFormat::R32, UINT): | ||
| 131 | return PixelFormat::R32_UINT; | ||
| 132 | case Hash(TextureFormat::R32, SINT): | ||
| 133 | return PixelFormat::R32_SINT; | ||
| 134 | case Hash(TextureFormat::E5B9G9R9, FLOAT): | ||
| 135 | return PixelFormat::E5B9G9R9_FLOAT; | ||
| 136 | case Hash(TextureFormat::D32, FLOAT): | ||
| 137 | return PixelFormat::D32_FLOAT; | ||
| 138 | case Hash(TextureFormat::D16, UNORM): | ||
| 139 | return PixelFormat::D16_UNORM; | ||
| 140 | case Hash(TextureFormat::S8D24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 141 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 142 | case Hash(TextureFormat::R8G24, UINT, UNORM, UNORM, UNORM, LINEAR): | ||
| 143 | return PixelFormat::S8_UINT_D24_UNORM; | ||
| 144 | case Hash(TextureFormat::D32S8, FLOAT, UINT, UNORM, UNORM, LINEAR): | ||
| 145 | return PixelFormat::D32_FLOAT_S8_UINT; | ||
| 146 | case Hash(TextureFormat::BC1_RGBA, UNORM, LINEAR): | ||
| 147 | return PixelFormat::BC1_RGBA_UNORM; | ||
| 148 | case Hash(TextureFormat::BC1_RGBA, UNORM, SRGB): | ||
| 149 | return PixelFormat::BC1_RGBA_SRGB; | ||
| 150 | case Hash(TextureFormat::BC2, UNORM, LINEAR): | ||
| 151 | return PixelFormat::BC2_UNORM; | ||
| 152 | case Hash(TextureFormat::BC2, UNORM, SRGB): | ||
| 153 | return PixelFormat::BC2_SRGB; | ||
| 154 | case Hash(TextureFormat::BC3, UNORM, LINEAR): | ||
| 155 | return PixelFormat::BC3_UNORM; | ||
| 156 | case Hash(TextureFormat::BC3, UNORM, SRGB): | ||
| 157 | return PixelFormat::BC3_SRGB; | ||
| 158 | case Hash(TextureFormat::BC4, UNORM): | ||
| 159 | return PixelFormat::BC4_UNORM; | ||
| 160 | case Hash(TextureFormat::BC4, SNORM): | ||
| 161 | return PixelFormat::BC4_SNORM; | ||
| 162 | case Hash(TextureFormat::BC5, UNORM): | ||
| 163 | return PixelFormat::BC5_UNORM; | ||
| 164 | case Hash(TextureFormat::BC5, SNORM): | ||
| 165 | return PixelFormat::BC5_SNORM; | ||
| 166 | case Hash(TextureFormat::BC7, UNORM, LINEAR): | ||
| 167 | return PixelFormat::BC7_UNORM; | ||
| 168 | case Hash(TextureFormat::BC7, UNORM, SRGB): | ||
| 169 | return PixelFormat::BC7_SRGB; | ||
| 170 | case Hash(TextureFormat::BC6H_SFLOAT, FLOAT): | ||
| 171 | return PixelFormat::BC6H_SFLOAT; | ||
| 172 | case Hash(TextureFormat::BC6H_UFLOAT, FLOAT): | ||
| 173 | return PixelFormat::BC6H_UFLOAT; | ||
| 174 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, LINEAR): | ||
| 175 | return PixelFormat::ASTC_2D_4X4_UNORM; | ||
| 176 | case Hash(TextureFormat::ASTC_2D_4X4, UNORM, SRGB): | ||
| 177 | return PixelFormat::ASTC_2D_4X4_SRGB; | ||
| 178 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, LINEAR): | ||
| 179 | return PixelFormat::ASTC_2D_5X4_UNORM; | ||
| 180 | case Hash(TextureFormat::ASTC_2D_5X4, UNORM, SRGB): | ||
| 181 | return PixelFormat::ASTC_2D_5X4_SRGB; | ||
| 182 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, LINEAR): | ||
| 183 | return PixelFormat::ASTC_2D_5X5_UNORM; | ||
| 184 | case Hash(TextureFormat::ASTC_2D_5X5, UNORM, SRGB): | ||
| 185 | return PixelFormat::ASTC_2D_5X5_SRGB; | ||
| 186 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, LINEAR): | ||
| 187 | return PixelFormat::ASTC_2D_8X8_UNORM; | ||
| 188 | case Hash(TextureFormat::ASTC_2D_8X8, UNORM, SRGB): | ||
| 189 | return PixelFormat::ASTC_2D_8X8_SRGB; | ||
| 190 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, LINEAR): | ||
| 191 | return PixelFormat::ASTC_2D_8X5_UNORM; | ||
| 192 | case Hash(TextureFormat::ASTC_2D_8X5, UNORM, SRGB): | ||
| 193 | return PixelFormat::ASTC_2D_8X5_SRGB; | ||
| 194 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, LINEAR): | ||
| 195 | return PixelFormat::ASTC_2D_10X8_UNORM; | ||
| 196 | case Hash(TextureFormat::ASTC_2D_10X8, UNORM, SRGB): | ||
| 197 | return PixelFormat::ASTC_2D_10X8_SRGB; | ||
| 198 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, LINEAR): | ||
| 199 | return PixelFormat::ASTC_2D_6X6_UNORM; | ||
| 200 | case Hash(TextureFormat::ASTC_2D_6X6, UNORM, SRGB): | ||
| 201 | return PixelFormat::ASTC_2D_6X6_SRGB; | ||
| 202 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, LINEAR): | ||
| 203 | return PixelFormat::ASTC_2D_10X10_UNORM; | ||
| 204 | case Hash(TextureFormat::ASTC_2D_10X10, UNORM, SRGB): | ||
| 205 | return PixelFormat::ASTC_2D_10X10_SRGB; | ||
| 206 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, LINEAR): | ||
| 207 | return PixelFormat::ASTC_2D_12X12_UNORM; | ||
| 208 | case Hash(TextureFormat::ASTC_2D_12X12, UNORM, SRGB): | ||
| 209 | return PixelFormat::ASTC_2D_12X12_SRGB; | ||
| 210 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, LINEAR): | ||
| 211 | return PixelFormat::ASTC_2D_8X6_UNORM; | ||
| 212 | case Hash(TextureFormat::ASTC_2D_8X6, UNORM, SRGB): | ||
| 213 | return PixelFormat::ASTC_2D_8X6_SRGB; | ||
| 214 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, LINEAR): | ||
| 215 | return PixelFormat::ASTC_2D_6X5_UNORM; | ||
| 216 | case Hash(TextureFormat::ASTC_2D_6X5, UNORM, SRGB): | ||
| 217 | return PixelFormat::ASTC_2D_6X5_SRGB; | ||
| 187 | } | 218 | } |
| 188 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", | 219 | UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}", |
| 189 | static_cast<int>(format), is_srgb, static_cast<int>(red_component), | 220 | static_cast<int>(format), is_srgb, static_cast<int>(red), |
| 190 | static_cast<int>(green_component), static_cast<int>(blue_component), | 221 | static_cast<int>(green), static_cast<int>(blue), static_cast<int>(alpha)); |
| 191 | static_cast<int>(alpha_component)); | ||
| 192 | return PixelFormat::A8B8G8R8_UNORM; | 222 | return PixelFormat::A8B8G8R8_UNORM; |
| 193 | } | 223 | } |
| 194 | 224 | ||
| 195 | void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component, | ||
| 196 | ComponentType green_component, ComponentType blue_component, | ||
| 197 | ComponentType alpha_component, PixelFormat pixel_format) {} | ||
| 198 | |||
| 199 | std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb, | ||
| 200 | ComponentType red_component, | ||
| 201 | ComponentType green_component, | ||
| 202 | ComponentType blue_component, | ||
| 203 | ComponentType alpha_component) noexcept { | ||
| 204 | const auto format_index = static_cast<std::size_t>(format); | ||
| 205 | const auto red_index = static_cast<std::size_t>(red_component); | ||
| 206 | const auto green_index = static_cast<std::size_t>(green_component); | ||
| 207 | const auto blue_index = static_cast<std::size_t>(blue_component); | ||
| 208 | const auto alpha_index = static_cast<std::size_t>(alpha_component); | ||
| 209 | const std::size_t srgb_index = is_srgb ? 1 : 0; | ||
| 210 | |||
| 211 | return format_index * PerFormat + | ||
| 212 | srgb_index * PerComponent * PerComponent * PerComponent * PerComponent + | ||
| 213 | alpha_index * PerComponent * PerComponent * PerComponent + | ||
| 214 | blue_index * PerComponent * PerComponent + green_index * PerComponent + red_index; | ||
| 215 | } | ||
| 216 | |||
| 217 | } // namespace VideoCommon | 225 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h index aa77e0a5a..729533999 100644 --- a/src/video_core/texture_cache/format_lookup_table.h +++ b/src/video_core/texture_cache/format_lookup_table.h | |||
| @@ -4,48 +4,14 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <limits> | ||
| 9 | #include "video_core/surface.h" | 7 | #include "video_core/surface.h" |
| 10 | #include "video_core/textures/texture.h" | 8 | #include "video_core/textures/texture.h" |
| 11 | 9 | ||
| 12 | namespace VideoCommon { | 10 | namespace VideoCommon { |
| 13 | 11 | ||
| 14 | class FormatLookupTable { | 12 | VideoCore::Surface::PixelFormat PixelFormatFromTextureInfo( |
| 15 | public: | 13 | Tegra::Texture::TextureFormat format, Tegra::Texture::ComponentType red_component, |
| 16 | explicit FormatLookupTable(); | 14 | Tegra::Texture::ComponentType green_component, Tegra::Texture::ComponentType blue_component, |
| 17 | 15 | Tegra::Texture::ComponentType alpha_component, bool is_srgb) noexcept; | |
| 18 | VideoCore::Surface::PixelFormat GetPixelFormat( | ||
| 19 | Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 20 | Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component, | ||
| 21 | Tegra::Texture::ComponentType blue_component, | ||
| 22 | Tegra::Texture::ComponentType alpha_component) const noexcept; | ||
| 23 | |||
| 24 | private: | ||
| 25 | static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max()); | ||
| 26 | |||
| 27 | static constexpr std::size_t NumTextureFormats = 128; | ||
| 28 | |||
| 29 | static constexpr std::size_t PerComponent = 8; | ||
| 30 | static constexpr std::size_t PerComponents2 = PerComponent * PerComponent; | ||
| 31 | static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent; | ||
| 32 | static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent; | ||
| 33 | static constexpr std::size_t PerFormat = PerComponents4 * 2; | ||
| 34 | |||
| 35 | static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 36 | Tegra::Texture::ComponentType red_component, | ||
| 37 | Tegra::Texture::ComponentType green_component, | ||
| 38 | Tegra::Texture::ComponentType blue_component, | ||
| 39 | Tegra::Texture::ComponentType alpha_component) noexcept; | ||
| 40 | |||
| 41 | void Set(Tegra::Texture::TextureFormat format, bool is_srgb, | ||
| 42 | Tegra::Texture::ComponentType red_component, | ||
| 43 | Tegra::Texture::ComponentType green_component, | ||
| 44 | Tegra::Texture::ComponentType blue_component, | ||
| 45 | Tegra::Texture::ComponentType alpha_component, | ||
| 46 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 47 | |||
| 48 | std::array<u8, NumTextureFormats * PerFormat> table; | ||
| 49 | }; | ||
| 50 | 16 | ||
| 51 | } // namespace VideoCommon | 17 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp new file mode 100644 index 000000000..d10ba4ccd --- /dev/null +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "video_core/texture_cache/formatter.h" | ||
| 9 | #include "video_core/texture_cache/image_base.h" | ||
| 10 | #include "video_core/texture_cache/image_info.h" | ||
| 11 | #include "video_core/texture_cache/image_view_base.h" | ||
| 12 | #include "video_core/texture_cache/render_targets.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | std::string Name(const ImageBase& image) { | ||
| 17 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 18 | const ImageInfo& info = image.info; | ||
| 19 | const u32 width = info.size.width; | ||
| 20 | const u32 height = info.size.height; | ||
| 21 | const u32 depth = info.size.depth; | ||
| 22 | const u32 num_layers = image.info.resources.layers; | ||
| 23 | const u32 num_levels = image.info.resources.levels; | ||
| 24 | std::string resource; | ||
| 25 | if (num_layers > 1) { | ||
| 26 | resource += fmt::format(":L{}", num_layers); | ||
| 27 | } | ||
| 28 | if (num_levels > 1) { | ||
| 29 | resource += fmt::format(":M{}", num_levels); | ||
| 30 | } | ||
| 31 | switch (image.info.type) { | ||
| 32 | case ImageType::e1D: | ||
| 33 | return fmt::format("Image 1D 0x{:x} {}{}", gpu_addr, width, resource); | ||
| 34 | case ImageType::e2D: | ||
| 35 | return fmt::format("Image 2D 0x{:x} {}x{}{}", gpu_addr, width, height, resource); | ||
| 36 | case ImageType::e3D: | ||
| 37 | return fmt::format("Image 2D 0x{:x} {}x{}x{}{}", gpu_addr, width, height, depth, resource); | ||
| 38 | case ImageType::Linear: | ||
| 39 | return fmt::format("Image Linear 0x{:x} {}x{}", gpu_addr, width, height); | ||
| 40 | case ImageType::Buffer: | ||
| 41 | return fmt::format("Buffer 0x{:x} {}", image.gpu_addr, image.info.size.width); | ||
| 42 | } | ||
| 43 | return "Invalid"; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | ||
| 47 | const u32 width = image_view.size.width; | ||
| 48 | const u32 height = image_view.size.height; | ||
| 49 | const u32 depth = image_view.size.depth; | ||
| 50 | const u32 num_levels = image_view.range.extent.levels; | ||
| 51 | const u32 num_layers = image_view.range.extent.layers; | ||
| 52 | |||
| 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | ||
| 54 | switch (type.value_or(image_view.type)) { | ||
| 55 | case ImageViewType::e1D: | ||
| 56 | return fmt::format("ImageView 1D {}{}", width, level); | ||
| 57 | case ImageViewType::e2D: | ||
| 58 | return fmt::format("ImageView 2D {}x{}{}", width, height, level); | ||
| 59 | case ImageViewType::Cube: | ||
| 60 | return fmt::format("ImageView Cube {}x{}{}", width, height, level); | ||
| 61 | case ImageViewType::e3D: | ||
| 62 | return fmt::format("ImageView 3D {}x{}x{}{}", width, height, depth, level); | ||
| 63 | case ImageViewType::e1DArray: | ||
| 64 | return fmt::format("ImageView 1DArray {}{}|{}", width, level, num_layers); | ||
| 65 | case ImageViewType::e2DArray: | ||
| 66 | return fmt::format("ImageView 2DArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 67 | case ImageViewType::CubeArray: | ||
| 68 | return fmt::format("ImageView CubeArray {}x{}{}|{}", width, height, level, num_layers); | ||
| 69 | case ImageViewType::Rect: | ||
| 70 | return fmt::format("ImageView Rect {}x{}{}", width, height, level); | ||
| 71 | case ImageViewType::Buffer: | ||
| 72 | return fmt::format("BufferView {}", width); | ||
| 73 | } | ||
| 74 | return "Invalid"; | ||
| 75 | } | ||
| 76 | |||
| 77 | std::string Name(const RenderTargets& render_targets) { | ||
| 78 | std::string_view debug_prefix; | ||
| 79 | const auto num_color = std::ranges::count_if( | ||
| 80 | render_targets.color_buffer_ids, [](ImageViewId id) { return static_cast<bool>(id); }); | ||
| 81 | if (render_targets.depth_buffer_id) { | ||
| 82 | debug_prefix = num_color > 0 ? "R" : "Z"; | ||
| 83 | } else { | ||
| 84 | debug_prefix = num_color > 0 ? "C" : "X"; | ||
| 85 | } | ||
| 86 | const Extent2D size = render_targets.size; | ||
| 87 | if (num_color > 0) { | ||
| 88 | return fmt::format("Framebuffer {}{} {}x{}", debug_prefix, num_color, size.width, | ||
| 89 | size.height); | ||
| 90 | } else { | ||
| 91 | return fmt::format("Framebuffer {} {}x{}", debug_prefix, size.width, size.height); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h new file mode 100644 index 000000000..a48413983 --- /dev/null +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -0,0 +1,263 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | template <> | ||
| 15 | struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::string_view> { | ||
| 16 | template <typename FormatContext> | ||
| 17 | auto format(VideoCore::Surface::PixelFormat format, FormatContext& ctx) { | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | const string_view name = [format] { | ||
| 20 | switch (format) { | ||
| 21 | case PixelFormat::A8B8G8R8_UNORM: | ||
| 22 | return "A8B8G8R8_UNORM"; | ||
| 23 | case PixelFormat::A8B8G8R8_SNORM: | ||
| 24 | return "A8B8G8R8_SNORM"; | ||
| 25 | case PixelFormat::A8B8G8R8_SINT: | ||
| 26 | return "A8B8G8R8_SINT"; | ||
| 27 | case PixelFormat::A8B8G8R8_UINT: | ||
| 28 | return "A8B8G8R8_UINT"; | ||
| 29 | case PixelFormat::R5G6B5_UNORM: | ||
| 30 | return "R5G6B5_UNORM"; | ||
| 31 | case PixelFormat::B5G6R5_UNORM: | ||
| 32 | return "B5G6R5_UNORM"; | ||
| 33 | case PixelFormat::A1R5G5B5_UNORM: | ||
| 34 | return "A1R5G5B5_UNORM"; | ||
| 35 | case PixelFormat::A2B10G10R10_UNORM: | ||
| 36 | return "A2B10G10R10_UNORM"; | ||
| 37 | case PixelFormat::A2B10G10R10_UINT: | ||
| 38 | return "A2B10G10R10_UINT"; | ||
| 39 | case PixelFormat::A1B5G5R5_UNORM: | ||
| 40 | return "A1B5G5R5_UNORM"; | ||
| 41 | case PixelFormat::R8_UNORM: | ||
| 42 | return "R8_UNORM"; | ||
| 43 | case PixelFormat::R8_SNORM: | ||
| 44 | return "R8_SNORM"; | ||
| 45 | case PixelFormat::R8_SINT: | ||
| 46 | return "R8_SINT"; | ||
| 47 | case PixelFormat::R8_UINT: | ||
| 48 | return "R8_UINT"; | ||
| 49 | case PixelFormat::R16G16B16A16_FLOAT: | ||
| 50 | return "R16G16B16A16_FLOAT"; | ||
| 51 | case PixelFormat::R16G16B16A16_UNORM: | ||
| 52 | return "R16G16B16A16_UNORM"; | ||
| 53 | case PixelFormat::R16G16B16A16_SNORM: | ||
| 54 | return "R16G16B16A16_SNORM"; | ||
| 55 | case PixelFormat::R16G16B16A16_SINT: | ||
| 56 | return "R16G16B16A16_SINT"; | ||
| 57 | case PixelFormat::R16G16B16A16_UINT: | ||
| 58 | return "R16G16B16A16_UINT"; | ||
| 59 | case PixelFormat::B10G11R11_FLOAT: | ||
| 60 | return "B10G11R11_FLOAT"; | ||
| 61 | case PixelFormat::R32G32B32A32_UINT: | ||
| 62 | return "R32G32B32A32_UINT"; | ||
| 63 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 64 | return "BC1_RGBA_UNORM"; | ||
| 65 | case PixelFormat::BC2_UNORM: | ||
| 66 | return "BC2_UNORM"; | ||
| 67 | case PixelFormat::BC3_UNORM: | ||
| 68 | return "BC3_UNORM"; | ||
| 69 | case PixelFormat::BC4_UNORM: | ||
| 70 | return "BC4_UNORM"; | ||
| 71 | case PixelFormat::BC4_SNORM: | ||
| 72 | return "BC4_SNORM"; | ||
| 73 | case PixelFormat::BC5_UNORM: | ||
| 74 | return "BC5_UNORM"; | ||
| 75 | case PixelFormat::BC5_SNORM: | ||
| 76 | return "BC5_SNORM"; | ||
| 77 | case PixelFormat::BC7_UNORM: | ||
| 78 | return "BC7_UNORM"; | ||
| 79 | case PixelFormat::BC6H_UFLOAT: | ||
| 80 | return "BC6H_UFLOAT"; | ||
| 81 | case PixelFormat::BC6H_SFLOAT: | ||
| 82 | return "BC6H_SFLOAT"; | ||
| 83 | case PixelFormat::ASTC_2D_4X4_UNORM: | ||
| 84 | return "ASTC_2D_4X4_UNORM"; | ||
| 85 | case PixelFormat::B8G8R8A8_UNORM: | ||
| 86 | return "B8G8R8A8_UNORM"; | ||
| 87 | case PixelFormat::R32G32B32A32_FLOAT: | ||
| 88 | return "R32G32B32A32_FLOAT"; | ||
| 89 | case PixelFormat::R32G32B32A32_SINT: | ||
| 90 | return "R32G32B32A32_SINT"; | ||
| 91 | case PixelFormat::R32G32_FLOAT: | ||
| 92 | return "R32G32_FLOAT"; | ||
| 93 | case PixelFormat::R32G32_SINT: | ||
| 94 | return "R32G32_SINT"; | ||
| 95 | case PixelFormat::R32_FLOAT: | ||
| 96 | return "R32_FLOAT"; | ||
| 97 | case PixelFormat::R16_FLOAT: | ||
| 98 | return "R16_FLOAT"; | ||
| 99 | case PixelFormat::R16_UNORM: | ||
| 100 | return "R16_UNORM"; | ||
| 101 | case PixelFormat::R16_SNORM: | ||
| 102 | return "R16_SNORM"; | ||
| 103 | case PixelFormat::R16_UINT: | ||
| 104 | return "R16_UINT"; | ||
| 105 | case PixelFormat::R16_SINT: | ||
| 106 | return "R16_SINT"; | ||
| 107 | case PixelFormat::R16G16_UNORM: | ||
| 108 | return "R16G16_UNORM"; | ||
| 109 | case PixelFormat::R16G16_FLOAT: | ||
| 110 | return "R16G16_FLOAT"; | ||
| 111 | case PixelFormat::R16G16_UINT: | ||
| 112 | return "R16G16_UINT"; | ||
| 113 | case PixelFormat::R16G16_SINT: | ||
| 114 | return "R16G16_SINT"; | ||
| 115 | case PixelFormat::R16G16_SNORM: | ||
| 116 | return "R16G16_SNORM"; | ||
| 117 | case PixelFormat::R32G32B32_FLOAT: | ||
| 118 | return "R32G32B32_FLOAT"; | ||
| 119 | case PixelFormat::A8B8G8R8_SRGB: | ||
| 120 | return "A8B8G8R8_SRGB"; | ||
| 121 | case PixelFormat::R8G8_UNORM: | ||
| 122 | return "R8G8_UNORM"; | ||
| 123 | case PixelFormat::R8G8_SNORM: | ||
| 124 | return "R8G8_SNORM"; | ||
| 125 | case PixelFormat::R8G8_SINT: | ||
| 126 | return "R8G8_SINT"; | ||
| 127 | case PixelFormat::R8G8_UINT: | ||
| 128 | return "R8G8_UINT"; | ||
| 129 | case PixelFormat::R32G32_UINT: | ||
| 130 | return "R32G32_UINT"; | ||
| 131 | case PixelFormat::R16G16B16X16_FLOAT: | ||
| 132 | return "R16G16B16X16_FLOAT"; | ||
| 133 | case PixelFormat::R32_UINT: | ||
| 134 | return "R32_UINT"; | ||
| 135 | case PixelFormat::R32_SINT: | ||
| 136 | return "R32_SINT"; | ||
| 137 | case PixelFormat::ASTC_2D_8X8_UNORM: | ||
| 138 | return "ASTC_2D_8X8_UNORM"; | ||
| 139 | case PixelFormat::ASTC_2D_8X5_UNORM: | ||
| 140 | return "ASTC_2D_8X5_UNORM"; | ||
| 141 | case PixelFormat::ASTC_2D_5X4_UNORM: | ||
| 142 | return "ASTC_2D_5X4_UNORM"; | ||
| 143 | case PixelFormat::B8G8R8A8_SRGB: | ||
| 144 | return "B8G8R8A8_SRGB"; | ||
| 145 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 146 | return "BC1_RGBA_SRGB"; | ||
| 147 | case PixelFormat::BC2_SRGB: | ||
| 148 | return "BC2_SRGB"; | ||
| 149 | case PixelFormat::BC3_SRGB: | ||
| 150 | return "BC3_SRGB"; | ||
| 151 | case PixelFormat::BC7_SRGB: | ||
| 152 | return "BC7_SRGB"; | ||
| 153 | case PixelFormat::A4B4G4R4_UNORM: | ||
| 154 | return "A4B4G4R4_UNORM"; | ||
| 155 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 156 | return "ASTC_2D_4X4_SRGB"; | ||
| 157 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 158 | return "ASTC_2D_8X8_SRGB"; | ||
| 159 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 160 | return "ASTC_2D_8X5_SRGB"; | ||
| 161 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 162 | return "ASTC_2D_5X4_SRGB"; | ||
| 163 | case PixelFormat::ASTC_2D_5X5_UNORM: | ||
| 164 | return "ASTC_2D_5X5_UNORM"; | ||
| 165 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 166 | return "ASTC_2D_5X5_SRGB"; | ||
| 167 | case PixelFormat::ASTC_2D_10X8_UNORM: | ||
| 168 | return "ASTC_2D_10X8_UNORM"; | ||
| 169 | case PixelFormat::ASTC_2D_10X8_SRGB: | ||
| 170 | return "ASTC_2D_10X8_SRGB"; | ||
| 171 | case PixelFormat::ASTC_2D_6X6_UNORM: | ||
| 172 | return "ASTC_2D_6X6_UNORM"; | ||
| 173 | case PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 174 | return "ASTC_2D_6X6_SRGB"; | ||
| 175 | case PixelFormat::ASTC_2D_10X10_UNORM: | ||
| 176 | return "ASTC_2D_10X10_UNORM"; | ||
| 177 | case PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 178 | return "ASTC_2D_10X10_SRGB"; | ||
| 179 | case PixelFormat::ASTC_2D_12X12_UNORM: | ||
| 180 | return "ASTC_2D_12X12_UNORM"; | ||
| 181 | case PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 182 | return "ASTC_2D_12X12_SRGB"; | ||
| 183 | case PixelFormat::ASTC_2D_8X6_UNORM: | ||
| 184 | return "ASTC_2D_8X6_UNORM"; | ||
| 185 | case PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 186 | return "ASTC_2D_8X6_SRGB"; | ||
| 187 | case PixelFormat::ASTC_2D_6X5_UNORM: | ||
| 188 | return "ASTC_2D_6X5_UNORM"; | ||
| 189 | case PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 190 | return "ASTC_2D_6X5_SRGB"; | ||
| 191 | case PixelFormat::E5B9G9R9_FLOAT: | ||
| 192 | return "E5B9G9R9_FLOAT"; | ||
| 193 | case PixelFormat::D32_FLOAT: | ||
| 194 | return "D32_FLOAT"; | ||
| 195 | case PixelFormat::D16_UNORM: | ||
| 196 | return "D16_UNORM"; | ||
| 197 | case PixelFormat::D24_UNORM_S8_UINT: | ||
| 198 | return "D24_UNORM_S8_UINT"; | ||
| 199 | case PixelFormat::S8_UINT_D24_UNORM: | ||
| 200 | return "S8_UINT_D24_UNORM"; | ||
| 201 | case PixelFormat::D32_FLOAT_S8_UINT: | ||
| 202 | return "D32_FLOAT_S8_UINT"; | ||
| 203 | case PixelFormat::MaxDepthStencilFormat: | ||
| 204 | case PixelFormat::Invalid: | ||
| 205 | return "Invalid"; | ||
| 206 | } | ||
| 207 | return "Invalid"; | ||
| 208 | }(); | ||
| 209 | return formatter<string_view>::format(name, ctx); | ||
| 210 | } | ||
| 211 | }; | ||
| 212 | |||
| 213 | template <> | ||
| 214 | struct fmt::formatter<VideoCommon::ImageType> : fmt::formatter<fmt::string_view> { | ||
| 215 | template <typename FormatContext> | ||
| 216 | auto format(VideoCommon::ImageType type, FormatContext& ctx) { | ||
| 217 | const string_view name = [type] { | ||
| 218 | using VideoCommon::ImageType; | ||
| 219 | switch (type) { | ||
| 220 | case ImageType::e1D: | ||
| 221 | return "1D"; | ||
| 222 | case ImageType::e2D: | ||
| 223 | return "2D"; | ||
| 224 | case ImageType::e3D: | ||
| 225 | return "3D"; | ||
| 226 | case ImageType::Linear: | ||
| 227 | return "Linear"; | ||
| 228 | case ImageType::Buffer: | ||
| 229 | return "Buffer"; | ||
| 230 | } | ||
| 231 | return "Invalid"; | ||
| 232 | }(); | ||
| 233 | return formatter<string_view>::format(name, ctx); | ||
| 234 | } | ||
| 235 | }; | ||
| 236 | |||
| 237 | template <> | ||
| 238 | struct fmt::formatter<VideoCommon::Extent3D> { | ||
| 239 | constexpr auto parse(fmt::format_parse_context& ctx) { | ||
| 240 | return ctx.begin(); | ||
| 241 | } | ||
| 242 | |||
| 243 | template <typename FormatContext> | ||
| 244 | auto format(const VideoCommon::Extent3D& extent, FormatContext& ctx) { | ||
| 245 | return fmt::format_to(ctx.out(), "{{{}, {}, {}}}", extent.width, extent.height, | ||
| 246 | extent.depth); | ||
| 247 | } | ||
| 248 | }; | ||
| 249 | |||
| 250 | namespace VideoCommon { | ||
| 251 | |||
| 252 | struct ImageBase; | ||
| 253 | struct ImageViewBase; | ||
| 254 | struct RenderTargets; | ||
| 255 | |||
| 256 | [[nodiscard]] std::string Name(const ImageBase& image); | ||
| 257 | |||
| 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, | ||
| 259 | std::optional<ImageViewType> type = std::nullopt); | ||
| 260 | |||
| 261 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | ||
| 262 | |||
| 263 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp new file mode 100644 index 000000000..959b3f115 --- /dev/null +++ b/src/video_core/texture_cache/image_base.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/formatter.h" | ||
| 13 | #include "video_core/texture_cache/image_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/util.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 20 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | /// Returns the base layer and mip level offset | ||
| 24 | [[nodiscard]] std::pair<s32, s32> LayerMipOffset(s32 diff, u32 layer_stride) { | ||
| 25 | if (layer_stride == 0) { | ||
| 26 | return {0, diff}; | ||
| 27 | } else { | ||
| 28 | return {diff / layer_stride, diff % layer_stride}; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | [[nodiscard]] bool ValidateLayers(const SubresourceLayers& layers, const ImageInfo& info) { | ||
| 33 | return layers.base_level < info.resources.levels && | ||
| 34 | layers.base_layer + layers.num_layers <= info.resources.layers; | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] bool ValidateCopy(const ImageCopy& copy, const ImageInfo& dst, const ImageInfo& src) { | ||
| 38 | const Extent3D src_size = MipSize(src.size, copy.src_subresource.base_level); | ||
| 39 | const Extent3D dst_size = MipSize(dst.size, copy.dst_subresource.base_level); | ||
| 40 | if (!ValidateLayers(copy.src_subresource, src)) { | ||
| 41 | return false; | ||
| 42 | } | ||
| 43 | if (!ValidateLayers(copy.dst_subresource, dst)) { | ||
| 44 | return false; | ||
| 45 | } | ||
| 46 | if (copy.src_offset.x + copy.extent.width > src_size.width || | ||
| 47 | copy.src_offset.y + copy.extent.height > src_size.height || | ||
| 48 | copy.src_offset.z + copy.extent.depth > src_size.depth) { | ||
| 49 | return false; | ||
| 50 | } | ||
| 51 | if (copy.dst_offset.x + copy.extent.width > dst_size.width || | ||
| 52 | copy.dst_offset.y + copy.extent.height > dst_size.height || | ||
| 53 | copy.dst_offset.z + copy.extent.depth > dst_size.depth) { | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | return true; | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) | ||
| 61 | : info{info_}, guest_size_bytes{CalculateGuestSizeInBytes(info)}, | ||
| 62 | unswizzled_size_bytes{CalculateUnswizzledSizeBytes(info)}, | ||
| 63 | converted_size_bytes{CalculateConvertedSizeBytes(info)}, gpu_addr{gpu_addr_}, | ||
| 64 | cpu_addr{cpu_addr_}, cpu_addr_end{cpu_addr + guest_size_bytes}, | ||
| 65 | mip_level_offsets{CalculateMipLevelOffsets(info)} { | ||
| 66 | if (info.type == ImageType::e3D) { | ||
| 67 | slice_offsets = CalculateSliceOffsets(info); | ||
| 68 | slice_subresources = CalculateSliceSubresources(info); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { | ||
| 73 | if (other_addr < gpu_addr) { | ||
| 74 | // Subresource address can't be lower than the base | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 diff = static_cast<u32>(other_addr - gpu_addr); | ||
| 78 | if (diff > guest_size_bytes) { | ||
| 79 | // This can happen when two CPU addresses are used for different GPU addresses | ||
| 80 | return std::nullopt; | ||
| 81 | } | ||
| 82 | if (info.type != ImageType::e3D) { | ||
| 83 | const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); | ||
| 84 | const auto end = mip_level_offsets.begin() + info.resources.levels; | ||
| 85 | const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); | ||
| 86 | if (layer > info.resources.layers || it == end) { | ||
| 87 | return std::nullopt; | ||
| 88 | } | ||
| 89 | return SubresourceBase{ | ||
| 90 | .level = static_cast<s32>(std::distance(mip_level_offsets.begin(), it)), | ||
| 91 | .layer = layer, | ||
| 92 | }; | ||
| 93 | } else { | ||
| 94 | // TODO: Consider using binary_search after a threshold | ||
| 95 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 96 | if (it == slice_offsets.cend()) { | ||
| 97 | return std::nullopt; | ||
| 98 | } | ||
| 99 | return slice_subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | ImageViewId ImageBase::FindView(const ImageViewInfo& view_info) const noexcept { | ||
| 104 | const auto it = std::ranges::find(image_view_infos, view_info); | ||
| 105 | if (it == image_view_infos.end()) { | ||
| 106 | return ImageViewId{}; | ||
| 107 | } | ||
| 108 | return image_view_ids[std::distance(image_view_infos.begin(), it)]; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id) { | ||
| 112 | image_view_infos.push_back(view_info); | ||
| 113 | image_view_ids.push_back(image_view_id); | ||
| 114 | } | ||
| 115 | |||
| 116 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { | ||
| 117 | static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 118 | ASSERT(lhs.info.type == rhs.info.type); | ||
| 119 | std::optional<SubresourceBase> base; | ||
| 120 | if (lhs.info.type == ImageType::Linear) { | ||
| 121 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 122 | } else { | ||
| 123 | // We are passing relaxed formats as an option, having broken views or not won't matter | ||
| 124 | static constexpr bool broken_views = false; | ||
| 125 | base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views); | ||
| 126 | } | ||
| 127 | if (!base) { | ||
| 128 | LOG_ERROR(HW_GPU, "Image alias should have been flipped"); | ||
| 129 | return; | ||
| 130 | } | ||
| 131 | const PixelFormat lhs_format = lhs.info.format; | ||
| 132 | const PixelFormat rhs_format = rhs.info.format; | ||
| 133 | const Extent2D lhs_block{ | ||
| 134 | .width = DefaultBlockWidth(lhs_format), | ||
| 135 | .height = DefaultBlockHeight(lhs_format), | ||
| 136 | }; | ||
| 137 | const Extent2D rhs_block{ | ||
| 138 | .width = DefaultBlockWidth(rhs_format), | ||
| 139 | .height = DefaultBlockHeight(rhs_format), | ||
| 140 | }; | ||
| 141 | const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; | ||
| 142 | const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; | ||
| 143 | if (is_lhs_compressed && is_rhs_compressed) { | ||
| 144 | LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | const s32 lhs_mips = lhs.info.resources.levels; | ||
| 148 | const s32 rhs_mips = rhs.info.resources.levels; | ||
| 149 | const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); | ||
| 150 | AliasedImage lhs_alias; | ||
| 151 | AliasedImage rhs_alias; | ||
| 152 | lhs_alias.id = rhs_id; | ||
| 153 | rhs_alias.id = lhs_id; | ||
| 154 | lhs_alias.copies.reserve(num_mips); | ||
| 155 | rhs_alias.copies.reserve(num_mips); | ||
| 156 | for (s32 mip_level = 0; mip_level < num_mips; ++mip_level) { | ||
| 157 | Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); | ||
| 158 | Extent3D rhs_size = MipSize(rhs.info.size, mip_level); | ||
| 159 | if (is_lhs_compressed) { | ||
| 160 | lhs_size.width /= lhs_block.width; | ||
| 161 | lhs_size.height /= lhs_block.height; | ||
| 162 | } | ||
| 163 | if (is_rhs_compressed) { | ||
| 164 | rhs_size.width /= rhs_block.width; | ||
| 165 | rhs_size.height /= rhs_block.height; | ||
| 166 | } | ||
| 167 | const Extent3D copy_size{ | ||
| 168 | .width = std::min(lhs_size.width, rhs_size.width), | ||
| 169 | .height = std::min(lhs_size.height, rhs_size.height), | ||
| 170 | .depth = std::min(lhs_size.depth, rhs_size.depth), | ||
| 171 | }; | ||
| 172 | if (copy_size.width == 0 || copy_size.height == 0) { | ||
| 173 | LOG_WARNING(HW_GPU, "Copy size is smaller than block size. Mip cannot be aliased."); | ||
| 174 | continue; | ||
| 175 | } | ||
| 176 | const bool is_lhs_3d = lhs.info.type == ImageType::e3D; | ||
| 177 | const bool is_rhs_3d = rhs.info.type == ImageType::e3D; | ||
| 178 | const Offset3D lhs_offset{0, 0, 0}; | ||
| 179 | const Offset3D rhs_offset{0, 0, is_rhs_3d ? base->layer : 0}; | ||
| 180 | const s32 lhs_layers = is_lhs_3d ? 1 : lhs.info.resources.layers - base->layer; | ||
| 181 | const s32 rhs_layers = is_rhs_3d ? 1 : rhs.info.resources.layers; | ||
| 182 | const s32 num_layers = std::min(lhs_layers, rhs_layers); | ||
| 183 | const SubresourceLayers lhs_subresource{ | ||
| 184 | .base_level = mip_level, | ||
| 185 | .base_layer = 0, | ||
| 186 | .num_layers = num_layers, | ||
| 187 | }; | ||
| 188 | const SubresourceLayers rhs_subresource{ | ||
| 189 | .base_level = base->level + mip_level, | ||
| 190 | .base_layer = is_rhs_3d ? 0 : base->layer, | ||
| 191 | .num_layers = num_layers, | ||
| 192 | }; | ||
| 193 | [[maybe_unused]] const ImageCopy& to_lhs_copy = lhs_alias.copies.emplace_back(ImageCopy{ | ||
| 194 | .src_subresource = lhs_subresource, | ||
| 195 | .dst_subresource = rhs_subresource, | ||
| 196 | .src_offset = lhs_offset, | ||
| 197 | .dst_offset = rhs_offset, | ||
| 198 | .extent = copy_size, | ||
| 199 | }); | ||
| 200 | [[maybe_unused]] const ImageCopy& to_rhs_copy = rhs_alias.copies.emplace_back(ImageCopy{ | ||
| 201 | .src_subresource = rhs_subresource, | ||
| 202 | .dst_subresource = lhs_subresource, | ||
| 203 | .src_offset = rhs_offset, | ||
| 204 | .dst_offset = lhs_offset, | ||
| 205 | .extent = copy_size, | ||
| 206 | }); | ||
| 207 | ASSERT_MSG(ValidateCopy(to_lhs_copy, lhs.info, rhs.info), "Invalid RHS to LHS copy"); | ||
| 208 | ASSERT_MSG(ValidateCopy(to_rhs_copy, rhs.info, lhs.info), "Invalid LHS to RHS copy"); | ||
| 209 | } | ||
| 210 | ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty()); | ||
| 211 | if (lhs_alias.copies.empty()) { | ||
| 212 | return; | ||
| 213 | } | ||
| 214 | lhs.aliased_images.push_back(std::move(lhs_alias)); | ||
| 215 | rhs.aliased_images.push_back(std::move(rhs_alias)); | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h new file mode 100644 index 000000000..b7f3b7e43 --- /dev/null +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/texture_cache/image_info.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | enum class ImageFlagBits : u32 { | ||
| 20 | AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU | ||
| 21 | Converted = 1 << 1, ///< Guest format is not supported natively and it has to be converted | ||
| 22 | CpuModified = 1 << 2, ///< Contents have been modified from the CPU | ||
| 23 | GpuModified = 1 << 3, ///< Contents have been modified from the GPU | ||
| 24 | Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT | ||
| 25 | Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted | ||
| 26 | Registered = 1 << 6, ///< True when the image is registered | ||
| 27 | Picked = 1 << 7, ///< Temporary flag to mark the image as picked | ||
| 28 | }; | ||
| 29 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | ||
| 30 | |||
| 31 | struct ImageViewInfo; | ||
| 32 | |||
| 33 | struct AliasedImage { | ||
| 34 | std::vector<ImageCopy> copies; | ||
| 35 | ImageId id; | ||
| 36 | }; | ||
| 37 | |||
| 38 | struct ImageBase { | ||
| 39 | explicit ImageBase(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 40 | |||
| 41 | [[nodiscard]] std::optional<SubresourceBase> TryFindBase(GPUVAddr other_addr) const noexcept; | ||
| 42 | |||
| 43 | [[nodiscard]] ImageViewId FindView(const ImageViewInfo& view_info) const noexcept; | ||
| 44 | |||
| 45 | void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); | ||
| 46 | |||
| 47 | [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { | ||
| 48 | const VAddr overlap_end = overlap_cpu_addr + overlap_size; | ||
| 49 | return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; | ||
| 50 | } | ||
| 51 | |||
| 52 | ImageInfo info; | ||
| 53 | |||
| 54 | u32 guest_size_bytes = 0; | ||
| 55 | u32 unswizzled_size_bytes = 0; | ||
| 56 | u32 converted_size_bytes = 0; | ||
| 57 | ImageFlagBits flags = ImageFlagBits::CpuModified; | ||
| 58 | |||
| 59 | GPUVAddr gpu_addr = 0; | ||
| 60 | VAddr cpu_addr = 0; | ||
| 61 | VAddr cpu_addr_end = 0; | ||
| 62 | |||
| 63 | u64 modification_tick = 0; | ||
| 64 | u64 frame_tick = 0; | ||
| 65 | |||
| 66 | std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; | ||
| 67 | |||
| 68 | std::vector<ImageViewInfo> image_view_infos; | ||
| 69 | std::vector<ImageViewId> image_view_ids; | ||
| 70 | |||
| 71 | std::vector<u32> slice_offsets; | ||
| 72 | std::vector<SubresourceBase> slice_subresources; | ||
| 73 | |||
| 74 | std::vector<AliasedImage> aliased_images; | ||
| 75 | }; | ||
| 76 | |||
| 77 | struct ImageAllocBase { | ||
| 78 | std::vector<ImageId> images; | ||
| 79 | }; | ||
| 80 | |||
| 81 | void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id); | ||
| 82 | |||
| 83 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp new file mode 100644 index 000000000..64fd7010a --- /dev/null +++ b/src/video_core/texture_cache/image_info.cpp | |||
| @@ -0,0 +1,189 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/surface.h" | ||
| 7 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 8 | #include "video_core/texture_cache/image_info.h" | ||
| 9 | #include "video_core/texture_cache/samples_helper.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/texture_cache/util.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::TextureType; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | ImageInfo::ImageInfo(const TICEntry& config) noexcept { | ||
| 21 | format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type, | ||
| 22 | config.a_type, config.srgb_conversion); | ||
| 23 | num_samples = NumSamples(config.msaa_mode); | ||
| 24 | resources.levels = config.max_mip_level + 1; | ||
| 25 | if (config.IsPitchLinear()) { | ||
| 26 | pitch = config.Pitch(); | ||
| 27 | } else if (config.IsBlockLinear()) { | ||
| 28 | block = Extent3D{ | ||
| 29 | .width = config.block_width, | ||
| 30 | .height = config.block_height, | ||
| 31 | .depth = config.block_depth, | ||
| 32 | }; | ||
| 33 | } | ||
| 34 | tile_width_spacing = config.tile_width_spacing; | ||
| 35 | if (config.texture_type != TextureType::Texture2D && | ||
| 36 | config.texture_type != TextureType::Texture2DNoMipmap) { | ||
| 37 | ASSERT(!config.IsPitchLinear()); | ||
| 38 | } | ||
| 39 | switch (config.texture_type) { | ||
| 40 | case TextureType::Texture1D: | ||
| 41 | ASSERT(config.BaseLayer() == 0); | ||
| 42 | type = ImageType::e1D; | ||
| 43 | size.width = config.Width(); | ||
| 44 | break; | ||
| 45 | case TextureType::Texture1DArray: | ||
| 46 | UNIMPLEMENTED_IF(config.BaseLayer() != 0); | ||
| 47 | type = ImageType::e1D; | ||
| 48 | size.width = config.Width(); | ||
| 49 | resources.layers = config.Depth(); | ||
| 50 | break; | ||
| 51 | case TextureType::Texture2D: | ||
| 52 | case TextureType::Texture2DNoMipmap: | ||
| 53 | ASSERT(config.Depth() == 1); | ||
| 54 | type = config.IsPitchLinear() ? ImageType::Linear : ImageType::e2D; | ||
| 55 | size.width = config.Width(); | ||
| 56 | size.height = config.Height(); | ||
| 57 | resources.layers = config.BaseLayer() + 1; | ||
| 58 | break; | ||
| 59 | case TextureType::Texture2DArray: | ||
| 60 | type = ImageType::e2D; | ||
| 61 | size.width = config.Width(); | ||
| 62 | size.height = config.Height(); | ||
| 63 | resources.layers = config.BaseLayer() + config.Depth(); | ||
| 64 | break; | ||
| 65 | case TextureType::TextureCubemap: | ||
| 66 | ASSERT(config.Depth() == 1); | ||
| 67 | type = ImageType::e2D; | ||
| 68 | size.width = config.Width(); | ||
| 69 | size.height = config.Height(); | ||
| 70 | resources.layers = config.BaseLayer() + 6; | ||
| 71 | break; | ||
| 72 | case TextureType::TextureCubeArray: | ||
| 73 | UNIMPLEMENTED_IF(config.load_store_hint != 0); | ||
| 74 | type = ImageType::e2D; | ||
| 75 | size.width = config.Width(); | ||
| 76 | size.height = config.Height(); | ||
| 77 | resources.layers = config.BaseLayer() + config.Depth() * 6; | ||
| 78 | break; | ||
| 79 | case TextureType::Texture3D: | ||
| 80 | ASSERT(config.BaseLayer() == 0); | ||
| 81 | type = ImageType::e3D; | ||
| 82 | size.width = config.Width(); | ||
| 83 | size.height = config.Height(); | ||
| 84 | size.depth = config.Depth(); | ||
| 85 | break; | ||
| 86 | case TextureType::Texture1DBuffer: | ||
| 87 | type = ImageType::Buffer; | ||
| 88 | size.width = config.Width(); | ||
| 89 | break; | ||
| 90 | default: | ||
| 91 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | if (type != ImageType::Linear) { | ||
| 95 | // FIXME: Call this without passing *this | ||
| 96 | layer_stride = CalculateLayerStride(*this); | ||
| 97 | maybe_unaligned_layer_stride = CalculateLayerSize(*this); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept { | ||
| 102 | const auto& rt = regs.rt[index]; | ||
| 103 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(rt.format); | ||
| 104 | if (rt.tile_mode.is_pitch_linear) { | ||
| 105 | ASSERT(rt.tile_mode.is_3d == 0); | ||
| 106 | type = ImageType::Linear; | ||
| 107 | pitch = rt.width; | ||
| 108 | size = Extent3D{ | ||
| 109 | .width = pitch / BytesPerBlock(format), | ||
| 110 | .height = rt.height, | ||
| 111 | .depth = 1, | ||
| 112 | }; | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | size.width = rt.width; | ||
| 116 | size.height = rt.height; | ||
| 117 | layer_stride = rt.layer_stride * 4; | ||
| 118 | maybe_unaligned_layer_stride = layer_stride; | ||
| 119 | num_samples = NumSamples(regs.multisample_mode); | ||
| 120 | block = Extent3D{ | ||
| 121 | .width = rt.tile_mode.block_width, | ||
| 122 | .height = rt.tile_mode.block_height, | ||
| 123 | .depth = rt.tile_mode.block_depth, | ||
| 124 | }; | ||
| 125 | if (rt.tile_mode.is_3d) { | ||
| 126 | type = ImageType::e3D; | ||
| 127 | size.depth = rt.depth; | ||
| 128 | } else { | ||
| 129 | type = ImageType::e2D; | ||
| 130 | resources.layers = rt.depth; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | ||
| 135 | format = VideoCore::Surface::PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 136 | size.width = regs.zeta_width; | ||
| 137 | size.height = regs.zeta_height; | ||
| 138 | resources.levels = 1; | ||
| 139 | layer_stride = regs.zeta.layer_stride * 4; | ||
| 140 | maybe_unaligned_layer_stride = layer_stride; | ||
| 141 | num_samples = NumSamples(regs.multisample_mode); | ||
| 142 | block = Extent3D{ | ||
| 143 | .width = regs.zeta.tile_mode.block_width, | ||
| 144 | .height = regs.zeta.tile_mode.block_height, | ||
| 145 | .depth = regs.zeta.tile_mode.block_depth, | ||
| 146 | }; | ||
| 147 | if (regs.zeta.tile_mode.is_pitch_linear) { | ||
| 148 | ASSERT(regs.zeta.tile_mode.is_3d == 0); | ||
| 149 | type = ImageType::Linear; | ||
| 150 | pitch = size.width * BytesPerBlock(format); | ||
| 151 | } else if (regs.zeta.tile_mode.is_3d) { | ||
| 152 | ASSERT(regs.zeta.tile_mode.is_pitch_linear == 0); | ||
| 153 | type = ImageType::e3D; | ||
| 154 | size.depth = regs.zeta_depth; | ||
| 155 | } else { | ||
| 156 | type = ImageType::e2D; | ||
| 157 | resources.layers = regs.zeta_depth; | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | ||
| 162 | UNIMPLEMENTED_IF_MSG(config.layer != 0, "Surface layer is not zero"); | ||
| 163 | format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(config.format); | ||
| 164 | if (config.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch) { | ||
| 165 | type = ImageType::Linear; | ||
| 166 | size = Extent3D{ | ||
| 167 | .width = config.pitch / VideoCore::Surface::BytesPerBlock(format), | ||
| 168 | .height = config.height, | ||
| 169 | .depth = 1, | ||
| 170 | }; | ||
| 171 | pitch = config.pitch; | ||
| 172 | } else { | ||
| 173 | type = config.block_depth > 0 ? ImageType::e3D : ImageType::e2D; | ||
| 174 | block = Extent3D{ | ||
| 175 | .width = config.block_width, | ||
| 176 | .height = config.block_height, | ||
| 177 | .depth = config.block_depth, | ||
| 178 | }; | ||
| 179 | // 3D blits with more than once slice are not implemented for now | ||
| 180 | // Render to individual slices | ||
| 181 | size = Extent3D{ | ||
| 182 | .width = config.width, | ||
| 183 | .height = config.height, | ||
| 184 | .depth = 1, | ||
| 185 | }; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_info.h b/src/video_core/texture_cache/image_info.h new file mode 100644 index 000000000..5049fc36e --- /dev/null +++ b/src/video_core/texture_cache/image_info.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/fermi_2d.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/surface.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | using Tegra::Texture::TICEntry; | ||
| 15 | using VideoCore::Surface::PixelFormat; | ||
| 16 | |||
| 17 | struct ImageInfo { | ||
| 18 | explicit ImageInfo() = default; | ||
| 19 | explicit ImageInfo(const TICEntry& config) noexcept; | ||
| 20 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; | ||
| 21 | explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; | ||
| 22 | explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; | ||
| 23 | |||
| 24 | PixelFormat format = PixelFormat::Invalid; | ||
| 25 | ImageType type = ImageType::e1D; | ||
| 26 | SubresourceExtent resources; | ||
| 27 | Extent3D size{1, 1, 1}; | ||
| 28 | union { | ||
| 29 | Extent3D block{0, 0, 0}; | ||
| 30 | u32 pitch; | ||
| 31 | }; | ||
| 32 | u32 layer_stride = 0; | ||
| 33 | u32 maybe_unaligned_layer_stride = 0; | ||
| 34 | u32 num_samples = 1; | ||
| 35 | u32 tile_width_spacing = 0; | ||
| 36 | }; | ||
| 37 | |||
| 38 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_view_base.cpp b/src/video_core/texture_cache/image_view_base.cpp new file mode 100644 index 000000000..18f72e508 --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/compatible_formats.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/formatter.h" | ||
| 12 | #include "video_core/texture_cache/image_info.h" | ||
| 13 | #include "video_core/texture_cache/image_view_base.h" | ||
| 14 | #include "video_core/texture_cache/image_view_info.h" | ||
| 15 | #include "video_core/texture_cache/types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 20 | ImageId image_id_) | ||
| 21 | : image_id{image_id_}, format{info.format}, type{info.type}, range{info.range}, | ||
| 22 | size{ | ||
| 23 | .width = std::max(image_info.size.width >> range.base.level, 1u), | ||
| 24 | .height = std::max(image_info.size.height >> range.base.level, 1u), | ||
| 25 | .depth = std::max(image_info.size.depth >> range.base.level, 1u), | ||
| 26 | } { | ||
| 27 | ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false), | ||
| 28 | "Image view format {} is incompatible with image format {}", info.format, | ||
| 29 | image_info.format); | ||
| 30 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 31 | if (image_info.type == ImageType::Linear && is_async) { | ||
| 32 | flags |= ImageViewFlagBits::PreemtiveDownload; | ||
| 33 | } | ||
| 34 | if (image_info.type == ImageType::e3D && info.type != ImageViewType::e3D) { | ||
| 35 | flags |= ImageViewFlagBits::Slice; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | ImageViewBase::ImageViewBase(const NullImageParams&) {} | ||
| 40 | |||
| 41 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_view_base.h b/src/video_core/texture_cache/image_view_base.h new file mode 100644 index 000000000..73954167e --- /dev/null +++ b/src/video_core/texture_cache/image_view_base.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | using VideoCore::Surface::PixelFormat; | ||
| 14 | |||
| 15 | struct ImageViewInfo; | ||
| 16 | struct ImageInfo; | ||
| 17 | |||
| 18 | struct NullImageParams {}; | ||
| 19 | |||
| 20 | enum class ImageViewFlagBits : u16 { | ||
| 21 | PreemtiveDownload = 1 << 0, | ||
| 22 | Strong = 1 << 1, | ||
| 23 | Slice = 1 << 2, | ||
| 24 | }; | ||
| 25 | DECLARE_ENUM_FLAG_OPERATORS(ImageViewFlagBits) | ||
| 26 | |||
| 27 | struct ImageViewBase { | ||
| 28 | explicit ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_info, | ||
| 29 | ImageId image_id); | ||
| 30 | explicit ImageViewBase(const NullImageParams&); | ||
| 31 | |||
| 32 | [[nodiscard]] bool IsBuffer() const noexcept { | ||
| 33 | return type == ImageViewType::Buffer; | ||
| 34 | } | ||
| 35 | |||
| 36 | ImageId image_id{}; | ||
| 37 | PixelFormat format{}; | ||
| 38 | ImageViewType type{}; | ||
| 39 | SubresourceRange range; | ||
| 40 | Extent3D size{0, 0, 0}; | ||
| 41 | ImageViewFlagBits flags{}; | ||
| 42 | |||
| 43 | u64 invalidation_tick = 0; | ||
| 44 | u64 modification_tick = 0; | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp new file mode 100644 index 000000000..faf5b151f --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.cpp | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "video_core/texture_cache/image_view_info.h" | ||
| 9 | #include "video_core/texture_cache/texture_cache.h" | ||
| 10 | #include "video_core/texture_cache/types.h" | ||
| 11 | #include "video_core/textures/texture.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); | ||
| 18 | |||
| 19 | [[nodiscard]] u8 CastSwizzle(SwizzleSource source) { | ||
| 20 | const u8 casted = static_cast<u8>(source); | ||
| 21 | ASSERT(static_cast<SwizzleSource>(casted) == source); | ||
| 22 | return casted; | ||
| 23 | } | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | ImageViewInfo::ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept | ||
| 28 | : format{PixelFormatFromTIC(config)}, x_source{CastSwizzle(config.x_source)}, | ||
| 29 | y_source{CastSwizzle(config.y_source)}, z_source{CastSwizzle(config.z_source)}, | ||
| 30 | w_source{CastSwizzle(config.w_source)} { | ||
| 31 | range.base = SubresourceBase{ | ||
| 32 | .level = static_cast<s32>(config.res_min_mip_level), | ||
| 33 | .layer = base_layer, | ||
| 34 | }; | ||
| 35 | range.extent.levels = config.res_max_mip_level - config.res_min_mip_level + 1; | ||
| 36 | |||
| 37 | switch (config.texture_type) { | ||
| 38 | case TextureType::Texture1D: | ||
| 39 | ASSERT(config.Height() == 1); | ||
| 40 | ASSERT(config.Depth() == 1); | ||
| 41 | type = ImageViewType::e1D; | ||
| 42 | break; | ||
| 43 | case TextureType::Texture2D: | ||
| 44 | case TextureType::Texture2DNoMipmap: | ||
| 45 | ASSERT(config.Depth() == 1); | ||
| 46 | type = config.normalized_coords ? ImageViewType::e2D : ImageViewType::Rect; | ||
| 47 | break; | ||
| 48 | case TextureType::Texture3D: | ||
| 49 | type = ImageViewType::e3D; | ||
| 50 | break; | ||
| 51 | case TextureType::TextureCubemap: | ||
| 52 | ASSERT(config.Depth() == 1); | ||
| 53 | type = ImageViewType::Cube; | ||
| 54 | range.extent.layers = 6; | ||
| 55 | break; | ||
| 56 | case TextureType::Texture1DArray: | ||
| 57 | type = ImageViewType::e1DArray; | ||
| 58 | range.extent.layers = config.Depth(); | ||
| 59 | break; | ||
| 60 | case TextureType::Texture2DArray: | ||
| 61 | type = ImageViewType::e2DArray; | ||
| 62 | range.extent.layers = config.Depth(); | ||
| 63 | break; | ||
| 64 | case TextureType::Texture1DBuffer: | ||
| 65 | type = ImageViewType::Buffer; | ||
| 66 | break; | ||
| 67 | case TextureType::TextureCubeArray: | ||
| 68 | type = ImageViewType::CubeArray; | ||
| 69 | range.extent.layers = config.Depth() * 6; | ||
| 70 | break; | ||
| 71 | default: | ||
| 72 | UNREACHABLE_MSG("Invalid texture_type={}", static_cast<int>(config.texture_type.Value())); | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | ImageViewInfo::ImageViewInfo(ImageViewType type_, PixelFormat format_, | ||
| 78 | SubresourceRange range_) noexcept | ||
| 79 | : type{type_}, format{format_}, range{range_}, x_source{RENDER_TARGET_SWIZZLE}, | ||
| 80 | y_source{RENDER_TARGET_SWIZZLE}, z_source{RENDER_TARGET_SWIZZLE}, | ||
| 81 | w_source{RENDER_TARGET_SWIZZLE} {} | ||
| 82 | |||
| 83 | bool ImageViewInfo::IsRenderTarget() const noexcept { | ||
| 84 | return x_source == RENDER_TARGET_SWIZZLE && y_source == RENDER_TARGET_SWIZZLE && | ||
| 85 | z_source == RENDER_TARGET_SWIZZLE && w_source == RENDER_TARGET_SWIZZLE; | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_view_info.h b/src/video_core/texture_cache/image_view_info.h new file mode 100644 index 000000000..0c1f99117 --- /dev/null +++ b/src/video_core/texture_cache/image_view_info.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | #include "video_core/textures/texture.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using Tegra::Texture::SwizzleSource; | ||
| 17 | using Tegra::Texture::TICEntry; | ||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | /// Properties used to determine a image view | ||
| 21 | struct ImageViewInfo { | ||
| 22 | explicit ImageViewInfo() noexcept = default; | ||
| 23 | explicit ImageViewInfo(const TICEntry& config, s32 base_layer) noexcept; | ||
| 24 | explicit ImageViewInfo(ImageViewType type, PixelFormat format, | ||
| 25 | SubresourceRange range = {}) noexcept; | ||
| 26 | |||
| 27 | auto operator<=>(const ImageViewInfo&) const noexcept = default; | ||
| 28 | |||
| 29 | [[nodiscard]] bool IsRenderTarget() const noexcept; | ||
| 30 | |||
| 31 | [[nodiscard]] std::array<SwizzleSource, 4> Swizzle() const noexcept { | ||
| 32 | return std::array{ | ||
| 33 | static_cast<SwizzleSource>(x_source), | ||
| 34 | static_cast<SwizzleSource>(y_source), | ||
| 35 | static_cast<SwizzleSource>(z_source), | ||
| 36 | static_cast<SwizzleSource>(w_source), | ||
| 37 | }; | ||
| 38 | } | ||
| 39 | |||
| 40 | ImageViewType type{}; | ||
| 41 | PixelFormat format{}; | ||
| 42 | SubresourceRange range; | ||
| 43 | u8 x_source = static_cast<u8>(SwizzleSource::R); | ||
| 44 | u8 y_source = static_cast<u8>(SwizzleSource::G); | ||
| 45 | u8 z_source = static_cast<u8>(SwizzleSource::B); | ||
| 46 | u8 w_source = static_cast<u8>(SwizzleSource::A); | ||
| 47 | }; | ||
| 48 | static_assert(std::has_unique_object_representations_v<ImageViewInfo>); | ||
| 49 | |||
| 50 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h new file mode 100644 index 000000000..9b9544b07 --- /dev/null +++ b/src/video_core/texture_cache/render_targets.h | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <span> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "video_core/texture_cache/types.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | /// Framebuffer properties used to lookup a framebuffer | ||
| 17 | struct RenderTargets { | ||
| 18 | constexpr auto operator<=>(const RenderTargets&) const noexcept = default; | ||
| 19 | |||
| 20 | constexpr bool Contains(std::span<const ImageViewId> elements) const noexcept { | ||
| 21 | const auto contains = [elements](ImageViewId item) { | ||
| 22 | return std::ranges::find(elements, item) != elements.end(); | ||
| 23 | }; | ||
| 24 | return std::ranges::any_of(color_buffer_ids, contains) || contains(depth_buffer_id); | ||
| 25 | } | ||
| 26 | |||
| 27 | std::array<ImageViewId, NUM_RT> color_buffer_ids; | ||
| 28 | ImageViewId depth_buffer_id; | ||
| 29 | std::array<u8, NUM_RT> draw_buffers{}; | ||
| 30 | Extent2D size; | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
| 34 | |||
| 35 | namespace std { | ||
| 36 | |||
| 37 | template <> | ||
| 38 | struct hash<VideoCommon::RenderTargets> { | ||
| 39 | size_t operator()(const VideoCommon::RenderTargets& rt) const noexcept { | ||
| 40 | using VideoCommon::ImageViewId; | ||
| 41 | size_t value = std::hash<ImageViewId>{}(rt.depth_buffer_id); | ||
| 42 | for (const ImageViewId color_buffer_id : rt.color_buffer_ids) { | ||
| 43 | value ^= std::hash<ImageViewId>{}(color_buffer_id); | ||
| 44 | } | ||
| 45 | value ^= Common::BitCast<u64>(rt.draw_buffers); | ||
| 46 | value ^= Common::BitCast<u64>(rt.size); | ||
| 47 | return value; | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | |||
| 51 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/samples_helper.h b/src/video_core/texture_cache/samples_helper.h new file mode 100644 index 000000000..04539a43c --- /dev/null +++ b/src/video_core/texture_cache/samples_helper.h | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | [[nodiscard]] inline std::pair<int, int> SamplesLog2(int num_samples) { | ||
| 15 | switch (num_samples) { | ||
| 16 | case 1: | ||
| 17 | return {0, 0}; | ||
| 18 | case 2: | ||
| 19 | return {1, 0}; | ||
| 20 | case 4: | ||
| 21 | return {1, 1}; | ||
| 22 | case 8: | ||
| 23 | return {2, 1}; | ||
| 24 | case 16: | ||
| 25 | return {2, 2}; | ||
| 26 | } | ||
| 27 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 28 | return {1, 1}; | ||
| 29 | } | ||
| 30 | |||
/// Translates a Maxwell MSAA mode into its total number of samples.
/// Variants sharing a grid layout (e.g. the _D3D and _VC* forms) map to the
/// same count. Unknown modes are reported as unreachable and treated as 1x.
[[nodiscard]] inline int NumSamples(Tegra::Texture::MsaaMode msaa_mode) {
    using Tegra::Texture::MsaaMode;
    switch (msaa_mode) {
    case MsaaMode::Msaa1x1:
        return 1;
    case MsaaMode::Msaa2x1:
    case MsaaMode::Msaa2x1_D3D:
        return 2;
    case MsaaMode::Msaa2x2:
    case MsaaMode::Msaa2x2_VC4:
    case MsaaMode::Msaa2x2_VC12:
        return 4;
    case MsaaMode::Msaa4x2:
    case MsaaMode::Msaa4x2_D3D:
    case MsaaMode::Msaa4x2_VC8:
    case MsaaMode::Msaa4x2_VC24:
        return 8;
    case MsaaMode::Msaa4x4:
        return 16;
    }
    UNREACHABLE_MSG("Invalid MSAA mode={}", static_cast<int>(msaa_mode));
    return 1;
}
| 54 | |||
| 55 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h new file mode 100644 index 000000000..eae3be6ea --- /dev/null +++ b/src/video_core/texture_cache/slot_vector.h | |||
| @@ -0,0 +1,156 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <concepts> | ||
| 9 | #include <numeric> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
/// Strongly-typed index referencing an element inside a SlotVector.
/// A default-constructed id is invalid; validity is tested via operator bool.
struct SlotId {
    // Sentinel value meaning "no slot"
    static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();

    constexpr auto operator<=>(const SlotId&) const noexcept = default;

    /// True when the id references a slot (index is not the sentinel)
    constexpr explicit operator bool() const noexcept {
        return index != INVALID_INDEX;
    }

    u32 index = INVALID_INDEX;
};
| 30 | |||
/// Object pool handing out stable SlotId handles. Storage is an array of raw
/// union entries; elements are constructed with placement-new and destroyed
/// manually, occupancy is tracked in a bitset and freed indices are recycled
/// through a LIFO free list. T must be nothrow-movable so Reserve() can
/// relocate live objects safely.
template <class T>
requires std::is_nothrow_move_assignable_v<T>&&
    std::is_nothrow_move_constructible_v<T> class SlotVector {
public:
    ~SlotVector() noexcept {
        // Destroy only the slots whose occupancy bit is set
        size_t index = 0;
        for (u64 bits : stored_bitset) {
            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
                if ((bits & 1) != 0) {
                    values[index + bit].object.~T();
                }
            }
            index += 64;
        }
        delete[] values;
    }

    /// Accesses a live element; validity is checked only in debug builds
    [[nodiscard]] T& operator[](SlotId id) noexcept {
        ValidateIndex(id);
        return values[id.index].object;
    }

    [[nodiscard]] const T& operator[](SlotId id) const noexcept {
        ValidateIndex(id);
        return values[id.index].object;
    }

    /// Constructs a new element in a free slot and returns its id
    template <typename... Args>
    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
        const u32 index = FreeValueIndex();
        new (&values[index].object) T(std::forward<Args>(args)...);
        SetStorageBit(index);

        return SlotId{index};
    }

    /// Destroys the element; the id becomes dangling and may be reused later
    void erase(SlotId id) noexcept {
        values[id.index].object.~T();
        free_list.push_back(id.index);
        ResetStorageBit(id.index);
    }

private:
    struct NonTrivialDummy {
        NonTrivialDummy() noexcept {}
    };

    // Union so entries can exist without default-constructing T; lifetime of
    // 'object' is managed explicitly with placement-new / manual destruction
    union Entry {
        Entry() noexcept : dummy{} {}
        ~Entry() noexcept {}

        NonTrivialDummy dummy;
        T object;
    };

    void SetStorageBit(u32 index) noexcept {
        stored_bitset[index / 64] |= u64(1) << (index % 64);
    }

    void ResetStorageBit(u32 index) noexcept {
        stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
    }

    // NOTE(review): not referenced by the rest of this class as shown here
    bool ReadStorageBit(u32 index) noexcept {
        return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
    }

    // Debug-only sanity checks: id is valid, in range, and its slot is live
    void ValidateIndex(SlotId id) const noexcept {
        DEBUG_ASSERT(id);
        DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
        DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
    }

    /// Pops a recycled index, growing storage (doubling) when none are free
    [[nodiscard]] u32 FreeValueIndex() noexcept {
        if (free_list.empty()) {
            Reserve(values_capacity ? (values_capacity << 1) : 1);
        }
        const u32 free_index = free_list.back();
        free_list.pop_back();
        return free_index;
    }

    /// Reallocates storage, moving live objects and extending the free list
    /// with the newly available indices
    void Reserve(size_t new_capacity) noexcept {
        Entry* const new_values = new Entry[new_capacity];
        size_t index = 0;
        for (u64 bits : stored_bitset) {
            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
                const size_t i = index + bit;
                if ((bits & 1) == 0) {
                    continue;
                }
                // Relocate the live object, then end its lifetime in the old
                // storage (safe because T is nothrow-move-constructible)
                T& old_value = values[i].object;
                new (&new_values[i].object) T(std::move(old_value));
                old_value.~T();
            }
            index += 64;
        }

        stored_bitset.resize((new_capacity + 63) / 64);

        // All indices in [values_capacity, new_capacity) are now free
        const size_t old_free_size = free_list.size();
        free_list.resize(old_free_size + (new_capacity - values_capacity));
        std::iota(free_list.begin() + old_free_size, free_list.end(),
                  static_cast<u32>(values_capacity));

        delete[] values;
        values = new_values;
        values_capacity = new_capacity;
    }

    Entry* values = nullptr;
    size_t values_capacity = 0;
    // NOTE(review): values_size is never read or written in this class as
    // shown here — looks like dead state; confirm before removing
    size_t values_size = 0;

    std::vector<u64> stored_bitset;  // One occupancy bit per slot
    std::vector<u32> free_list;      // Recycled indices, used LIFO
};
| 148 | |||
| 149 | } // namespace VideoCommon | ||
| 150 | |||
| 151 | template <> | ||
| 152 | struct std::hash<VideoCommon::SlotId> { | ||
| 153 | size_t operator()(const VideoCommon::SlotId& id) const noexcept { | ||
| 154 | return std::hash<u32>{}(id.index); | ||
| 155 | } | ||
| 156 | }; | ||
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp deleted file mode 100644 index b44c09d71..000000000 --- a/src/video_core/texture_cache/surface_base.cpp +++ /dev/null | |||
| @@ -1,298 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/algorithm.h" | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/texture_cache/surface_base.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | #include "video_core/textures/convert.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); | ||
| 17 | MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); | ||
| 18 | |||
| 19 | using Tegra::Texture::ConvertFromGuestToHost; | ||
| 20 | using VideoCore::MortonSwizzleMode; | ||
| 21 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 22 | using VideoCore::Surface::PixelFormat; | ||
| 23 | |||
| 24 | StagingCache::StagingCache() = default; | ||
| 25 | |||
| 26 | StagingCache::~StagingCache() = default; | ||
| 27 | |||
// Precomputes per-level guest sizes/offsets and the total guest/host memory
// footprint. ASTC data is converted on the CPU when the host GPU lacks native
// support, which is reflected in the host size.
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
                                 bool is_astc_supported)
    : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
      mipmap_offsets(params.num_levels) {
    is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
    host_memory_size = params.GetHostSizeInBytes(is_converted);

    // Mipmap levels are laid out contiguously within one layer
    std::size_t offset = 0;
    for (u32 level = 0; level < params.num_levels; ++level) {
        const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
        mipmap_sizes[level] = mipmap_size;
        mipmap_offsets[level] = offset;
        offset += mipmap_size;
    }
    layer_size = offset;
    if (params.is_layered) {
        if (params.is_tiled) {
            // Tiled layered surfaces align every layer to the block dimensions
            layer_size =
                SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
        }
        guest_memory_size = layer_size * params.depth;
    } else {
        guest_memory_size = layer_size;
    }
}
| 53 | |||
| 54 | MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { | ||
| 55 | const u32 src_bpp{params.GetBytesPerPixel()}; | ||
| 56 | const u32 dst_bpp{rhs.GetBytesPerPixel()}; | ||
| 57 | const bool ib1 = params.IsBuffer(); | ||
| 58 | const bool ib2 = rhs.IsBuffer(); | ||
| 59 | if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { | ||
| 60 | const bool cb1 = params.IsCompressed(); | ||
| 61 | const bool cb2 = rhs.IsCompressed(); | ||
| 62 | if (cb1 == cb2) { | ||
| 63 | return MatchTopologyResult::FullMatch; | ||
| 64 | } | ||
| 65 | return MatchTopologyResult::CompressUnmatch; | ||
| 66 | } | ||
| 67 | return MatchTopologyResult::None; | ||
| 68 | } | ||
| 69 | |||
// Determines how closely this surface's layout matches rhs: FullMatch when
// the dimensions agree exactly, SemiMatch when only a compatible sub-layout
// matches, None otherwise. Buffers, linear and tiled surfaces are checked
// with different criteria.
MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
    // Buffer surface Check: only the row byte size matters
    if (params.IsBuffer()) {
        const std::size_t wd1 = params.width * params.GetBytesPerPixel();
        const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
        if (wd1 == wd2) {
            return MatchStructureResult::FullMatch;
        }
        return MatchStructureResult::None;
    }

    // Linear Surface check: height and pitch must match; a differing width
    // still allows a semi match because rows share the same stride
    if (!params.is_tiled) {
        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
            if (params.width == rhs.width) {
                return MatchStructureResult::FullMatch;
            } else {
                return MatchStructureResult::SemiMatch;
            }
        }
        return MatchStructureResult::None;
    }

    // Tiled Surface check: all block/tiling parameters must agree first
    if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
                 params.tile_width_spacing, params.num_levels) ==
        std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
                 rhs.tile_width_spacing, rhs.num_levels)) {
        if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
            return MatchStructureResult::FullMatch;
        }
        // Re-express rhs's dimensions in this surface's pixel format to see
        // if the block-aligned extents still line up (semi match)
        const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
                                                   rhs.pixel_format);
        const u32 hs =
            SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
        const u32 w1 = params.GetBlockAlignedWidth();
        if (std::tie(w1, params.height) == std::tie(ws, hs)) {
            return MatchStructureResult::SemiMatch;
        }
    }
    return MatchStructureResult::None;
}
| 112 | |||
// Maps a GPU address inside this surface to the {layer, level} pair it points
// at. Returns std::nullopt when the address is below the surface, past the
// last layer, or does not land exactly on a mipmap offset.
std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
    const GPUVAddr candidate_gpu_addr) const {
    if (gpu_addr == candidate_gpu_addr) {
        return {{0, 0}};
    }

    if (candidate_gpu_addr < gpu_addr) {
        return std::nullopt;
    }

    // Layer index is the quotient of the relative offset by the layer stride
    const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
    const auto layer{static_cast<u32>(relative_address / layer_size)};
    if (layer >= params.depth) {
        return std::nullopt;
    }

    // The remainder must match one of the precomputed mipmap offsets exactly
    const GPUVAddr mipmap_address = relative_address - layer_size * layer;
    const auto mipmap_it =
        Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
    if (mipmap_it == mipmap_offsets.end()) {
        return std::nullopt;
    }

    const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
    return std::make_pair(layer, level);
}
| 139 | |||
| 140 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { | ||
| 141 | const u32 layers{params.depth}; | ||
| 142 | const u32 mipmaps{params.num_levels}; | ||
| 143 | std::vector<CopyParams> result; | ||
| 144 | result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); | ||
| 145 | |||
| 146 | for (u32 layer = 0; layer < layers; layer++) { | ||
| 147 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 148 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 149 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 150 | result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | return result; | ||
| 154 | } | ||
| 155 | |||
| 156 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { | ||
| 157 | const u32 mipmaps{params.num_levels}; | ||
| 158 | std::vector<CopyParams> result; | ||
| 159 | result.reserve(mipmaps); | ||
| 160 | |||
| 161 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 162 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 163 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 164 | const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; | ||
| 165 | result.emplace_back(width, height, depth, level); | ||
| 166 | } | ||
| 167 | return result; | ||
| 168 | } | ||
| 169 | |||
// (De)swizzles one mipmap level between guest memory and a linear buffer.
// For layered surfaces each layer is processed with depth 1, advancing the
// guest pointer by the layer stride and the host pointer by the host layer
// size; otherwise the full mip depth is swizzled in one call.
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
                                  u8* buffer, u32 level) {
    const u32 width{params.GetMipWidth(level)};
    const u32 height{params.GetMipHeight(level)};
    const u32 block_height{params.GetMipBlockHeight(level)};
    const u32 block_depth{params.GetMipBlockDepth(level)};

    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        std::size_t host_offset = 0;
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {
            MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
                          params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
            guest_offset += guest_stride;
            host_offset += host_stride;
        }
    } else {
        MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
                      params.GetMipDepth(level), params.tile_width_spacing, buffer,
                      memory + guest_offset);
    }
}
| 194 | |||
// Reads the surface from guest memory into staging buffer 0, deswizzling
// tiled data per level, de-pitching linear data, and finally converting
// formats in place when required (ASTC fallback or S8D24).
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
                                 StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    // Use an extra temporal buffer
    auto& tmp_buffer = staging_cache.GetBuffer(1);
    tmp_buffer.resize(guest_memory_size);
    host_ptr = tmp_buffer.data();
    memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);

    if (params.is_tiled) {
        // NOTE(review): the assert fires when block_width != 0, yet the
        // message reads as if a nonzero value is expected — condition and
        // message look inconsistent; confirm intended invariant
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
        const u32 bpp{params.GetBytesPerPixel()};
        const u32 block_width{params.GetDefaultBlockWidth()};
        const u32 block_height{params.GetDefaultBlockHeight()};
        const u32 width{(params.width + block_width - 1) / block_width};
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            // Rows are tightly packed: one bulk copy suffices
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
        } else {
            // Rows are padded to the pitch: copy row by row
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};
            for (u32 h = height; h > 0; --h) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }

    // Only converted formats and S8D24 need the per-level conversion pass
    if (!is_converted && params.pixel_format != PixelFormat::S8_UINT_D24_UNORM) {
        return;
    }

    // Convert from the last level upwards so the (possibly larger) converted
    // output never overwrites unconverted input that is still needed
    for (u32 level = params.num_levels; level--;) {
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
        const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
        u8* const in_buffer = staging_buffer.data() + in_host_offset;
        u8* const out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
    }
}
| 249 | |||
// Writes the surface back to guest memory from staging buffer 0, re-swizzling
// tiled data, re-pitching linear 2D data, or memcpy-ing buffers directly.
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
                                  StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Flush_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Use an extra temporal buffer
    auto& tmp_buffer = staging_cache.GetBuffer(1);
    tmp_buffer.resize(guest_memory_size);
    host_ptr = tmp_buffer.data();

    if (params.target == SurfaceTarget::Texture3D) {
        // Special case for 3D texture segments: pre-read the whole region so
        // slices not written below keep their existing guest contents
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }

    if (params.is_tiled) {
        // NOTE(review): same block_width == 0 assert/message mismatch as in
        // LoadBuffer — confirm the intended invariant
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else if (params.IsBuffer()) {
        // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
        // memory.
        std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
    } else {
        ASSERT(params.target == SurfaceTarget::Texture2D);
        ASSERT(params.num_levels == 1);

        const u32 bpp{params.GetBytesPerPixel()};
        const u32 copy_size{params.width * bpp};
        if (params.pitch == copy_size) {
            // Tightly packed rows: single bulk copy
            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
        } else {
            // Padded rows: copy row by row honoring the guest pitch
            u8* start{host_ptr};
            const u8* read_to{staging_buffer.data()};
            for (u32 h = params.height; h > 0; --h) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
    memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
}
| 297 | |||
| 298 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h deleted file mode 100644 index 173f2edba..000000000 --- a/src/video_core/texture_cache/surface_base.h +++ /dev/null | |||
| @@ -1,333 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <tuple> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/gpu.h" | ||
| 14 | #include "video_core/morton.h" | ||
| 15 | #include "video_core/texture_cache/copy_params.h" | ||
| 16 | #include "video_core/texture_cache/surface_params.h" | ||
| 17 | #include "video_core/texture_cache/surface_view.h" | ||
| 18 | |||
| 19 | namespace Tegra { | ||
| 20 | class MemoryManager; | ||
| 21 | } | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | using VideoCore::MortonSwizzleMode; | ||
| 26 | using VideoCore::Surface::SurfaceTarget; | ||
| 27 | |||
/// Result of SurfaceBaseImpl::MatchesStructure: how closely two surface
/// layouts agree (exact, compatible sub-layout, or not at all).
enum class MatchStructureResult : u32 {
    FullMatch = 0,
    SemiMatch = 1,
    None = 2,
};
| 33 | |||
/// Result of SurfaceBaseImpl::MatchesTopology: whether two surfaces share the
/// same basic memory topology, differ only in compression, or are unrelated.
enum class MatchTopologyResult : u32 {
    FullMatch = 0,
    CompressUnmatch = 1,
    None = 2,
};
| 39 | |||
/// Owns a small set of reusable byte buffers used for texture uploads and
/// downloads, avoiding per-operation allocations.
class StagingCache {
public:
    explicit StagingCache();
    ~StagingCache();

    /// Returns the buffer at the given index; index must be < the size set
    /// via SetSize (no bounds check is performed)
    std::vector<u8>& GetBuffer(std::size_t index) {
        return staging_buffer[index];
    }

    const std::vector<u8>& GetBuffer(std::size_t index) const {
        return staging_buffer[index];
    }

    /// Sets how many staging buffers are available
    void SetSize(std::size_t size) {
        staging_buffer.resize(size);
    }

private:
    std::vector<std::vector<u8>> staging_buffer;
};
| 60 | |||
/// Backend-agnostic core of a cached surface: tracks guest/host memory
/// ranges, per-level layout, and provides layout matching and copy breakdown.
/// Backends derive (via SurfaceBase<TView>) and implement naming/views.
class SurfaceBaseImpl {
public:
    /// Reads and deswizzles/converts the surface from guest memory
    void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    /// Swizzles and writes the surface back to guest memory
    void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }

    /// True when the CPU address range [start, end) intersects this surface
    bool Overlaps(const VAddr start, const VAddr end) const {
        return (cpu_addr < end) && (cpu_addr_end > start);
    }

    /// True when [other_start, other_end] lies fully inside this surface's
    /// GPU address range
    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const {
        const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
        return gpu_addr <= other_start && other_end <= gpu_addr_end;
    }

    // Use only when recycling a surface
    void SetGpuAddr(const GPUVAddr new_addr) {
        gpu_addr = new_addr;
    }

    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

    VAddr GetCpuAddrEnd() const {
        return cpu_addr_end;
    }

    /// Updates the CPU mapping; the end address is derived from the guest size
    void SetCpuAddr(const VAddr new_addr) {
        cpu_addr = new_addr;
        cpu_addr_end = new_addr + guest_memory_size;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Size of the surface in guest memory
    std::size_t GetSizeInBytes() const {
        return guest_memory_size;
    }

    /// Size of the surface on the host (differs when format conversion applies)
    std::size_t GetHostSizeInBytes() const {
        return host_memory_size;
    }

    std::size_t GetMipmapSize(const u32 level) const {
        return mipmap_sizes[level];
    }

    bool IsLinear() const {
        return !params.is_tiled;
    }

    /// True when the surface is CPU-converted (e.g. ASTC fallback)
    bool IsConverted() const {
        return is_converted;
    }

    bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
        return params.pixel_format == pixel_format;
    }

    VideoCore::Surface::PixelFormat GetFormat() const {
        return params.pixel_format;
    }

    bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
        return params.target == target;
    }

    /// See MatchTopologyResult for the meaning of the returned value
    MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;

    /// See MatchStructureResult for the meaning of the returned value
    MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;

    /// True only for single-level 2D textures at the exact same GPU address
    bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
        return std::tie(gpu_addr, params.target, params.num_levels) ==
                   std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
               params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
    }

    /// Maps a GPU address within the surface to its {layer, level} pair
    std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;

    /// Produces the copy regions describing the overlap with in_params
    std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
        return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
    }

protected:
    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
                             bool is_astc_supported);
    ~SurfaceBaseImpl() = default;

    /// Backends assign a debug name to the host object
    virtual void DecorateSurfaceName() = 0;

    const SurfaceParams params;
    std::size_t layer_size;        // Guest bytes per layer (aligned when tiled)
    std::size_t guest_memory_size; // Total guest footprint
    std::size_t host_memory_size;  // Total host footprint (post-conversion)
    GPUVAddr gpu_addr{};
    VAddr cpu_addr{};
    VAddr cpu_addr_end{};
    bool is_converted{};

    std::vector<std::size_t> mipmap_sizes;   // Guest size of each level
    std::vector<std::size_t> mipmap_offsets; // Guest offset of each level

private:
    void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
                     u32 level);

    std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;

    std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
};
| 177 | |||
/// Adds backend-view management and cache bookkeeping (modification state,
/// render-target tracking, registration flags) on top of SurfaceBaseImpl.
/// TView is the backend's view type; views are created lazily and cached.
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;

    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;

    /// Render targets are always considered modified regardless of the flag
    void MarkAsModified(bool is_modified_, u64 tick) {
        is_modified = is_modified_ || is_target;
        modification_tick = tick;
    }

    void MarkAsRenderTarget(bool is_target_, u32 index_) {
        is_target = is_target_;
        index = index_;
    }

    void SetMemoryMarked(bool is_memory_marked_) {
        is_memory_marked = is_memory_marked_;
    }

    bool IsMemoryMarked() const {
        return is_memory_marked;
    }

    void SetSyncPending(bool is_sync_pending_) {
        is_sync_pending = is_sync_pending_;
    }

    bool IsSyncPending() const {
        return is_sync_pending;
    }

    void MarkAsPicked(bool is_picked_) {
        is_picked = is_picked_;
    }

    bool IsModified() const {
        return is_modified;
    }

    bool IsProtected() const {
        // Only 3D slices are to be protected
        return is_target && params.target == SurfaceTarget::Texture3D;
    }

    bool IsRenderTarget() const {
        return is_target;
    }

    /// Returns the render-target slot index, or NO_RT when not bound
    u32 GetRenderTarget() const {
        return index;
    }

    bool IsRegistered() const {
        return is_registered;
    }

    bool IsPicked() const {
        return is_picked;
    }

    void MarkAsRegistered(bool is_reg) {
        is_registered = is_reg;
    }

    u64 GetModificationTick() const {
        return modification_tick;
    }

    /// Returns a view covering the whole surface; a layered surface viewed
    /// through a non-layered target exposes a single layer
    TView EmplaceOverview(const SurfaceParams& overview_params) {
        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
    }

    TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
        return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
                                  base_level, num_levels));
    }

    /// Fallback for views whose candidate size does not match one mipmap:
    /// spans either multiple layers (mip 0) or multiple levels of one layer
    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                              const GPUVAddr view_addr,
                                              const std::size_t candidate_size, const u32 mipmap,
                                              const u32 layer) {
        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
        if (!layer_mipmap) {
            return {};
        }
        const auto [end_layer, end_mipmap] = *layer_mipmap;
        if (layer != end_layer) {
            if (mipmap == 0 && end_mipmap == 0) {
                return GetView(ViewParams(view_params.target, layer, end_layer - layer, 0, 1));
            }
            return {};
        } else {
            return GetView(ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap));
        }
    }

    /// Tries to create a view at view_addr of candidate_size bytes; returns
    /// empty when 3D targets are involved or the address does not resolve
    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                     const std::size_t candidate_size) {
        if (params.target == SurfaceTarget::Texture3D ||
            view_params.target == SurfaceTarget::Texture3D ||
            (params.num_levels == 1 && !params.is_layered)) {
            return {};
        }
        const auto layer_mipmap{GetLayerMipmap(view_addr)};
        if (!layer_mipmap) {
            return {};
        }
        const auto [layer, mipmap] = *layer_mipmap;
        if (GetMipmapSize(mipmap) != candidate_size) {
            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
        }
        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
    }

    TView GetMainView() const {
        return main_view;
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
                         bool is_astc_supported)
        : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}

    ~SurfaceBase() = default;

    /// Backends materialize a host view for the given parameters
    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view;
    std::unordered_map<ViewParams, TView> views;

private:
    /// Returns the cached view for key, creating it on first use
    TView GetView(const ViewParams& key) {
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view;
    }

    // Sentinel for "not bound as a render target"
    static constexpr u32 NO_RT = 0xFFFFFFFF;

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    bool is_memory_marked{};
    bool is_sync_pending{};
    u32 index{NO_RT};
    u64 modification_tick{};
};
| 332 | |||
| 333 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp deleted file mode 100644 index e8515321b..000000000 --- a/src/video_core/texture_cache/surface_params.cpp +++ /dev/null | |||
| @@ -1,444 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 15 | #include "video_core/texture_cache/surface_params.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 21 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 22 | using VideoCore::Surface::SurfaceTarget; | ||
| 23 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 24 | using VideoCore::Surface::SurfaceType; | ||
| 25 | |||
| 26 | namespace { | ||
| 27 | |||
| 28 | SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { | ||
| 29 | switch (type) { | ||
| 30 | case Tegra::Shader::TextureType::Texture1D: | ||
| 31 | return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D; | ||
| 32 | case Tegra::Shader::TextureType::Texture2D: | ||
| 33 | return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | ||
| 34 | case Tegra::Shader::TextureType::Texture3D: | ||
| 35 | ASSERT(!is_array); | ||
| 36 | return SurfaceTarget::Texture3D; | ||
| 37 | case Tegra::Shader::TextureType::TextureCube: | ||
| 38 | return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap; | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return SurfaceTarget::Texture2D; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) { | ||
| 46 | switch (type) { | ||
| 47 | case Tegra::Shader::ImageType::Texture1D: | ||
| 48 | return SurfaceTarget::Texture1D; | ||
| 49 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 50 | return SurfaceTarget::TextureBuffer; | ||
| 51 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 52 | return SurfaceTarget::Texture1DArray; | ||
| 53 | case Tegra::Shader::ImageType::Texture2D: | ||
| 54 | return SurfaceTarget::Texture2D; | ||
| 55 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 56 | return SurfaceTarget::Texture2DArray; | ||
| 57 | case Tegra::Shader::ImageType::Texture3D: | ||
| 58 | return SurfaceTarget::Texture3D; | ||
| 59 | default: | ||
| 60 | UNREACHABLE(); | ||
| 61 | return SurfaceTarget::Texture2D; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 66 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 72 | const Tegra::Texture::TICEntry& tic, | ||
| 73 | const VideoCommon::Shader::Sampler& entry) { | ||
| 74 | SurfaceParams params; | ||
| 75 | params.is_tiled = tic.IsTiled(); | ||
| 76 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 77 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 78 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 79 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 80 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 81 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 82 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 83 | params.type = GetFormatType(params.pixel_format); | ||
| 84 | if (entry.is_shadow && params.type == SurfaceType::ColorTexture) { | ||
| 85 | switch (params.pixel_format) { | ||
| 86 | case PixelFormat::R16_UNORM: | ||
| 87 | case PixelFormat::R16_FLOAT: | ||
| 88 | params.pixel_format = PixelFormat::D16_UNORM; | ||
| 89 | break; | ||
| 90 | case PixelFormat::R32_FLOAT: | ||
| 91 | params.pixel_format = PixelFormat::D32_FLOAT; | ||
| 92 | break; | ||
| 93 | default: | ||
| 94 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | ||
| 95 | static_cast<u32>(params.pixel_format)); | ||
| 96 | } | ||
| 97 | params.type = GetFormatType(params.pixel_format); | ||
| 98 | } | ||
| 99 | // TODO: on 1DBuffer we should use the tic info. | ||
| 100 | if (tic.IsBuffer()) { | ||
| 101 | params.target = SurfaceTarget::TextureBuffer; | ||
| 102 | params.width = tic.Width(); | ||
| 103 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 104 | params.height = 1; | ||
| 105 | params.depth = 1; | ||
| 106 | params.num_levels = 1; | ||
| 107 | params.emulated_levels = 1; | ||
| 108 | params.is_layered = false; | ||
| 109 | } else { | ||
| 110 | params.target = TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 111 | params.width = tic.Width(); | ||
| 112 | params.height = tic.Height(); | ||
| 113 | params.depth = tic.Depth(); | ||
| 114 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 115 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 116 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 117 | params.depth *= 6; | ||
| 118 | } | ||
| 119 | params.num_levels = tic.max_mip_level + 1; | ||
| 120 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 121 | params.is_layered = params.IsLayered(); | ||
| 122 | } | ||
| 123 | return params; | ||
| 124 | } | ||
| 125 | |||
| 126 | SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table, | ||
| 127 | const Tegra::Texture::TICEntry& tic, | ||
| 128 | const VideoCommon::Shader::Image& entry) { | ||
| 129 | SurfaceParams params; | ||
| 130 | params.is_tiled = tic.IsTiled(); | ||
| 131 | params.srgb_conversion = tic.IsSrgbConversionEnabled(); | ||
| 132 | params.block_width = params.is_tiled ? tic.BlockWidth() : 0; | ||
| 133 | params.block_height = params.is_tiled ? tic.BlockHeight() : 0; | ||
| 134 | params.block_depth = params.is_tiled ? tic.BlockDepth() : 0; | ||
| 135 | params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1; | ||
| 136 | params.pixel_format = lookup_table.GetPixelFormat( | ||
| 137 | tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type); | ||
| 138 | params.type = GetFormatType(params.pixel_format); | ||
| 139 | params.target = ImageTypeToSurfaceTarget(entry.type); | ||
| 140 | // TODO: on 1DBuffer we should use the tic info. | ||
| 141 | if (tic.IsBuffer()) { | ||
| 142 | params.target = SurfaceTarget::TextureBuffer; | ||
| 143 | params.width = tic.Width(); | ||
| 144 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 145 | params.height = 1; | ||
| 146 | params.depth = 1; | ||
| 147 | params.num_levels = 1; | ||
| 148 | params.emulated_levels = 1; | ||
| 149 | params.is_layered = false; | ||
| 150 | } else { | ||
| 151 | params.width = tic.Width(); | ||
| 152 | params.height = tic.Height(); | ||
| 153 | params.depth = tic.Depth(); | ||
| 154 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | ||
| 155 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 156 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 157 | params.depth *= 6; | ||
| 158 | } | ||
| 159 | params.num_levels = tic.max_mip_level + 1; | ||
| 160 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 161 | params.is_layered = params.IsLayered(); | ||
| 162 | } | ||
| 163 | return params; | ||
| 164 | } | ||
| 165 | |||
| 166 | SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { | ||
| 167 | const auto& regs = maxwell3d.regs; | ||
| 168 | const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); | ||
| 169 | const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; | ||
| 170 | const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); | ||
| 171 | return { | ||
| 172 | .is_tiled = regs.zeta.memory_layout.type == | ||
| 173 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, | ||
| 174 | .srgb_conversion = false, | ||
| 175 | .is_layered = is_layered, | ||
| 176 | .block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U), | ||
| 177 | .block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U), | ||
| 178 | .block_depth = block_depth, | ||
| 179 | .tile_width_spacing = 1, | ||
| 180 | .width = regs.zeta_width, | ||
| 181 | .height = regs.zeta_height, | ||
| 182 | .depth = is_layered ? regs.zeta_layers.Value() : 1U, | ||
| 183 | .pitch = 0, | ||
| 184 | .num_levels = 1, | ||
| 185 | .emulated_levels = 1, | ||
| 186 | .pixel_format = pixel_format, | ||
| 187 | .type = GetFormatType(pixel_format), | ||
| 188 | .target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D, | ||
| 189 | }; | ||
| 190 | } | ||
| 191 | |||
| 192 | SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 193 | std::size_t index) { | ||
| 194 | const auto& config{maxwell3d.regs.rt[index]}; | ||
| 195 | SurfaceParams params; | ||
| 196 | params.is_tiled = | ||
| 197 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 198 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 199 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB; | ||
| 200 | params.block_width = config.memory_layout.block_width; | ||
| 201 | params.block_height = config.memory_layout.block_height; | ||
| 202 | params.block_depth = config.memory_layout.block_depth; | ||
| 203 | params.tile_width_spacing = 1; | ||
| 204 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 205 | params.type = GetFormatType(params.pixel_format); | ||
| 206 | if (params.is_tiled) { | ||
| 207 | params.pitch = 0; | ||
| 208 | params.width = config.width; | ||
| 209 | } else { | ||
| 210 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 211 | params.pitch = config.width; | ||
| 212 | params.width = params.pitch / bpp; | ||
| 213 | } | ||
| 214 | params.height = config.height; | ||
| 215 | params.num_levels = 1; | ||
| 216 | params.emulated_levels = 1; | ||
| 217 | |||
| 218 | if (config.memory_layout.is_3d != 0) { | ||
| 219 | params.depth = config.layers.Value(); | ||
| 220 | params.is_layered = false; | ||
| 221 | params.target = SurfaceTarget::Texture3D; | ||
| 222 | } else if (config.layers > 1) { | ||
| 223 | params.depth = config.layers.Value(); | ||
| 224 | params.is_layered = true; | ||
| 225 | params.target = SurfaceTarget::Texture2DArray; | ||
| 226 | } else { | ||
| 227 | params.depth = 1; | ||
| 228 | params.is_layered = false; | ||
| 229 | params.target = SurfaceTarget::Texture2D; | ||
| 230 | } | ||
| 231 | return params; | ||
| 232 | } | ||
| 233 | |||
| 234 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 235 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 236 | const bool is_tiled = !config.linear; | ||
| 237 | const auto pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 238 | |||
| 239 | SurfaceParams params{ | ||
| 240 | .is_tiled = is_tiled, | ||
| 241 | .srgb_conversion = config.format == Tegra::RenderTargetFormat::B8G8R8A8_SRGB || | ||
| 242 | config.format == Tegra::RenderTargetFormat::A8B8G8R8_SRGB, | ||
| 243 | .block_width = is_tiled ? std::min(config.BlockWidth(), 5U) : 0U, | ||
| 244 | .block_height = is_tiled ? std::min(config.BlockHeight(), 5U) : 0U, | ||
| 245 | .block_depth = is_tiled ? std::min(config.BlockDepth(), 5U) : 0U, | ||
| 246 | .tile_width_spacing = 1, | ||
| 247 | .width = config.width, | ||
| 248 | .height = config.height, | ||
| 249 | .depth = 1, | ||
| 250 | .pitch = config.pitch, | ||
| 251 | .num_levels = 1, | ||
| 252 | .emulated_levels = 1, | ||
| 253 | .pixel_format = pixel_format, | ||
| 254 | .type = GetFormatType(pixel_format), | ||
| 255 | // TODO(Rodrigo): Try to guess texture arrays from parameters | ||
| 256 | .target = SurfaceTarget::Texture2D, | ||
| 257 | }; | ||
| 258 | |||
| 259 | params.is_layered = params.IsLayered(); | ||
| 260 | return params; | ||
| 261 | } | ||
| 262 | |||
| 263 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 264 | const VideoCommon::Shader::Sampler& entry) { | ||
| 265 | return TextureTypeToSurfaceTarget(entry.type, entry.is_array); | ||
| 266 | } | ||
| 267 | |||
| 268 | VideoCore::Surface::SurfaceTarget SurfaceParams::ExpectedTarget( | ||
| 269 | const VideoCommon::Shader::Image& entry) { | ||
| 270 | return ImageTypeToSurfaceTarget(entry.type); | ||
| 271 | } | ||
| 272 | |||
| 273 | bool SurfaceParams::IsLayered() const { | ||
| 274 | switch (target) { | ||
| 275 | case SurfaceTarget::Texture1DArray: | ||
| 276 | case SurfaceTarget::Texture2DArray: | ||
| 277 | case SurfaceTarget::TextureCubemap: | ||
| 278 | case SurfaceTarget::TextureCubeArray: | ||
| 279 | return true; | ||
| 280 | default: | ||
| 281 | return false; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | // Auto block resizing algorithm from: | ||
| 286 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 287 | u32 SurfaceParams::GetMipBlockHeight(u32 level) const { | ||
| 288 | if (level == 0) { | ||
| 289 | return this->block_height; | ||
| 290 | } | ||
| 291 | |||
| 292 | const u32 height_new{GetMipHeight(level)}; | ||
| 293 | const u32 default_block_height{GetDefaultBlockHeight()}; | ||
| 294 | const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height}; | ||
| 295 | const u32 block_height_new = Common::Log2Ceil32(blocks_in_y); | ||
| 296 | return std::clamp(block_height_new, 3U, 7U) - 3U; | ||
| 297 | } | ||
| 298 | |||
| 299 | u32 SurfaceParams::GetMipBlockDepth(u32 level) const { | ||
| 300 | if (level == 0) { | ||
| 301 | return this->block_depth; | ||
| 302 | } | ||
| 303 | if (is_layered) { | ||
| 304 | return 0; | ||
| 305 | } | ||
| 306 | |||
| 307 | const u32 depth_new{GetMipDepth(level)}; | ||
| 308 | const u32 block_depth_new = Common::Log2Ceil32(depth_new); | ||
| 309 | if (block_depth_new > 4) { | ||
| 310 | return 5 - (GetMipBlockHeight(level) >= 2); | ||
| 311 | } | ||
| 312 | return block_depth_new; | ||
| 313 | } | ||
| 314 | |||
| 315 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 316 | std::size_t offset = 0; | ||
| 317 | for (u32 i = 0; i < level; i++) { | ||
| 318 | offset += GetInnerMipmapMemorySize(i, false, false); | ||
| 319 | } | ||
| 320 | return offset; | ||
| 321 | } | ||
| 322 | |||
| 323 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { | ||
| 324 | std::size_t offset = 0; | ||
| 325 | if (is_converted) { | ||
| 326 | for (u32 i = 0; i < level; ++i) { | ||
| 327 | offset += GetConvertedMipmapSize(i) * GetNumLayers(); | ||
| 328 | } | ||
| 329 | } else { | ||
| 330 | for (u32 i = 0; i < level; ++i) { | ||
| 331 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | return offset; | ||
| 335 | } | ||
| 336 | |||
| 337 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { | ||
| 338 | constexpr std::size_t rgba8_bpp = 4ULL; | ||
| 339 | const std::size_t mip_width = GetMipWidth(level); | ||
| 340 | const std::size_t mip_height = GetMipHeight(level); | ||
| 341 | const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); | ||
| 342 | return mip_width * mip_height * mip_depth * rgba8_bpp; | ||
| 343 | } | ||
| 344 | |||
| 345 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { | ||
| 346 | std::size_t size = 0; | ||
| 347 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 348 | size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed); | ||
| 349 | } | ||
| 350 | if (is_tiled && is_layered) { | ||
| 351 | return Common::AlignBits(size, Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 352 | } | ||
| 353 | return size; | ||
| 354 | } | ||
| 355 | |||
| 356 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | ||
| 357 | bool uncompressed) const { | ||
| 358 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | ||
| 359 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | ||
| 360 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | ||
| 361 | if (is_tiled) { | ||
| 362 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, | ||
| 363 | depth, GetMipBlockHeight(level), | ||
| 364 | GetMipBlockDepth(level)); | ||
| 365 | } else if (as_host_size || IsBuffer()) { | ||
| 366 | return GetBytesPerPixel() * width * height * depth; | ||
| 367 | } else { | ||
| 368 | // Linear Texture Case | ||
| 369 | return pitch * height * depth; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | |||
| 373 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | ||
| 374 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 375 | height, depth, pitch, num_levels, pixel_format, type, target) == | ||
| 376 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 377 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 378 | rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target); | ||
| 379 | } | ||
| 380 | |||
| 381 | std::string SurfaceParams::TargetName() const { | ||
| 382 | switch (target) { | ||
| 383 | case SurfaceTarget::Texture1D: | ||
| 384 | return "1D"; | ||
| 385 | case SurfaceTarget::TextureBuffer: | ||
| 386 | return "TexBuffer"; | ||
| 387 | case SurfaceTarget::Texture2D: | ||
| 388 | return "2D"; | ||
| 389 | case SurfaceTarget::Texture3D: | ||
| 390 | return "3D"; | ||
| 391 | case SurfaceTarget::Texture1DArray: | ||
| 392 | return "1DArray"; | ||
| 393 | case SurfaceTarget::Texture2DArray: | ||
| 394 | return "2DArray"; | ||
| 395 | case SurfaceTarget::TextureCubemap: | ||
| 396 | return "Cube"; | ||
| 397 | case SurfaceTarget::TextureCubeArray: | ||
| 398 | return "CubeArray"; | ||
| 399 | default: | ||
| 400 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 401 | UNREACHABLE(); | ||
| 402 | return fmt::format("TUK({})", static_cast<u32>(target)); | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 406 | u32 SurfaceParams::GetBlockSize() const { | ||
| 407 | const u32 x = 64U << block_width; | ||
| 408 | const u32 y = 8U << block_height; | ||
| 409 | const u32 z = 1U << block_depth; | ||
| 410 | return x * y * z; | ||
| 411 | } | ||
| 412 | |||
| 413 | std::pair<u32, u32> SurfaceParams::GetBlockXY() const { | ||
| 414 | const u32 x_pixels = 64U / GetBytesPerPixel(); | ||
| 415 | const u32 x = x_pixels << block_width; | ||
| 416 | const u32 y = 8U << block_height; | ||
| 417 | return {x, y}; | ||
| 418 | } | ||
| 419 | |||
| 420 | std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const { | ||
| 421 | const auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 422 | const u32 block_size = GetBlockSize(); | ||
| 423 | const u32 block_index = offset / block_size; | ||
| 424 | const u32 gob_offset = offset % block_size; | ||
| 425 | const u32 gob_index = gob_offset / static_cast<u32>(Tegra::Texture::GOB_SIZE); | ||
| 426 | const u32 x_gob_pixels = 64U / GetBytesPerPixel(); | ||
| 427 | const u32 x_block_pixels = x_gob_pixels << block_width; | ||
| 428 | const u32 y_block_pixels = 8U << block_height; | ||
| 429 | const u32 z_block_pixels = 1U << block_depth; | ||
| 430 | const u32 x_blocks = div_ceil(width, x_block_pixels); | ||
| 431 | const u32 y_blocks = div_ceil(height, y_block_pixels); | ||
| 432 | const u32 z_blocks = div_ceil(depth, z_block_pixels); | ||
| 433 | const u32 base_x = block_index % x_blocks; | ||
| 434 | const u32 base_y = (block_index / x_blocks) % y_blocks; | ||
| 435 | const u32 base_z = (block_index / (x_blocks * y_blocks)) % z_blocks; | ||
| 436 | u32 x = base_x * x_block_pixels; | ||
| 437 | u32 y = base_y * y_block_pixels; | ||
| 438 | u32 z = base_z * z_block_pixels; | ||
| 439 | z += gob_index >> block_height; | ||
| 440 | y += (gob_index * 8U) % y_block_pixels; | ||
| 441 | return {x, y, z}; | ||
| 442 | } | ||
| 443 | |||
| 444 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h deleted file mode 100644 index 4466c3c34..000000000 --- a/src/video_core/texture_cache/surface_params.h +++ /dev/null | |||
| @@ -1,294 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "common/cityhash.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/fermi_2d.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/surface.h" | ||
| 17 | #include "video_core/textures/decoders.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | class FormatLookupTable; | ||
| 22 | |||
| 23 | class SurfaceParams { | ||
| 24 | public: | ||
| 25 | /// Creates SurfaceCachedParams from a texture configuration. | ||
| 26 | static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table, | ||
| 27 | const Tegra::Texture::TICEntry& tic, | ||
| 28 | const VideoCommon::Shader::Sampler& entry); | ||
| 29 | |||
| 30 | /// Creates SurfaceCachedParams from an image configuration. | ||
| 31 | static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table, | ||
| 32 | const Tegra::Texture::TICEntry& tic, | ||
| 33 | const VideoCommon::Shader::Image& entry); | ||
| 34 | |||
| 35 | /// Creates SurfaceCachedParams for a depth buffer configuration. | ||
| 36 | static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); | ||
| 37 | |||
| 38 | /// Creates SurfaceCachedParams from a framebuffer configuration. | ||
| 39 | static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 40 | std::size_t index); | ||
| 41 | |||
| 42 | /// Creates SurfaceCachedParams from a Fermi2D surface configuration. | ||
| 43 | static SurfaceParams CreateForFermiCopySurface( | ||
| 44 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 45 | |||
| 46 | /// Obtains the texture target from a shader's sampler entry. | ||
| 47 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 48 | const VideoCommon::Shader::Sampler& entry); | ||
| 49 | |||
| 50 | /// Obtains the texture target from a shader's sampler entry. | ||
| 51 | static VideoCore::Surface::SurfaceTarget ExpectedTarget( | ||
| 52 | const VideoCommon::Shader::Image& entry); | ||
| 53 | |||
| 54 | std::size_t Hash() const { | ||
| 55 | return static_cast<std::size_t>( | ||
| 56 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool operator==(const SurfaceParams& rhs) const; | ||
| 60 | |||
| 61 | bool operator!=(const SurfaceParams& rhs) const { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | std::size_t GetGuestSizeInBytes() const { | ||
| 66 | return GetInnerMemorySize(false, false, false); | ||
| 67 | } | ||
| 68 | |||
| 69 | std::size_t GetHostSizeInBytes(bool is_converted) const { | ||
| 70 | if (!is_converted) { | ||
| 71 | return GetInnerMemorySize(true, false, false); | ||
| 72 | } | ||
| 73 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 74 | std::size_t host_size_in_bytes = 0; | ||
| 75 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 76 | host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); | ||
| 77 | } | ||
| 78 | return host_size_in_bytes; | ||
| 79 | } | ||
| 80 | |||
| 81 | u32 GetBlockAlignedWidth() const { | ||
| 82 | return Common::AlignUp(width, 64 / GetBytesPerPixel()); | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Returns the width of a given mipmap level. | ||
| 86 | u32 GetMipWidth(u32 level) const { | ||
| 87 | return std::max(1U, width >> level); | ||
| 88 | } | ||
| 89 | |||
| 90 | /// Returns the height of a given mipmap level. | ||
| 91 | u32 GetMipHeight(u32 level) const { | ||
| 92 | return std::max(1U, height >> level); | ||
| 93 | } | ||
| 94 | |||
| 95 | /// Returns the depth of a given mipmap level. | ||
| 96 | u32 GetMipDepth(u32 level) const { | ||
| 97 | return is_layered ? depth : std::max(1U, depth >> level); | ||
| 98 | } | ||
| 99 | |||
| 100 | /// Returns the block height of a given mipmap level. | ||
| 101 | u32 GetMipBlockHeight(u32 level) const; | ||
| 102 | |||
| 103 | /// Returns the block depth of a given mipmap level. | ||
| 104 | u32 GetMipBlockDepth(u32 level) const; | ||
| 105 | |||
| 106 | /// Returns the best possible row/pitch alignment for the surface. | ||
| 107 | u32 GetRowAlignment(u32 level, bool is_converted) const { | ||
| 108 | const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); | ||
| 109 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | ||
| 110 | } | ||
| 111 | |||
| 112 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 113 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 114 | |||
| 115 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 116 | std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; | ||
| 117 | |||
| 118 | /// Returns the size in bytes in guest memory of a given mipmap level. | ||
| 119 | std::size_t GetGuestMipmapSize(u32 level) const { | ||
| 120 | return GetInnerMipmapMemorySize(level, false, false); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns the size in bytes in host memory (linear) of a given mipmap level. | ||
| 124 | std::size_t GetHostMipmapSize(u32 level) const { | ||
| 125 | return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); | ||
| 126 | } | ||
| 127 | |||
| 128 | std::size_t GetConvertedMipmapSize(u32 level) const; | ||
| 129 | |||
| 130 | /// Get this texture Tegra Block size in guest memory layout | ||
| 131 | u32 GetBlockSize() const; | ||
| 132 | |||
| 133 | /// Get X, Y coordinates max sizes of a single block. | ||
| 134 | std::pair<u32, u32> GetBlockXY() const; | ||
| 135 | |||
| 136 | /// Get the offset in x, y, z coordinates from a memory offset | ||
| 137 | std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const; | ||
| 138 | |||
| 139 | /// Returns the size of a layer in bytes in guest memory. | ||
| 140 | std::size_t GetGuestLayerSize() const { | ||
| 141 | return GetLayerSize(false, false); | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 145 | std::size_t GetHostLayerSize(u32 level) const { | ||
| 146 | ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); | ||
| 147 | return GetInnerMipmapMemorySize(level, true, false); | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns the max possible mipmap that the texture can have in host gpu | ||
| 151 | u32 MaxPossibleMipmap() const { | ||
| 152 | const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; | ||
| 153 | const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; | ||
| 154 | const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); | ||
| 155 | if (target != VideoCore::Surface::SurfaceTarget::Texture3D) | ||
| 156 | return max_mipmap; | ||
| 157 | return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); | ||
| 158 | } | ||
| 159 | |||
| 160 | /// Returns if the guest surface is a compressed surface. | ||
| 161 | bool IsCompressed() const { | ||
| 162 | return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the default block width. | ||
| 166 | u32 GetDefaultBlockWidth() const { | ||
| 167 | return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||
| 168 | } | ||
| 169 | |||
| 170 | /// Returns the default block height. | ||
| 171 | u32 GetDefaultBlockHeight() const { | ||
| 172 | return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||
| 173 | } | ||
| 174 | |||
| 175 | /// Returns the bits per pixel. | ||
| 176 | u32 GetBitsPerPixel() const { | ||
| 177 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Returns the bytes per pixel. | ||
| 181 | u32 GetBytesPerPixel() const { | ||
| 182 | return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||
| 183 | } | ||
| 184 | |||
| 185 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 186 | bool IsPixelFormatZeta() const { | ||
| 187 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 188 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 189 | } | ||
| 190 | |||
| 191 | /// Returns is the surface is a TextureBuffer type of surface. | ||
| 192 | bool IsBuffer() const { | ||
| 193 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | ||
| 194 | } | ||
| 195 | |||
| 196 | /// Returns the number of layers in the surface. | ||
| 197 | std::size_t GetNumLayers() const { | ||
| 198 | return is_layered ? depth : 1; | ||
| 199 | } | ||
| 200 | |||
| 201 | /// Returns the debug name of the texture for use in graphic debuggers. | ||
| 202 | std::string TargetName() const; | ||
| 203 | |||
| 204 | // Helper used for out of class size calculations | ||
| 205 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||
| 206 | const u32 block_depth) { | ||
| 207 | return Common::AlignBits(out_size, | ||
| 208 | Tegra::Texture::GOB_SIZE_SHIFT + block_height + block_depth); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Converts a width from a type of surface into another. This helps represent the | ||
| 212 | /// equivalent value between compressed/non-compressed textures. | ||
| 213 | static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 214 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 215 | const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); | ||
| 216 | const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); | ||
| 217 | return (width * bw2 + bw1 - 1) / bw1; | ||
| 218 | } | ||
| 219 | |||
| 220 | /// Converts a height from a type of surface into another. This helps represent the | ||
| 221 | /// equivalent value between compressed/non-compressed textures. | ||
| 222 | static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 223 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 224 | const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); | ||
| 225 | const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); | ||
| 226 | return (height * bh2 + bh1 - 1) / bh1; | ||
| 227 | } | ||
| 228 | |||
| 229 | // Finds the maximun possible width between 2 2D layers of different formats | ||
| 230 | static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 231 | const u32 src_level, const u32 dst_level) { | ||
| 232 | const u32 bw1 = src_params.GetDefaultBlockWidth(); | ||
| 233 | const u32 bw2 = dst_params.GetDefaultBlockWidth(); | ||
| 234 | const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; | ||
| 235 | const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; | ||
| 236 | return std::min(t_src_width, t_dst_width); | ||
| 237 | } | ||
| 238 | |||
| 239 | // Finds the maximun possible height between 2 2D layers of different formats | ||
| 240 | static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 241 | const u32 src_level, const u32 dst_level) { | ||
| 242 | const u32 bh1 = src_params.GetDefaultBlockHeight(); | ||
| 243 | const u32 bh2 = dst_params.GetDefaultBlockHeight(); | ||
| 244 | const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; | ||
| 245 | const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; | ||
| 246 | return std::min(t_src_height, t_dst_height); | ||
| 247 | } | ||
| 248 | |||
| 249 | bool is_tiled; | ||
| 250 | bool srgb_conversion; | ||
| 251 | bool is_layered; | ||
| 252 | u32 block_width; | ||
| 253 | u32 block_height; | ||
| 254 | u32 block_depth; | ||
| 255 | u32 tile_width_spacing; | ||
| 256 | u32 width; | ||
| 257 | u32 height; | ||
| 258 | u32 depth; | ||
| 259 | u32 pitch; | ||
| 260 | u32 num_levels; | ||
| 261 | u32 emulated_levels; | ||
| 262 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 263 | VideoCore::Surface::SurfaceType type; | ||
| 264 | VideoCore::Surface::SurfaceTarget target; | ||
| 265 | |||
| 266 | private: | ||
| 267 | /// Returns the size of a given mipmap level inside a layer. | ||
| 268 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; | ||
| 269 | |||
| 270 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 271 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | ||
| 272 | return GetLayerSize(as_host_size, uncompressed) * | ||
| 273 | (layer_only ? 1U : (is_layered ? depth : 1U)); | ||
| 274 | } | ||
| 275 | |||
| 276 | /// Returns the size of a layer | ||
| 277 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | ||
| 278 | |||
| 279 | /// Returns true if these parameters are from a layered surface. | ||
| 280 | bool IsLayered() const; | ||
| 281 | }; | ||
| 282 | |||
| 283 | } // namespace VideoCommon | ||
| 284 | |||
| 285 | namespace std { | ||
| 286 | |||
| 287 | template <> | ||
| 288 | struct hash<VideoCommon::SurfaceParams> { | ||
| 289 | std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept { | ||
| 290 | return k.Hash(); | ||
| 291 | } | ||
| 292 | }; | ||
| 293 | |||
| 294 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp deleted file mode 100644 index 6b5f5984b..000000000 --- a/src/video_core/texture_cache/surface_view.cpp +++ /dev/null | |||
| @@ -1,27 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/texture_cache/surface_view.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | std::size_t ViewParams::Hash() const { | ||
| 13 | return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^ | ||
| 14 | (static_cast<std::size_t>(base_level) << 24) ^ | ||
| 15 | (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); | ||
| 16 | } | ||
| 17 | |||
| 18 | bool ViewParams::operator==(const ViewParams& rhs) const { | ||
| 19 | return std::tie(base_layer, num_layers, base_level, num_levels, target) == | ||
| 20 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); | ||
| 21 | } | ||
| 22 | |||
| 23 | bool ViewParams::operator!=(const ViewParams& rhs) const { | ||
| 24 | return !operator==(rhs); | ||
| 25 | } | ||
| 26 | |||
| 27 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h deleted file mode 100644 index 90a8bb0ae..000000000 --- a/src/video_core/texture_cache/surface_view.h +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | struct ViewParams { | ||
| 16 | constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, | ||
| 17 | u32 num_layers, u32 base_level, u32 num_levels) | ||
| 18 | : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, | ||
| 19 | num_levels{num_levels} {} | ||
| 20 | |||
| 21 | std::size_t Hash() const; | ||
| 22 | |||
| 23 | bool operator==(const ViewParams& rhs) const; | ||
| 24 | bool operator!=(const ViewParams& rhs) const; | ||
| 25 | |||
| 26 | bool IsLayered() const { | ||
| 27 | switch (target) { | ||
| 28 | case VideoCore::Surface::SurfaceTarget::Texture1DArray: | ||
| 29 | case VideoCore::Surface::SurfaceTarget::Texture2DArray: | ||
| 30 | case VideoCore::Surface::SurfaceTarget::TextureCubemap: | ||
| 31 | case VideoCore::Surface::SurfaceTarget::TextureCubeArray: | ||
| 32 | return true; | ||
| 33 | default: | ||
| 34 | return false; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | VideoCore::Surface::SurfaceTarget target{}; | ||
| 39 | u32 base_layer{}; | ||
| 40 | u32 num_layers{}; | ||
| 41 | u32 base_level{}; | ||
| 42 | u32 num_levels{}; | ||
| 43 | }; | ||
| 44 | |||
| 45 | class ViewBase { | ||
| 46 | public: | ||
| 47 | constexpr explicit ViewBase(const ViewParams& params) : params{params} {} | ||
| 48 | |||
| 49 | constexpr const ViewParams& GetViewParams() const { | ||
| 50 | return params; | ||
| 51 | } | ||
| 52 | |||
| 53 | protected: | ||
| 54 | ViewParams params; | ||
| 55 | }; | ||
| 56 | |||
| 57 | } // namespace VideoCommon | ||
| 58 | |||
| 59 | namespace std { | ||
| 60 | |||
| 61 | template <> | ||
| 62 | struct hash<VideoCommon::ViewParams> { | ||
| 63 | std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { | ||
| 64 | return k.Hash(); | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ea835c59f..d1080300f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -6,1299 +6,1454 @@ | |||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <list> | 9 | #include <bit> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <set> | 12 | #include <optional> |
| 13 | #include <tuple> | 13 | #include <span> |
| 14 | #include <type_traits> | ||
| 14 | #include <unordered_map> | 15 | #include <unordered_map> |
| 16 | #include <utility> | ||
| 15 | #include <vector> | 17 | #include <vector> |
| 16 | 18 | ||
| 17 | #include <boost/container/small_vector.hpp> | 19 | #include <boost/container/small_vector.hpp> |
| 18 | #include <boost/icl/interval_map.hpp> | ||
| 19 | #include <boost/range/iterator_range.hpp> | ||
| 20 | 20 | ||
| 21 | #include "common/assert.h" | 21 | #include "common/alignment.h" |
| 22 | #include "common/common_funcs.h" | ||
| 22 | #include "common/common_types.h" | 23 | #include "common/common_types.h" |
| 23 | #include "common/math_util.h" | 24 | #include "common/logging/log.h" |
| 24 | #include "core/core.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "core/settings.h" | ||
| 27 | #include "video_core/compatible_formats.h" | 25 | #include "video_core/compatible_formats.h" |
| 26 | #include "video_core/delayed_destruction_ring.h" | ||
| 28 | #include "video_core/dirty_flags.h" | 27 | #include "video_core/dirty_flags.h" |
| 29 | #include "video_core/engines/fermi_2d.h" | 28 | #include "video_core/engines/fermi_2d.h" |
| 29 | #include "video_core/engines/kepler_compute.h" | ||
| 30 | #include "video_core/engines/maxwell_3d.h" | 30 | #include "video_core/engines/maxwell_3d.h" |
| 31 | #include "video_core/gpu.h" | ||
| 32 | #include "video_core/memory_manager.h" | 31 | #include "video_core/memory_manager.h" |
| 33 | #include "video_core/rasterizer_interface.h" | 32 | #include "video_core/rasterizer_interface.h" |
| 34 | #include "video_core/surface.h" | 33 | #include "video_core/surface.h" |
| 35 | #include "video_core/texture_cache/copy_params.h" | 34 | #include "video_core/texture_cache/descriptor_table.h" |
| 36 | #include "video_core/texture_cache/format_lookup_table.h" | 35 | #include "video_core/texture_cache/format_lookup_table.h" |
| 37 | #include "video_core/texture_cache/surface_base.h" | 36 | #include "video_core/texture_cache/formatter.h" |
| 38 | #include "video_core/texture_cache/surface_params.h" | 37 | #include "video_core/texture_cache/image_base.h" |
| 39 | #include "video_core/texture_cache/surface_view.h" | 38 | #include "video_core/texture_cache/image_info.h" |
| 40 | 39 | #include "video_core/texture_cache/image_view_base.h" | |
| 41 | namespace Tegra::Texture { | 40 | #include "video_core/texture_cache/image_view_info.h" |
| 42 | struct FullTextureInfo; | 41 | #include "video_core/texture_cache/render_targets.h" |
| 43 | } | 42 | #include "video_core/texture_cache/samples_helper.h" |
| 44 | 43 | #include "video_core/texture_cache/slot_vector.h" | |
| 45 | namespace VideoCore { | 44 | #include "video_core/texture_cache/types.h" |
| 46 | class RasterizerInterface; | 45 | #include "video_core/texture_cache/util.h" |
| 47 | } | 46 | #include "video_core/textures/texture.h" |
| 48 | 47 | ||
| 49 | namespace VideoCommon { | 48 | namespace VideoCommon { |
| 50 | 49 | ||
| 51 | using VideoCore::Surface::FormatCompatibility; | 50 | using Tegra::Texture::SwizzleSource; |
| 51 | using Tegra::Texture::TextureType; | ||
| 52 | using Tegra::Texture::TICEntry; | ||
| 53 | using Tegra::Texture::TSCEntry; | ||
| 54 | using VideoCore::Surface::GetFormatType; | ||
| 55 | using VideoCore::Surface::IsCopyCompatible; | ||
| 52 | using VideoCore::Surface::PixelFormat; | 56 | using VideoCore::Surface::PixelFormat; |
| 53 | using VideoCore::Surface::SurfaceTarget; | 57 | using VideoCore::Surface::PixelFormatFromDepthFormat; |
| 54 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | 58 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 59 | using VideoCore::Surface::SurfaceType; | ||
| 55 | 60 | ||
| 56 | template <typename TSurface, typename TView> | 61 | template <class P> |
| 57 | class TextureCache { | 62 | class TextureCache { |
| 58 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | 63 | /// Address shift for caching images into a hash table |
| 64 | static constexpr u64 PAGE_BITS = 20; | ||
| 65 | |||
| 66 | /// Enables debugging features to the texture cache | ||
| 67 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | ||
| 68 | /// Implement blits as copies between framebuffers | ||
| 69 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | ||
| 70 | /// True when some copies have to be emulated | ||
| 71 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||
| 72 | |||
| 73 | /// Image view ID for null descriptors | ||
| 74 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 75 | /// Sampler ID for bugged sampler ids | ||
| 76 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 77 | |||
| 78 | using Runtime = typename P::Runtime; | ||
| 79 | using Image = typename P::Image; | ||
| 80 | using ImageAlloc = typename P::ImageAlloc; | ||
| 81 | using ImageView = typename P::ImageView; | ||
| 82 | using Sampler = typename P::Sampler; | ||
| 83 | using Framebuffer = typename P::Framebuffer; | ||
| 84 | |||
| 85 | struct BlitImages { | ||
| 86 | ImageId dst_id; | ||
| 87 | ImageId src_id; | ||
| 88 | PixelFormat dst_format; | ||
| 89 | PixelFormat src_format; | ||
| 90 | }; | ||
| 91 | |||
| 92 | template <typename T> | ||
| 93 | struct IdentityHash { | ||
| 94 | [[nodiscard]] size_t operator()(T value) const noexcept { | ||
| 95 | return static_cast<size_t>(value); | ||
| 96 | } | ||
| 97 | }; | ||
| 59 | 98 | ||
| 60 | public: | 99 | public: |
| 61 | void InvalidateRegion(VAddr addr, std::size_t size) { | 100 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, |
| 62 | std::lock_guard lock{mutex}; | 101 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); |
| 63 | 102 | ||
| 64 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 103 | /// Notify the cache that a new frame has been queued |
| 65 | Unregister(surface); | 104 | void TickFrame(); |
| 66 | } | ||
| 67 | } | ||
| 68 | 105 | ||
| 69 | void OnCPUWrite(VAddr addr, std::size_t size) { | 106 | /// Return an unique mutually exclusive lock for the cache |
| 70 | std::lock_guard lock{mutex}; | 107 | [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); |
| 71 | 108 | ||
| 72 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 109 | /// Return a constant reference to the given image view id |
| 73 | if (surface->IsMemoryMarked()) { | 110 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 74 | UnmarkMemory(surface); | ||
| 75 | surface->SetSyncPending(true); | ||
| 76 | marked_for_unregister.emplace_back(surface); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | } | ||
| 80 | 111 | ||
| 81 | void SyncGuestHost() { | 112 | /// Return a reference to the given image view id |
| 82 | std::lock_guard lock{mutex}; | 113 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; |
| 83 | 114 | ||
| 84 | for (const auto& surface : marked_for_unregister) { | 115 | /// Fill image_view_ids with the graphics images in indices |
| 85 | if (surface->IsRegistered()) { | 116 | void FillGraphicsImageViews(std::span<const u32> indices, |
| 86 | surface->SetSyncPending(false); | 117 | std::span<ImageViewId> image_view_ids); |
| 87 | Unregister(surface); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | marked_for_unregister.clear(); | ||
| 91 | } | ||
| 92 | 118 | ||
| 93 | /** | 119 | /// Fill image_view_ids with the compute images in indices |
| 94 | * Guarantees that rendertargets don't unregister themselves if the | 120 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); |
| 95 | * collide. Protection is currently only done on 3D slices. | ||
| 96 | */ | ||
| 97 | void GuardRenderTargets(bool new_guard) { | ||
| 98 | guard_render_targets = new_guard; | ||
| 99 | } | ||
| 100 | 121 | ||
| 101 | void GuardSamplers(bool new_guard) { | 122 | /// Get the sampler from the graphics descriptor table in the specified index |
| 102 | guard_samplers = new_guard; | 123 | Sampler* GetGraphicsSampler(u32 index); |
| 103 | } | ||
| 104 | 124 | ||
| 105 | void FlushRegion(VAddr addr, std::size_t size) { | 125 | /// Get the sampler from the compute descriptor table in the specified index |
| 106 | std::lock_guard lock{mutex}; | 126 | Sampler* GetComputeSampler(u32 index); |
| 107 | 127 | ||
| 108 | auto surfaces = GetSurfacesInRegion(addr, size); | 128 | /// Refresh the state for graphics image view and sampler descriptors |
| 109 | if (surfaces.empty()) { | 129 | void SynchronizeGraphicsDescriptors(); |
| 110 | return; | ||
| 111 | } | ||
| 112 | std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { | ||
| 113 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 114 | }); | ||
| 115 | for (const auto& surface : surfaces) { | ||
| 116 | mutex.unlock(); | ||
| 117 | FlushSurface(surface); | ||
| 118 | mutex.lock(); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 130 | ||
| 122 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 131 | /// Refresh the state for compute image view and sampler descriptors |
| 123 | std::lock_guard lock{mutex}; | 132 | void SynchronizeComputeDescriptors(); |
| 124 | 133 | ||
| 125 | const auto surfaces = GetSurfacesInRegion(addr, size); | 134 | /// Update bound render targets and upload memory if necessary |
| 126 | return std::any_of(surfaces.cbegin(), surfaces.cend(), | 135 | /// @param is_clear True when the render targets are being used for clears |
| 127 | [](const TSurface& surface) { return surface->IsModified(); }); | 136 | void UpdateRenderTargets(bool is_clear); |
| 128 | } | ||
| 129 | 137 | ||
| 130 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | 138 | /// Find a framebuffer with the currently bound render targets |
| 131 | const VideoCommon::Shader::Sampler& entry) { | 139 | /// UpdateRenderTargets should be called before this |
| 132 | std::lock_guard lock{mutex}; | 140 | Framebuffer* GetFramebuffer(); |
| 133 | const auto gpu_addr{tic.Address()}; | ||
| 134 | if (!gpu_addr) { | ||
| 135 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 136 | } | ||
| 137 | 141 | ||
| 138 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 142 | /// Mark images in a range as modified from the CPU |
| 139 | if (!cpu_addr) { | 143 | void WriteMemory(VAddr cpu_addr, size_t size); |
| 140 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 141 | } | ||
| 142 | 144 | ||
| 143 | if (!IsTypeCompatible(tic.texture_type, entry)) { | 145 | /// Download contents of host images to guest memory in a region |
| 144 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 146 | void DownloadMemory(VAddr cpu_addr, size_t size); |
| 145 | } | ||
| 146 | 147 | ||
| 147 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 148 | /// Remove images in a region |
| 148 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | 149 | void UnmapMemory(VAddr cpu_addr, size_t size); |
| 149 | if (guard_samplers) { | ||
| 150 | sampled_textures.push_back(surface); | ||
| 151 | } | ||
| 152 | return view; | ||
| 153 | } | ||
| 154 | 150 | ||
| 155 | TView GetImageSurface(const Tegra::Texture::TICEntry& tic, | 151 | /// Blit an image with the given parameters |
| 156 | const VideoCommon::Shader::Image& entry) { | 152 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 157 | std::lock_guard lock{mutex}; | 153 | const Tegra::Engines::Fermi2D::Surface& src, |
| 158 | const auto gpu_addr{tic.Address()}; | 154 | const Tegra::Engines::Fermi2D::Config& copy); |
| 159 | if (!gpu_addr) { | ||
| 160 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 161 | } | ||
| 162 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 163 | if (!cpu_addr) { | ||
| 164 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 165 | } | ||
| 166 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | ||
| 167 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); | ||
| 168 | if (guard_samplers) { | ||
| 169 | sampled_textures.push_back(surface); | ||
| 170 | } | ||
| 171 | return view; | ||
| 172 | } | ||
| 173 | 155 | ||
| 174 | bool TextureBarrier() { | 156 | /// Invalidate the contents of the color buffer index |
| 175 | const bool any_rt = | 157 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 176 | std::any_of(sampled_textures.begin(), sampled_textures.end(), | 158 | void InvalidateColorBuffer(size_t index); |
| 177 | [](const auto& surface) { return surface->IsRenderTarget(); }); | ||
| 178 | sampled_textures.clear(); | ||
| 179 | return any_rt; | ||
| 180 | } | ||
| 181 | 159 | ||
| 182 | TView GetDepthBufferSurface(bool preserve_contents) { | 160 | /// Invalidate the contents of the depth buffer |
| 183 | std::lock_guard lock{mutex}; | 161 | /// These contents become unspecified, the cache can assume aggressive optimizations. |
| 184 | auto& dirty = maxwell3d.dirty; | 162 | void InvalidateDepthBuffer(); |
| 185 | if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { | ||
| 186 | return depth_buffer.view; | ||
| 187 | } | ||
| 188 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; | ||
| 189 | 163 | ||
| 190 | const auto& regs{maxwell3d.regs}; | 164 | /// Try to find a cached image view in the given CPU address |
| 191 | const auto gpu_addr{regs.zeta.Address()}; | 165 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); |
| 192 | if (!gpu_addr || !regs.zeta_enable) { | ||
| 193 | SetEmptyDepthBuffer(); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 197 | if (!cpu_addr) { | ||
| 198 | SetEmptyDepthBuffer(); | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; | ||
| 202 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); | ||
| 203 | if (depth_buffer.target) | ||
| 204 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 205 | depth_buffer.target = surface_view.first; | ||
| 206 | depth_buffer.view = surface_view.second; | ||
| 207 | if (depth_buffer.target) | ||
| 208 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); | ||
| 209 | return surface_view.second; | ||
| 210 | } | ||
| 211 | |||
| 212 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 213 | std::lock_guard lock{mutex}; | ||
| 214 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 215 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { | ||
| 216 | return render_targets[index].view; | ||
| 217 | } | ||
| 218 | maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; | ||
| 219 | 166 | ||
| 220 | const auto& regs{maxwell3d.regs}; | 167 | /// Return true when there are uncommitted images to be downloaded |
| 221 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | 168 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 222 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 223 | SetEmptyColorBuffer(index); | ||
| 224 | return {}; | ||
| 225 | } | ||
| 226 | 169 | ||
| 227 | const auto& config{regs.rt[index]}; | 170 | /// Return true when the caller should wait for async downloads |
| 228 | const auto gpu_addr{config.Address()}; | 171 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 229 | if (!gpu_addr) { | ||
| 230 | SetEmptyColorBuffer(index); | ||
| 231 | return {}; | ||
| 232 | } | ||
| 233 | 172 | ||
| 234 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 173 | /// Commit asynchronous downloads |
| 235 | if (!cpu_addr) { | 174 | void CommitAsyncFlushes(); |
| 236 | SetEmptyColorBuffer(index); | 175 | |
| 237 | return {}; | 176 | /// Pop asynchronous downloads |
| 238 | } | 177 | void PopAsyncFlushes(); |
| 178 | |||
| 179 | /// Return true when a CPU region is modified from the GPU | ||
| 180 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 239 | 181 | ||
| 240 | auto surface_view = | 182 | private: |
| 241 | GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), | 183 | /// Iterate over all page indices in a range |
| 242 | preserve_contents, true); | 184 | template <typename Func> |
| 243 | if (render_targets[index].target) { | 185 | static void ForEachPage(VAddr addr, size_t size, Func&& func) { |
| 244 | auto& surface = render_targets[index].target; | 186 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; |
| 245 | surface->MarkAsRenderTarget(false, NO_RT); | 187 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; |
| 246 | const auto& cr_params = surface->GetSurfaceParams(); | 188 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { |
| 247 | if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 189 | if constexpr (RETURNS_BOOL) { |
| 248 | AsyncFlushSurface(surface); | 190 | if (func(page)) { |
| 191 | break; | ||
| 192 | } | ||
| 193 | } else { | ||
| 194 | func(page); | ||
| 249 | } | 195 | } |
| 250 | } | 196 | } |
| 251 | render_targets[index].target = surface_view.first; | ||
| 252 | render_targets[index].view = surface_view.second; | ||
| 253 | if (render_targets[index].target) | ||
| 254 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); | ||
| 255 | return surface_view.second; | ||
| 256 | } | 197 | } |
| 257 | 198 | ||
| 258 | void MarkColorBufferInUse(std::size_t index) { | 199 | /// Fills image_view_ids in the image views in indices |
| 259 | if (auto& render_target = render_targets[index].target) { | 200 | void FillImageViews(DescriptorTable<TICEntry>& table, |
| 260 | render_target->MarkAsModified(true, Tick()); | 201 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, |
| 261 | } | 202 | std::span<ImageViewId> image_view_ids); |
| 262 | } | ||
| 263 | 203 | ||
| 264 | void MarkDepthBufferInUse() { | 204 | /// Find or create an image view in the guest descriptor table |
| 265 | if (depth_buffer.target) { | 205 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, |
| 266 | depth_buffer.target->MarkAsModified(true, Tick()); | 206 | std::span<ImageViewId> cached_image_view_ids, u32 index); |
| 267 | } | ||
| 268 | } | ||
| 269 | 207 | ||
| 270 | void SetEmptyDepthBuffer() { | 208 | /// Find or create a framebuffer with the given render target parameters |
| 271 | if (depth_buffer.target == nullptr) { | 209 | FramebufferId GetFramebufferId(const RenderTargets& key); |
| 272 | return; | ||
| 273 | } | ||
| 274 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||
| 275 | depth_buffer.target = nullptr; | ||
| 276 | depth_buffer.view = nullptr; | ||
| 277 | } | ||
| 278 | 210 | ||
| 279 | void SetEmptyColorBuffer(std::size_t index) { | 211 | /// Refresh the contents (pixel data) of an image |
| 280 | if (render_targets[index].target == nullptr) { | 212 | void RefreshContents(Image& image); |
| 281 | return; | ||
| 282 | } | ||
| 283 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||
| 284 | render_targets[index].target = nullptr; | ||
| 285 | render_targets[index].view = nullptr; | ||
| 286 | } | ||
| 287 | |||
| 288 | void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 289 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 290 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 291 | std::lock_guard lock{mutex}; | ||
| 292 | SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||
| 293 | SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||
| 294 | const GPUVAddr src_gpu_addr = src_config.Address(); | ||
| 295 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | ||
| 296 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | ||
| 297 | |||
| 298 | const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); | ||
| 299 | const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); | ||
| 300 | std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | ||
| 301 | TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; | ||
| 302 | ImageBlit(src_surface, dst_surface.second, copy_config); | ||
| 303 | dst_surface.first->MarkAsModified(true, Tick()); | ||
| 304 | } | ||
| 305 | |||
| 306 | TSurface TryFindFramebufferSurface(VAddr addr) const { | ||
| 307 | if (!addr) { | ||
| 308 | return nullptr; | ||
| 309 | } | ||
| 310 | const VAddr page = addr >> registry_page_bits; | ||
| 311 | const auto it = registry.find(page); | ||
| 312 | if (it == registry.end()) { | ||
| 313 | return nullptr; | ||
| 314 | } | ||
| 315 | const auto& list = it->second; | ||
| 316 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 317 | return surface->GetCpuAddr() == addr; | ||
| 318 | }); | ||
| 319 | return found != list.end() ? *found : nullptr; | ||
| 320 | } | ||
| 321 | 213 | ||
| 322 | u64 Tick() { | 214 | /// Upload data from guest to an image |
| 323 | return ++ticks; | 215 | template <typename MapBuffer> |
| 324 | } | 216 | void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); |
| 325 | 217 | ||
| 326 | void CommitAsyncFlushes() { | 218 | /// Find or create an image view from a guest descriptor |
| 327 | committed_flushes.push_back(uncommitted_flushes); | 219 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| 328 | uncommitted_flushes.reset(); | ||
| 329 | } | ||
| 330 | 220 | ||
| 331 | bool HasUncommittedFlushes() const { | 221 | /// Create a new image view from a guest descriptor |
| 332 | return uncommitted_flushes != nullptr; | 222 | [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); |
| 333 | } | ||
| 334 | 223 | ||
| 335 | bool ShouldWaitAsyncFlushes() const { | 224 | /// Find or create an image from the given parameters |
| 336 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | 225 | [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 337 | } | 226 | RelaxedOptions options = RelaxedOptions{}); |
| 338 | 227 | ||
| 339 | void PopAsyncFlushes() { | 228 | /// Find an image from the given parameters |
| 340 | if (committed_flushes.empty()) { | 229 | [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 341 | return; | 230 | RelaxedOptions options); |
| 342 | } | ||
| 343 | auto& flush_list = committed_flushes.front(); | ||
| 344 | if (!flush_list) { | ||
| 345 | committed_flushes.pop_front(); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | for (TSurface& surface : *flush_list) { | ||
| 349 | FlushSurface(surface); | ||
| 350 | } | ||
| 351 | committed_flushes.pop_front(); | ||
| 352 | } | ||
| 353 | 231 | ||
| 354 | protected: | 232 | /// Create an image from the given parameters |
| 355 | explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, | 233 | [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 356 | Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, | 234 | RelaxedOptions options); |
| 357 | bool is_astc_supported_) | ||
| 358 | : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 359 | gpu_memory{gpu_memory_} { | ||
| 360 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||
| 361 | SetEmptyColorBuffer(i); | ||
| 362 | } | ||
| 363 | 235 | ||
| 364 | SetEmptyDepthBuffer(); | 236 | /// Create a new image and join perfectly matching existing images |
| 365 | staging_cache.SetSize(2); | 237 | /// Remove joined images from the cache |
| 238 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 366 | 239 | ||
| 367 | const auto make_siblings = [this](PixelFormat a, PixelFormat b) { | 240 | /// Return a blit image pair from the given guest blit parameters |
| 368 | siblings_table[static_cast<std::size_t>(a)] = b; | 241 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, |
| 369 | siblings_table[static_cast<std::size_t>(b)] = a; | 242 | const Tegra::Engines::Fermi2D::Surface& src); |
| 370 | }; | ||
| 371 | std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); | ||
| 372 | make_siblings(PixelFormat::D16_UNORM, PixelFormat::R16_UNORM); | ||
| 373 | make_siblings(PixelFormat::D32_FLOAT, PixelFormat::R32_FLOAT); | ||
| 374 | make_siblings(PixelFormat::D32_FLOAT_S8_UINT, PixelFormat::R32G32_FLOAT); | ||
| 375 | 243 | ||
| 376 | sampled_textures.reserve(64); | 244 | /// Find or create a sampler from a guest descriptor sampler |
| 377 | } | 245 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
| 378 | 246 | ||
| 379 | ~TextureCache() = default; | 247 | /// Find or create an image view for the given color buffer index |
| 248 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | ||
| 380 | 249 | ||
| 381 | virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; | 250 | /// Find or create an image view for the depth buffer |
| 251 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | ||
| 382 | 252 | ||
| 383 | virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, | 253 | /// Find or create a view for a render target with the given image parameters |
| 384 | const CopyParams& copy_params) = 0; | 254 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 255 | bool is_clear); | ||
| 385 | 256 | ||
| 386 | virtual void ImageBlit(TView& src_view, TView& dst_view, | 257 | /// Iterates over all the images in a region calling func |
| 387 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | 258 | template <typename Func> |
| 259 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||
| 388 | 260 | ||
| 389 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | 261 | /// Find or create an image view in the given image with the passed parameters |
| 390 | // and reading it from a separate buffer. | 262 | [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); |
| 391 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | ||
| 392 | 263 | ||
| 393 | void ManageRenderTargetUnregister(TSurface& surface) { | 264 | /// Register image in the page table |
| 394 | auto& dirty = maxwell3d.dirty; | 265 | void RegisterImage(ImageId image); |
| 395 | const u32 index = surface->GetRenderTarget(); | 266 | |
| 396 | if (index == DEPTH_RT) { | 267 | /// Unregister image from the page table |
| 397 | dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; | 268 | void UnregisterImage(ImageId image); |
| 398 | } else { | 269 | |
| 399 | dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; | 270 | /// Track CPU reads and writes for image |
| 400 | } | 271 | void TrackImage(ImageBase& image); |
| 401 | dirty.flags[VideoCommon::Dirty::RenderTargets] = true; | 272 | |
| 273 | /// Stop tracking CPU reads and writes for image | ||
| 274 | void UntrackImage(ImageBase& image); | ||
| 275 | |||
| 276 | /// Delete image from the cache | ||
| 277 | void DeleteImage(ImageId image); | ||
| 278 | |||
| 279 | /// Remove image views references from the cache | ||
| 280 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | ||
| 281 | |||
| 282 | /// Remove framebuffers using the given image views from the cache | ||
| 283 | void RemoveFramebuffers(std::span<const ImageViewId> removed_views); | ||
| 284 | |||
| 285 | /// Mark an image as modified from the GPU | ||
| 286 | void MarkModification(ImageBase& image) noexcept; | ||
| 287 | |||
| 288 | /// Synchronize image aliases, copying data if needed | ||
| 289 | void SynchronizeAliases(ImageId image_id); | ||
| 290 | |||
| 291 | /// Prepare an image to be used | ||
| 292 | void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); | ||
| 293 | |||
| 294 | /// Prepare an image view to be used | ||
| 295 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | ||
| 296 | |||
| 297 | /// Execute copies from one image to the other, even if they are incompatible | ||
| 298 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | ||
| 299 | |||
| 300 | /// Bind an image view as render target, downloading resources preemtively if needed | ||
| 301 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | ||
| 302 | |||
| 303 | /// Create a render target from a given image and image view parameters | ||
| 304 | [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( | ||
| 305 | ImageId, const ImageViewInfo& view_info); | ||
| 306 | |||
| 307 | /// Returns true if the current clear parameters clear the whole image of a given image view | ||
| 308 | [[nodiscard]] bool IsFullClear(ImageViewId id); | ||
| 309 | |||
| 310 | Runtime& runtime; | ||
| 311 | VideoCore::RasterizerInterface& rasterizer; | ||
| 312 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 313 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 314 | Tegra::MemoryManager& gpu_memory; | ||
| 315 | |||
| 316 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | ||
| 317 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | ||
| 318 | std::vector<SamplerId> graphics_sampler_ids; | ||
| 319 | std::vector<ImageViewId> graphics_image_view_ids; | ||
| 320 | |||
| 321 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | ||
| 322 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | ||
| 323 | std::vector<SamplerId> compute_sampler_ids; | ||
| 324 | std::vector<ImageViewId> compute_image_view_ids; | ||
| 325 | |||
| 326 | RenderTargets render_targets; | ||
| 327 | |||
| 328 | std::mutex mutex; | ||
| 329 | |||
| 330 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 331 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 332 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||
| 333 | |||
| 334 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; | ||
| 335 | |||
| 336 | bool has_deleted_images = false; | ||
| 337 | |||
| 338 | SlotVector<Image> slot_images; | ||
| 339 | SlotVector<ImageView> slot_image_views; | ||
| 340 | SlotVector<ImageAlloc> slot_image_allocs; | ||
| 341 | SlotVector<Sampler> slot_samplers; | ||
| 342 | SlotVector<Framebuffer> slot_framebuffers; | ||
| 343 | |||
| 344 | // TODO: This data structure is not optimal and it should be reworked | ||
| 345 | std::vector<ImageId> uncommitted_downloads; | ||
| 346 | std::queue<std::vector<ImageId>> committed_downloads; | ||
| 347 | |||
| 348 | static constexpr size_t TICKS_TO_DESTROY = 6; | ||
| 349 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | ||
| 350 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | ||
| 351 | DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; | ||
| 352 | |||
| 353 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | ||
| 354 | |||
| 355 | u64 modification_tick = 0; | ||
| 356 | u64 frame_tick = 0; | ||
| 357 | }; | ||
| 358 | |||
| 359 | template <class P> | ||
| 360 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 361 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 362 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 363 | Tegra::MemoryManager& gpu_memory_) | ||
| 364 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 365 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 366 | // Configure null sampler | ||
| 367 | TSCEntry sampler_descriptor{}; | ||
| 368 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 369 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 370 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 371 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 372 | |||
| 373 | // Make sure the first index is reserved for the null resources | ||
| 374 | // This way the null resource becomes a compile time constant | ||
| 375 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 376 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 377 | } | ||
| 378 | |||
| 379 | template <class P> | ||
| 380 | void TextureCache<P>::TickFrame() { | ||
| 381 | // Tick sentenced resources in this order to ensure they are destroyed in the right order | ||
| 382 | sentenced_images.Tick(); | ||
| 383 | sentenced_framebuffers.Tick(); | ||
| 384 | sentenced_image_view.Tick(); | ||
| 385 | ++frame_tick; | ||
| 386 | } | ||
| 387 | |||
| 388 | template <class P> | ||
| 389 | std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||
| 390 | return std::unique_lock{mutex}; | ||
| 391 | } | ||
| 392 | |||
| 393 | template <class P> | ||
| 394 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 395 | return slot_image_views[id]; | ||
| 396 | } | ||
| 397 | |||
| 398 | template <class P> | ||
| 399 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 400 | return slot_image_views[id]; | ||
| 401 | } | ||
| 402 | |||
| 403 | template <class P> | ||
| 404 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 405 | std::span<ImageViewId> image_view_ids) { | ||
| 406 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 407 | } | ||
| 408 | |||
| 409 | template <class P> | ||
| 410 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 411 | std::span<ImageViewId> image_view_ids) { | ||
| 412 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 413 | } | ||
| 414 | |||
| 415 | template <class P> | ||
| 416 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 417 | [[unlikely]] if (index > graphics_sampler_table.Limit()) { | ||
| 418 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); | ||
| 419 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 420 | } | ||
| 421 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 422 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 423 | [[unlikely]] if (is_new) { | ||
| 424 | id = FindSampler(descriptor); | ||
| 402 | } | 425 | } |
| 426 | return &slot_samplers[id]; | ||
| 427 | } | ||
| 403 | 428 | ||
| 404 | void Register(TSurface surface) { | 429 | template <class P> |
| 405 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 430 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 406 | const std::size_t size = surface->GetSizeInBytes(); | 431 | [[unlikely]] if (index > compute_sampler_table.Limit()) { |
| 407 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 432 | LOG_ERROR(HW_GPU, "Invalid sampler index={}", index); |
| 408 | if (!cpu_addr) { | 433 | return &slot_samplers[NULL_SAMPLER_ID]; |
| 409 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | 434 | } |
| 410 | gpu_addr); | 435 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); |
| 411 | return; | 436 | SamplerId& id = compute_sampler_ids[index]; |
| 412 | } | 437 | [[unlikely]] if (is_new) { |
| 413 | surface->SetCpuAddr(*cpu_addr); | 438 | id = FindSampler(descriptor); |
| 414 | RegisterInnerCache(surface); | ||
| 415 | surface->MarkAsRegistered(true); | ||
| 416 | surface->SetMemoryMarked(true); | ||
| 417 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 418 | } | 439 | } |
| 440 | return &slot_samplers[id]; | ||
| 441 | } | ||
| 419 | 442 | ||
| 420 | void UnmarkMemory(TSurface surface) { | 443 | template <class P> |
| 421 | if (!surface->IsMemoryMarked()) { | 444 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { |
| 422 | return; | 445 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; |
| 423 | } | 446 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; |
| 424 | const std::size_t size = surface->GetSizeInBytes(); | 447 | const u32 tic_limit = maxwell3d.regs.tic.limit; |
| 425 | const VAddr cpu_addr = surface->GetCpuAddr(); | 448 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; |
| 426 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 449 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { |
| 427 | surface->SetMemoryMarked(false); | 450 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 428 | } | 451 | } |
| 452 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 453 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 454 | } | ||
| 455 | } | ||
| 429 | 456 | ||
| 430 | void Unregister(TSurface surface) { | 457 | template <class P> |
| 431 | if (guard_render_targets && surface->IsProtected()) { | 458 | void TextureCache<P>::SynchronizeComputeDescriptors() { |
| 432 | return; | 459 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; |
| 433 | } | 460 | const u32 tic_limit = kepler_compute.regs.tic.limit; |
| 434 | if (!guard_render_targets && surface->IsRenderTarget()) { | 461 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; |
| 435 | ManageRenderTargetUnregister(surface); | 462 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); |
| 436 | } | 463 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { |
| 437 | UnmarkMemory(surface); | 464 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); |
| 438 | if (surface->IsSyncPending()) { | ||
| 439 | marked_for_unregister.remove(surface); | ||
| 440 | surface->SetSyncPending(false); | ||
| 441 | } | ||
| 442 | UnregisterInnerCache(surface); | ||
| 443 | surface->MarkAsRegistered(false); | ||
| 444 | ReserveSurface(surface->GetSurfaceParams(), surface); | ||
| 445 | } | 465 | } |
| 466 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 467 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 468 | } | ||
| 469 | } | ||
| 446 | 470 | ||
| 447 | TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 471 | template <class P> |
| 448 | if (const auto surface = TryGetReservedSurface(params); surface) { | 472 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 449 | surface->SetGpuAddr(gpu_addr); | 473 | using namespace VideoCommon::Dirty; |
| 450 | return surface; | 474 | auto& flags = maxwell3d.dirty.flags; |
| 451 | } | 475 | if (!flags[Dirty::RenderTargets]) { |
| 452 | // No reserved surface available, create a new one and reserve it | 476 | return; |
| 453 | auto new_surface{CreateSurface(gpu_addr, params)}; | ||
| 454 | return new_surface; | ||
| 455 | } | 477 | } |
| 478 | flags[Dirty::RenderTargets] = false; | ||
| 456 | 479 | ||
| 457 | const bool is_astc_supported; | 480 | // Render target control is used on all render targets, so force look ups when this one is up |
| 481 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 482 | flags[Dirty::RenderTargetControl] = false; | ||
| 458 | 483 | ||
| 459 | private: | 484 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 460 | enum class RecycleStrategy : u32 { | 485 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 461 | Ignore = 0, | 486 | if (flags[Dirty::ColorBuffer0 + index] || force) { |
| 462 | Flush = 1, | 487 | flags[Dirty::ColorBuffer0 + index] = false; |
| 463 | BufferCopy = 3, | 488 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); |
| 464 | }; | 489 | } |
| 490 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 491 | } | ||
| 492 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 493 | flags[Dirty::ZetaBuffer] = false; | ||
| 494 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 495 | } | ||
| 496 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 497 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 465 | 498 | ||
| 466 | enum class DeductionType : u32 { | 499 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 467 | DeductionComplete, | 500 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); |
| 468 | DeductionIncomplete, | 501 | } |
| 469 | DeductionFailed, | 502 | render_targets.size = Extent2D{ |
| 503 | maxwell3d.regs.render_area.width, | ||
| 504 | maxwell3d.regs.render_area.height, | ||
| 470 | }; | 505 | }; |
| 506 | } | ||
| 471 | 507 | ||
| 472 | struct Deduction { | 508 | template <class P> |
| 473 | DeductionType type{DeductionType::DeductionFailed}; | 509 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { |
| 474 | TSurface surface{}; | 510 | return &slot_framebuffers[GetFramebufferId(render_targets)]; |
| 511 | } | ||
| 475 | 512 | ||
| 476 | bool Failed() const { | 513 | template <class P> |
| 477 | return type == DeductionType::DeductionFailed; | 514 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, |
| 478 | } | 515 | std::span<ImageViewId> cached_image_view_ids, |
| 516 | std::span<const u32> indices, | ||
| 517 | std::span<ImageViewId> image_view_ids) { | ||
| 518 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 519 | do { | ||
| 520 | has_deleted_images = false; | ||
| 521 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 522 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 523 | }); | ||
| 524 | } while (has_deleted_images); | ||
| 525 | } | ||
| 479 | 526 | ||
| 480 | bool Incomplete() const { | 527 | template <class P> |
| 481 | return type == DeductionType::DeductionIncomplete; | 528 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, |
| 482 | } | 529 | std::span<ImageViewId> cached_image_view_ids, |
| 530 | u32 index) { | ||
| 531 | if (index > table.Limit()) { | ||
| 532 | LOG_ERROR(HW_GPU, "Invalid image view index={}", index); | ||
| 533 | return NULL_IMAGE_VIEW_ID; | ||
| 534 | } | ||
| 535 | const auto [descriptor, is_new] = table.Read(index); | ||
| 536 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 537 | if (is_new) { | ||
| 538 | image_view_id = FindImageView(descriptor); | ||
| 539 | } | ||
| 540 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 541 | PrepareImageView(image_view_id, false, false); | ||
| 542 | } | ||
| 543 | return image_view_id; | ||
| 544 | } | ||
| 483 | 545 | ||
| 484 | bool IsDepth() const { | 546 | template <class P> |
| 485 | return surface->GetSurfaceParams().IsPixelFormatZeta(); | 547 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { |
| 486 | } | 548 | const auto [pair, is_new] = framebuffers.try_emplace(key); |
| 487 | }; | 549 | FramebufferId& framebuffer_id = pair->second; |
| 550 | if (!is_new) { | ||
| 551 | return framebuffer_id; | ||
| 552 | } | ||
| 553 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 554 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 555 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 556 | ImageView* const depth_buffer = | ||
| 557 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 558 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 559 | return framebuffer_id; | ||
| 560 | } | ||
| 488 | 561 | ||
| 489 | /** | 562 | template <class P> |
| 490 | * Takes care of selecting a proper strategy to deal with a texture recycle. | 563 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { |
| 491 | * | 564 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { |
| 492 | * @param overlaps The overlapping surfaces registered in the cache. | 565 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 493 | * @param params The parameters on the new surface. | 566 | return; |
| 494 | * @param gpu_addr The starting address of the new surface. | ||
| 495 | * @param untopological Indicates to the recycler that the texture has no way | ||
| 496 | * to match the overlaps due to topological reasons. | ||
| 497 | **/ | ||
| 498 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 499 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||
| 500 | if (Settings::IsGPULevelExtreme()) { | ||
| 501 | return RecycleStrategy::Flush; | ||
| 502 | } | ||
| 503 | // 3D Textures decision | ||
| 504 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 505 | return RecycleStrategy::Flush; | ||
| 506 | } | ||
| 507 | for (const auto& s : overlaps) { | ||
| 508 | const auto& s_params = s->GetSurfaceParams(); | ||
| 509 | if (s_params.target == SurfaceTarget::Texture3D) { | ||
| 510 | return RecycleStrategy::Flush; | ||
| 511 | } | ||
| 512 | } | ||
| 513 | // Untopological decision | ||
| 514 | if (untopological == MatchTopologyResult::CompressUnmatch) { | ||
| 515 | return RecycleStrategy::Flush; | ||
| 516 | } | ||
| 517 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 518 | return RecycleStrategy::Flush; | ||
| 519 | } | ||
| 520 | return RecycleStrategy::Ignore; | ||
| 521 | } | ||
| 522 | |||
| 523 | /** | ||
| 524 | * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented | ||
| 525 | * strategies: Ignore and Flush. | ||
| 526 | * | ||
| 527 | * - Ignore: Just unregisters all the overlaps and loads the new texture. | ||
| 528 | * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. | ||
| 529 | * | ||
| 530 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 531 | * @param params The parameters for the new surface. | ||
| 532 | * @param gpu_addr The starting address of the new surface. | ||
| 533 | * @param preserve_contents Indicates that the new surface should be loaded from memory or left | ||
| 534 | * blank. | ||
| 535 | * @param untopological Indicates to the recycler that the texture has no way to match the | ||
| 536 | * overlaps due to topological reasons. | ||
| 537 | **/ | ||
| 538 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, | ||
| 539 | const GPUVAddr gpu_addr, const bool preserve_contents, | ||
| 540 | const MatchTopologyResult untopological) { | ||
| 541 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | ||
| 542 | for (auto& surface : overlaps) { | ||
| 543 | Unregister(surface); | ||
| 544 | } | ||
| 545 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | ||
| 546 | case RecycleStrategy::Ignore: { | ||
| 547 | return InitializeSurface(gpu_addr, params, do_load); | ||
| 548 | } | ||
| 549 | case RecycleStrategy::Flush: { | ||
| 550 | std::sort(overlaps.begin(), overlaps.end(), | ||
| 551 | [](const TSurface& a, const TSurface& b) -> bool { | ||
| 552 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 553 | }); | ||
| 554 | for (auto& surface : overlaps) { | ||
| 555 | FlushSurface(surface); | ||
| 556 | } | ||
| 557 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 558 | } | 567 | } |
| 559 | case RecycleStrategy::BufferCopy: { | 568 | image.flags |= ImageFlagBits::CpuModified; |
| 560 | auto new_surface = GetUncachedSurface(gpu_addr, params); | 569 | UntrackImage(image); |
| 561 | BufferCopy(overlaps[0], new_surface); | 570 | }); |
| 562 | return {new_surface, new_surface->GetMainView()}; | 571 | } |
| 572 | |||
| 573 | template <class P> | ||
| 574 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 575 | std::vector<ImageId> images; | ||
| 576 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 577 | // Skip images that were not modified from the GPU | ||
| 578 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 579 | return; | ||
| 563 | } | 580 | } |
| 564 | default: { | 581 | // Skip images that .are. modified from the CPU |
| 565 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | 582 | // We don't want to write sensitive data from the guest |
| 566 | return InitializeSurface(gpu_addr, params, do_load); | 583 | if (True(image.flags & ImageFlagBits::CpuModified)) { |
| 584 | return; | ||
| 567 | } | 585 | } |
| 586 | if (image.info.num_samples > 1) { | ||
| 587 | LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); | ||
| 588 | return; | ||
| 568 | } | 589 | } |
| 590 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 591 | images.push_back(image_id); | ||
| 592 | }); | ||
| 593 | if (images.empty()) { | ||
| 594 | return; | ||
| 595 | } | ||
| 596 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 597 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 598 | }); | ||
| 599 | for (const ImageId image_id : images) { | ||
| 600 | Image& image = slot_images[image_id]; | ||
| 601 | auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | ||
| 602 | const auto copies = FullDownloadCopies(image.info); | ||
| 603 | image.DownloadMemory(map, 0, copies); | ||
| 604 | runtime.Finish(); | ||
| 605 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | ||
| 569 | } | 606 | } |
| 607 | } | ||
| 570 | 608 | ||
| 571 | /** | 609 | template <class P> |
| 572 | * Takes a single surface and recreates into another that may differ in | 610 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 573 | * format, target or width alignment. | 611 | std::vector<ImageId> deleted_images; |
| 574 | * | 612 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 575 | * @param current_surface The registered surface in the cache which we want to convert. | 613 | for (const ImageId id : deleted_images) { |
| 576 | * @param params The new surface params which we'll use to recreate the surface. | 614 | Image& image = slot_images[id]; |
| 577 | * @param is_render Whether or not the surface is a render target. | 615 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 578 | **/ | 616 | UntrackImage(image); |
| 579 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, | 617 | } |
| 580 | bool is_render) { | 618 | UnregisterImage(id); |
| 581 | const auto gpu_addr = current_surface->GetGpuAddr(); | 619 | DeleteImage(id); |
| 582 | const auto& cr_params = current_surface->GetSurfaceParams(); | 620 | } |
| 583 | TSurface new_surface; | 621 | } |
| 584 | if (cr_params.pixel_format != params.pixel_format && !is_render && | ||
| 585 | GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { | ||
| 586 | SurfaceParams new_params = params; | ||
| 587 | new_params.pixel_format = cr_params.pixel_format; | ||
| 588 | new_params.type = cr_params.type; | ||
| 589 | new_surface = GetUncachedSurface(gpu_addr, new_params); | ||
| 590 | } else { | ||
| 591 | new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 592 | } | ||
| 593 | const SurfaceParams& final_params = new_surface->GetSurfaceParams(); | ||
| 594 | if (cr_params.type != final_params.type) { | ||
| 595 | if (Settings::IsGPULevelExtreme()) { | ||
| 596 | BufferCopy(current_surface, new_surface); | ||
| 597 | } | ||
| 598 | } else { | ||
| 599 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | ||
| 600 | for (auto& brick : bricks) { | ||
| 601 | TryCopyImage(current_surface, new_surface, brick); | ||
| 602 | } | ||
| 603 | } | ||
| 604 | Unregister(current_surface); | ||
| 605 | Register(new_surface); | ||
| 606 | new_surface->MarkAsModified(current_surface->IsModified(), Tick()); | ||
| 607 | return {new_surface, new_surface->GetMainView()}; | ||
| 608 | } | ||
| 609 | |||
| 610 | /** | ||
| 611 | * Takes a single surface and checks with the new surface's params if it's an exact | ||
| 612 | * match, we return the main view of the registered surface. If its formats don't | ||
| 613 | * match, we rebuild the surface. We call this last method a `Mirage`. If formats | ||
| 614 | * match but the targets don't, we create an overview View of the registered surface. | ||
| 615 | * | ||
| 616 | * @param current_surface The registered surface in the cache which we want to convert. | ||
| 617 | * @param params The new surface params which we want to check. | ||
| 618 | * @param is_render Whether or not the surface is a render target. | ||
| 619 | **/ | ||
| 620 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, | ||
| 621 | const SurfaceParams& params, bool is_render) { | ||
| 622 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 623 | const bool matches_target = current_surface->MatchTarget(params.target); | ||
| 624 | const auto match_check = [&]() -> std::pair<TSurface, TView> { | ||
| 625 | if (matches_target) { | ||
| 626 | return {current_surface, current_surface->GetMainView()}; | ||
| 627 | } | ||
| 628 | return {current_surface, current_surface->EmplaceOverview(params)}; | ||
| 629 | }; | ||
| 630 | if (!is_mirage) { | ||
| 631 | return match_check(); | ||
| 632 | } | ||
| 633 | if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { | ||
| 634 | return match_check(); | ||
| 635 | } | ||
| 636 | return RebuildSurface(current_surface, params, is_render); | ||
| 637 | } | ||
| 638 | |||
| 639 | /** | ||
| 640 | * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate | ||
| 641 | * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps | ||
| 642 | * of the new surface, if they all match we end up recreating a surface for them, | ||
| 643 | * else we return nothing. | ||
| 644 | * | ||
| 645 | * @param overlaps The overlapping surfaces registered in the cache. | ||
| 646 | * @param params The parameters on the new surface. | ||
| 647 | * @param gpu_addr The starting address of the new surface. | ||
| 648 | **/ | ||
| 649 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, | ||
| 650 | const SurfaceParams& params, | ||
| 651 | GPUVAddr gpu_addr) { | ||
| 652 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 653 | return std::nullopt; | ||
| 654 | } | ||
| 655 | const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; | ||
| 656 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 657 | 622 | ||
| 658 | if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { | 623 | template <class P> |
| 659 | LoadSurface(new_surface); | 624 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 660 | for (const auto& surface : overlaps) { | 625 | const Tegra::Engines::Fermi2D::Surface& src, |
| 661 | Unregister(surface); | 626 | const Tegra::Engines::Fermi2D::Config& copy) { |
| 662 | } | 627 | const BlitImages images = GetBlitImages(dst, src); |
| 663 | Register(new_surface); | 628 | const ImageId dst_id = images.dst_id; |
| 664 | return {{new_surface, new_surface->GetMainView()}}; | 629 | const ImageId src_id = images.src_id; |
| 665 | } | 630 | PrepareImage(src_id, false, false); |
| 631 | PrepareImage(dst_id, true, false); | ||
| 632 | |||
| 633 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 634 | const ImageBase& src_image = slot_images[src_id]; | ||
| 635 | |||
| 636 | // TODO: Deduplicate | ||
| 637 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 638 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 639 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 640 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 641 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 642 | const std::array src_region{ | ||
| 643 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 644 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 645 | }; | ||
| 666 | 646 | ||
| 667 | std::size_t passed_tests = 0; | 647 | const std::optional src_base = src_image.TryFindBase(src.Address()); |
| 668 | for (auto& surface : overlaps) { | 648 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; |
| 669 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 649 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); |
| 670 | const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | 650 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); |
| 671 | if (!mipmap_layer) { | 651 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); |
| 672 | continue; | 652 | const std::array dst_region{ |
| 673 | } | 653 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, |
| 674 | const auto [base_layer, base_mipmap] = *mipmap_layer; | 654 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, |
| 675 | if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { | 655 | }; |
| 676 | continue; | ||
| 677 | } | ||
| 678 | ++passed_tests; | ||
| 679 | |||
| 680 | // Copy all mipmaps and layers | ||
| 681 | const u32 block_width = params.GetDefaultBlockWidth(); | ||
| 682 | const u32 block_height = params.GetDefaultBlockHeight(); | ||
| 683 | for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { | ||
| 684 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | ||
| 685 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 686 | if (width < block_width || height < block_height) { | ||
| 687 | // Current APIs forbid copying small compressed textures, avoid errors | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | ||
| 691 | src_params.depth); | ||
| 692 | TryCopyImage(surface, new_surface, copy_params); | ||
| 693 | } | ||
| 694 | } | ||
| 695 | if (passed_tests == 0) { | ||
| 696 | return std::nullopt; | ||
| 697 | } | ||
| 698 | if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||
| 699 | // In Accurate GPU all tests should pass, else we recycle | ||
| 700 | return std::nullopt; | ||
| 701 | } | ||
| 702 | 656 | ||
| 703 | const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); | 657 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. |
| 704 | for (const auto& surface : overlaps) { | 658 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
| 705 | Unregister(surface); | 659 | if constexpr (FRAMEBUFFER_BLITS) { |
| 706 | } | 660 | // OpenGL blits from framebuffers, not images |
| 661 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 662 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 663 | copy.filter, copy.operation); | ||
| 664 | } else { | ||
| 665 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 666 | // Provide a framebuffer in case it's necessary | ||
| 667 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 668 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 669 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 670 | copy.operation); | ||
| 671 | } | ||
| 672 | } | ||
| 707 | 673 | ||
| 708 | new_surface->MarkAsModified(modified, Tick()); | 674 | template <class P> |
| 709 | Register(new_surface); | 675 | void TextureCache<P>::InvalidateColorBuffer(size_t index) { |
| 710 | return {{new_surface, new_surface->GetMainView()}}; | 676 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 711 | } | 677 | color_buffer_id = FindColorBuffer(index, false); |
| 712 | 678 | if (!color_buffer_id) { | |
| 713 | /** | 679 | LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); |
| 714 | * Takes care of managing 3D textures and its slices. Does HLE methods for reconstructing the 3D | 680 | return; |
| 715 | * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of | 681 | } |
| 716 | * the HLE methods. | 682 | // When invalidating a color buffer, the old contents are no longer relevant |
| 717 | * | 683 | ImageView& color_buffer = slot_image_views[color_buffer_id]; |
| 718 | * @param overlaps The overlapping surfaces registered in the cache. | 684 | Image& image = slot_images[color_buffer.image_id]; |
| 719 | * @param params The parameters on the new surface. | 685 | image.flags &= ~ImageFlagBits::CpuModified; |
| 720 | * @param gpu_addr The starting address of the new surface. | 686 | image.flags &= ~ImageFlagBits::GpuModified; |
| 721 | * @param cpu_addr The starting address of the new surface on physical memory. | ||
| 722 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 723 | * left blank. | ||
| 724 | */ | ||
| 725 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, | ||
| 726 | const SurfaceParams& params, | ||
| 727 | GPUVAddr gpu_addr, VAddr cpu_addr, | ||
| 728 | bool preserve_contents) { | ||
| 729 | if (params.target != SurfaceTarget::Texture3D) { | ||
| 730 | for (const auto& surface : overlaps) { | ||
| 731 | if (!surface->MatchTarget(params.target)) { | ||
| 732 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | ||
| 733 | if (Settings::IsGPULevelExtreme()) { | ||
| 734 | return std::nullopt; | ||
| 735 | } | ||
| 736 | Unregister(surface); | ||
| 737 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 738 | } | ||
| 739 | return std::nullopt; | ||
| 740 | } | ||
| 741 | if (surface->GetCpuAddr() != cpu_addr) { | ||
| 742 | continue; | ||
| 743 | } | ||
| 744 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | ||
| 745 | return std::make_pair(surface, surface->GetMainView()); | ||
| 746 | } | ||
| 747 | } | ||
| 748 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 749 | } | ||
| 750 | 687 | ||
| 751 | if (params.num_levels > 1) { | 688 | runtime.InvalidateColorBuffer(color_buffer, index); |
| 752 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | 689 | } |
| 753 | return std::nullopt; | ||
| 754 | } | ||
| 755 | 690 | ||
| 756 | if (overlaps.size() == 1) { | 691 | template <class P> |
| 757 | const auto& surface = overlaps[0]; | 692 | void TextureCache<P>::InvalidateDepthBuffer() { |
| 758 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | 693 | ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; |
| 759 | // Don't attempt to render to textures with more than one level for now | 694 | depth_buffer_id = FindDepthBuffer(false); |
| 760 | // The texture has to be to the right or the sample address if we want to render to it | 695 | if (!depth_buffer_id) { |
| 761 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | 696 | LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); |
| 762 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | 697 | return; |
| 763 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 698 | } |
| 764 | if (slice < overlap_params.depth) { | 699 | // When invalidating the depth buffer, the old contents are no longer relevant |
| 765 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | 700 | ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; |
| 766 | return std::make_pair(std::move(surface), std::move(view)); | 701 | image.flags &= ~ImageFlagBits::CpuModified; |
| 767 | } | 702 | image.flags &= ~ImageFlagBits::GpuModified; |
| 768 | } | ||
| 769 | } | ||
| 770 | 703 | ||
| 771 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | 704 | ImageView& depth_buffer = slot_image_views[depth_buffer_id]; |
| 772 | bool modified = false; | 705 | runtime.InvalidateDepthBuffer(depth_buffer); |
| 706 | } | ||
| 773 | 707 | ||
| 774 | for (auto& surface : overlaps) { | 708 | template <class P> |
| 775 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 709 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { |
| 776 | if (src_params.target != SurfaceTarget::Texture2D || | 710 | // TODO: Properly implement this |
| 777 | src_params.height != params.height || | 711 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); |
| 778 | src_params.block_depth != params.block_depth || | 712 | if (it == page_table.end()) { |
| 779 | src_params.block_height != params.block_height) { | 713 | return nullptr; |
| 780 | return std::nullopt; | 714 | } |
| 781 | } | 715 | const auto& image_ids = it->second; |
| 782 | modified |= surface->IsModified(); | 716 | for (const ImageId image_id : image_ids) { |
| 783 | 717 | const ImageBase& image = slot_images[image_id]; | |
| 784 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | 718 | if (image.cpu_addr != cpu_addr) { |
| 785 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | 719 | continue; |
| 786 | const u32 width = params.width; | ||
| 787 | const u32 height = params.height; | ||
| 788 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 789 | TryCopyImage(surface, new_surface, copy_params); | ||
| 790 | } | 720 | } |
| 791 | for (const auto& surface : overlaps) { | 721 | if (image.image_view_ids.empty()) { |
| 792 | Unregister(surface); | 722 | continue; |
| 793 | } | 723 | } |
| 794 | new_surface->MarkAsModified(modified, Tick()); | 724 | return &slot_image_views[image.image_view_ids.at(0)]; |
| 795 | Register(new_surface); | 725 | } |
| 796 | 726 | return nullptr; | |
| 797 | TView view = new_surface->GetMainView(); | 727 | } |
| 798 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 799 | } | ||
| 800 | |||
| 801 | /** | ||
| 802 | * Gets the starting address and parameters of a candidate surface and tries | ||
| 803 | * to find a matching surface within the cache. This is done in 3 big steps: | ||
| 804 | * | ||
| 805 | * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. | ||
| 806 | * | ||
| 807 | * 2. Check if there are any overlaps at all, if there are none, we just load the texture from | ||
| 808 | * memory else we move to step 3. | ||
| 809 | * | ||
| 810 | * 3. Consists of figuring out the relationship between the candidate texture and the | ||
| 811 | * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If | ||
| 812 | * there's many, we just try to reconstruct a new surface out of them based on the | ||
| 813 | * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we | ||
| 814 | * have to check if the candidate is a view (layer/mipmap) of the overlap or if the | ||
| 815 | * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct | ||
| 816 | * a new surface. | ||
| 817 | * | ||
| 818 | * @param gpu_addr The starting address of the candidate surface. | ||
| 819 | * @param params The parameters on the candidate surface. | ||
| 820 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 821 | * left blank. | ||
| 822 | * @param is_render Whether or not the surface is a render target. | ||
| 823 | **/ | ||
| 824 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||
| 825 | const SurfaceParams& params, bool preserve_contents, | ||
| 826 | bool is_render) { | ||
| 827 | // Step 1 | ||
| 828 | // Check Level 1 Cache for a fast structural match. If candidate surface | ||
| 829 | // matches at certain level we are pretty much done. | ||
| 830 | if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { | ||
| 831 | TSurface& current_surface = iter->second; | ||
| 832 | const auto topological_result = current_surface->MatchesTopology(params); | ||
| 833 | if (topological_result != MatchTopologyResult::FullMatch) { | ||
| 834 | VectorSurface overlaps{current_surface}; | ||
| 835 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 836 | topological_result); | ||
| 837 | } | ||
| 838 | 728 | ||
| 839 | const auto struct_result = current_surface->MatchesStructure(params); | 729 | template <class P> |
| 840 | if (struct_result != MatchStructureResult::None) { | 730 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { |
| 841 | const auto& old_params = current_surface->GetSurfaceParams(); | 731 | return !uncommitted_downloads.empty(); |
| 842 | const bool not_3d = params.target != SurfaceTarget::Texture3D && | 732 | } |
| 843 | old_params.target != SurfaceTarget::Texture3D; | ||
| 844 | if (not_3d || current_surface->MatchTarget(params.target)) { | ||
| 845 | if (struct_result == MatchStructureResult::FullMatch) { | ||
| 846 | return ManageStructuralMatch(current_surface, params, is_render); | ||
| 847 | } else { | ||
| 848 | return RebuildSurface(current_surface, params, is_render); | ||
| 849 | } | ||
| 850 | } | ||
| 851 | } | ||
| 852 | } | ||
| 853 | 733 | ||
| 854 | // Step 2 | 734 | template <class P> |
| 855 | // Obtain all possible overlaps in the memory region | 735 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 856 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 736 | return !committed_downloads.empty() && !committed_downloads.front().empty(); |
| 857 | auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; | 737 | } |
| 858 | 738 | ||
| 859 | // If none are found, we are done. we just load the surface and create it. | 739 | template <class P> |
| 860 | if (overlaps.empty()) { | 740 | void TextureCache<P>::CommitAsyncFlushes() { |
| 861 | return InitializeSurface(gpu_addr, params, preserve_contents); | 741 | // This is intentionally passing the value by copy |
| 862 | } | 742 | committed_downloads.push(uncommitted_downloads); |
| 743 | uncommitted_downloads.clear(); | ||
| 744 | } | ||
| 863 | 745 | ||
| 864 | // Step 3 | 746 | template <class P> |
| 865 | // Now we need to figure the relationship between the texture and its overlaps | 747 | void TextureCache<P>::PopAsyncFlushes() { |
| 866 | // we do a topological test to ensure we can find some relationship. If it fails | 748 | if (committed_downloads.empty()) { |
| 867 | // immediately recycle the texture | 749 | return; |
| 868 | for (const auto& surface : overlaps) { | 750 | } |
| 869 | const auto topological_result = surface->MatchesTopology(params); | 751 | const std::span<const ImageId> download_ids = committed_downloads.front(); |
| 870 | if (topological_result != MatchTopologyResult::FullMatch) { | 752 | if (download_ids.empty()) { |
| 871 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 753 | committed_downloads.pop(); |
| 872 | topological_result); | 754 | return; |
| 873 | } | 755 | } |
| 874 | } | 756 | size_t total_size_bytes = 0; |
| 757 | for (const ImageId image_id : download_ids) { | ||
| 758 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 759 | } | ||
| 760 | auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | ||
| 761 | size_t buffer_offset = 0; | ||
| 762 | for (const ImageId image_id : download_ids) { | ||
| 763 | Image& image = slot_images[image_id]; | ||
| 764 | const auto copies = FullDownloadCopies(image.info); | ||
| 765 | image.DownloadMemory(download_map, buffer_offset, copies); | ||
| 766 | buffer_offset += image.unswizzled_size_bytes; | ||
| 767 | } | ||
| 768 | // Wait for downloads to finish | ||
| 769 | runtime.Finish(); | ||
| 770 | |||
| 771 | buffer_offset = 0; | ||
| 772 | const std::span<u8> download_span = download_map.Span(); | ||
| 773 | for (const ImageId image_id : download_ids) { | ||
| 774 | const ImageBase& image = slot_images[image_id]; | ||
| 775 | const auto copies = FullDownloadCopies(image.info); | ||
| 776 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | ||
| 777 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | ||
| 778 | buffer_offset += image.unswizzled_size_bytes; | ||
| 779 | } | ||
| 780 | committed_downloads.pop(); | ||
| 781 | } | ||
| 875 | 782 | ||
| 876 | // Manage 3D textures | 783 | template <class P> |
| 877 | if (params.block_depth > 0) { | 784 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 878 | auto surface = | 785 | bool is_modified = false; |
| 879 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 786 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { |
| 880 | if (surface) { | 787 | if (False(image.flags & ImageFlagBits::GpuModified)) { |
| 881 | return *surface; | 788 | return false; |
| 882 | } | ||
| 883 | } | 789 | } |
| 790 | is_modified = true; | ||
| 791 | return true; | ||
| 792 | }); | ||
| 793 | return is_modified; | ||
| 794 | } | ||
| 884 | 795 | ||
| 885 | // Split cases between 1 overlap or many. | 796 | template <class P> |
| 886 | if (overlaps.size() == 1) { | 797 | void TextureCache<P>::RefreshContents(Image& image) { |
| 887 | TSurface current_surface = overlaps[0]; | 798 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 888 | // First check if the surface is within the overlap. If not, it means | 799 | // Only upload modified images |
| 889 | // two things either the candidate surface is a supertexture of the overlap | 800 | return; |
| 890 | // or they don't match in any known way. | 801 | } |
| 891 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | 802 | image.flags &= ~ImageFlagBits::CpuModified; |
| 892 | const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); | 803 | TrackImage(image); |
| 893 | if (view) { | ||
| 894 | return *view; | ||
| 895 | } | ||
| 896 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 897 | MatchTopologyResult::FullMatch); | ||
| 898 | } | ||
| 899 | // Now we check if the candidate is a mipmap/layer of the overlap | ||
| 900 | std::optional<TView> view = | ||
| 901 | current_surface->EmplaceView(params, gpu_addr, candidate_size); | ||
| 902 | if (view) { | ||
| 903 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 904 | if (is_mirage) { | ||
| 905 | // On a mirage view, we need to recreate the surface under this new view | ||
| 906 | // and then obtain a view again. | ||
| 907 | SurfaceParams new_params = current_surface->GetSurfaceParams(); | ||
| 908 | const u32 wh = SurfaceParams::ConvertWidth( | ||
| 909 | new_params.width, new_params.pixel_format, params.pixel_format); | ||
| 910 | const u32 hh = SurfaceParams::ConvertHeight( | ||
| 911 | new_params.height, new_params.pixel_format, params.pixel_format); | ||
| 912 | new_params.width = wh; | ||
| 913 | new_params.height = hh; | ||
| 914 | new_params.pixel_format = params.pixel_format; | ||
| 915 | std::pair<TSurface, TView> pair = | ||
| 916 | RebuildSurface(current_surface, new_params, is_render); | ||
| 917 | std::optional<TView> mirage_view = | ||
| 918 | pair.first->EmplaceView(params, gpu_addr, candidate_size); | ||
| 919 | if (mirage_view) | ||
| 920 | return {pair.first, *mirage_view}; | ||
| 921 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 922 | MatchTopologyResult::FullMatch); | ||
| 923 | } | ||
| 924 | return {current_surface, *view}; | ||
| 925 | } | ||
| 926 | } else { | ||
| 927 | // If there are many overlaps, odds are they are subtextures of the candidate | ||
| 928 | // surface. We try to construct a new surface based on the candidate parameters, | ||
| 929 | // using the overlaps. If a single overlap fails, this will fail. | ||
| 930 | std::optional<std::pair<TSurface, TView>> view = | ||
| 931 | TryReconstructSurface(overlaps, params, gpu_addr); | ||
| 932 | if (view) { | ||
| 933 | return *view; | ||
| 934 | } | ||
| 935 | } | ||
| 936 | // We failed all the tests, recycle the overlaps into a new texture. | ||
| 937 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 938 | MatchTopologyResult::FullMatch); | ||
| 939 | } | ||
| 940 | |||
| 941 | /** | ||
| 942 | * Gets the starting address and parameters of a candidate surface and tries to find a | ||
| 943 | * matching surface within the cache that's similar to it. If there are many textures | ||
| 944 | * or the texture found if entirely incompatible, it will fail. If no texture is found, the | ||
| 945 | * blit will be unsuccessful. | ||
| 946 | * | ||
| 947 | * @param gpu_addr The starting address of the candidate surface. | ||
| 948 | * @param params The parameters on the candidate surface. | ||
| 949 | **/ | ||
| 950 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 951 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 952 | |||
| 953 | if (!cpu_addr) { | ||
| 954 | Deduction result{}; | ||
| 955 | result.type = DeductionType::DeductionFailed; | ||
| 956 | return result; | ||
| 957 | } | ||
| 958 | 804 | ||
| 959 | if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { | 805 | if (image.info.num_samples > 1) { |
| 960 | TSurface& current_surface = iter->second; | 806 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 961 | const auto topological_result = current_surface->MatchesTopology(params); | 807 | return; |
| 962 | if (topological_result != MatchTopologyResult::FullMatch) { | 808 | } |
| 963 | Deduction result{}; | 809 | auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); |
| 964 | result.type = DeductionType::DeductionFailed; | 810 | UploadImageContents(image, map, 0); |
| 965 | return result; | 811 | runtime.InsertUploadMemoryBarrier(); |
| 966 | } | 812 | } |
| 967 | const auto struct_result = current_surface->MatchesStructure(params); | ||
| 968 | if (struct_result != MatchStructureResult::None && | ||
| 969 | current_surface->MatchTarget(params.target)) { | ||
| 970 | Deduction result{}; | ||
| 971 | result.type = DeductionType::DeductionComplete; | ||
| 972 | result.surface = current_surface; | ||
| 973 | return result; | ||
| 974 | } | ||
| 975 | } | ||
| 976 | 813 | ||
| 977 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 814 | template <class P> |
| 978 | auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; | 815 | template <typename MapBuffer> |
| 816 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | ||
| 817 | const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | ||
| 818 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 819 | |||
| 820 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 821 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 822 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 823 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | ||
| 824 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 825 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 826 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 827 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 828 | image.UploadMemory(map, buffer_offset, copies); | ||
| 829 | } else if (image.info.type == ImageType::Buffer) { | ||
| 830 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | ||
| 831 | image.UploadMemory(map, buffer_offset, copies); | ||
| 832 | } else { | ||
| 833 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 834 | image.UploadMemory(map, buffer_offset, copies); | ||
| 835 | } | ||
| 836 | } | ||
| 979 | 837 | ||
| 980 | if (overlaps.empty()) { | 838 | template <class P> |
| 981 | Deduction result{}; | 839 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { |
| 982 | result.type = DeductionType::DeductionIncomplete; | 840 | if (!IsValidAddress(gpu_memory, config)) { |
| 983 | return result; | 841 | return NULL_IMAGE_VIEW_ID; |
| 984 | } | 842 | } |
| 843 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 844 | ImageViewId& image_view_id = pair->second; | ||
| 845 | if (is_new) { | ||
| 846 | image_view_id = CreateImageView(config); | ||
| 847 | } | ||
| 848 | return image_view_id; | ||
| 849 | } | ||
| 985 | 850 | ||
| 986 | if (overlaps.size() > 1) { | 851 | template <class P> |
| 987 | Deduction result{}; | 852 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { |
| 988 | result.type = DeductionType::DeductionFailed; | 853 | const ImageInfo info(config); |
| 989 | return result; | 854 | const GPUVAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; |
| 990 | } else { | 855 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); |
| 991 | Deduction result{}; | 856 | if (!image_id) { |
| 992 | result.type = DeductionType::DeductionComplete; | 857 | return NULL_IMAGE_VIEW_ID; |
| 993 | result.surface = overlaps[0]; | ||
| 994 | return result; | ||
| 995 | } | ||
| 996 | } | 858 | } |
| 859 | ImageBase& image = slot_images[image_id]; | ||
| 860 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 861 | ASSERT(base.level == 0); | ||
| 862 | const ImageViewInfo view_info(config, base.layer); | ||
| 863 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 864 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 865 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 866 | image.flags |= ImageFlagBits::Strong; | ||
| 867 | return image_view_id; | ||
| 868 | } | ||
| 997 | 869 | ||
| 998 | /** | 870 | template <class P> |
| 999 | * Gets a null surface based on a target texture. | 871 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1000 | * @param target The target of the null surface. | 872 | RelaxedOptions options) { |
| 1001 | */ | 873 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { |
| 1002 | TView GetNullSurface(SurfaceTarget target) { | 874 | return image_id; |
| 1003 | const u32 i_target = static_cast<u32>(target); | 875 | } |
| 1004 | if (const auto it = invalid_cache.find(i_target); it != invalid_cache.end()) { | 876 | return InsertImage(info, gpu_addr, options); |
| 1005 | return it->second->GetMainView(); | 877 | } |
| 1006 | } | 878 | |
| 1007 | SurfaceParams params{}; | 879 | template <class P> |
| 1008 | params.target = target; | 880 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1009 | params.is_tiled = false; | 881 | RelaxedOptions options) { |
| 1010 | params.srgb_conversion = false; | 882 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 1011 | params.is_layered = | 883 | if (!cpu_addr) { |
| 1012 | target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || | 884 | return ImageId{}; |
| 1013 | target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; | 885 | } |
| 1014 | params.block_width = 0; | 886 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); |
| 1015 | params.block_height = 0; | 887 | ImageId image_id; |
| 1016 | params.block_depth = 0; | 888 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1017 | params.tile_width_spacing = 1; | 889 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { |
| 1018 | params.width = 1; | 890 | const bool strict_size = False(options & RelaxedOptions::Size) && |
| 1019 | params.height = 1; | 891 | True(existing_image.flags & ImageFlagBits::Strong); |
| 1020 | params.depth = 1; | 892 | const ImageInfo& existing = existing_image.info; |
| 1021 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | 893 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && |
| 1022 | params.depth = 6; | 894 | existing.pitch == info.pitch && |
| 1023 | } | 895 | IsPitchLinearSameSize(existing, info, strict_size) && |
| 1024 | params.pitch = 4; | 896 | IsViewCompatible(existing.format, info.format, broken_views)) { |
| 1025 | params.num_levels = 1; | 897 | image_id = existing_image_id; |
| 1026 | params.emulated_levels = 1; | 898 | return true; |
| 1027 | params.pixel_format = VideoCore::Surface::PixelFormat::R8_UNORM; | 899 | } |
| 1028 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 900 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) { |
| 1029 | auto surface = CreateSurface(0ULL, params); | 901 | image_id = existing_image_id; |
| 1030 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 902 | return true; |
| 1031 | surface->UploadTexture(invalid_memory); | ||
| 1032 | surface->MarkAsModified(false, Tick()); | ||
| 1033 | invalid_cache.emplace(i_target, surface); | ||
| 1034 | return surface->GetMainView(); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | /** | ||
| 1038 | * Gets the a source and destination starting address and parameters, | ||
| 1039 | * and tries to deduce if they are supposed to be depth textures. If so, their | ||
| 1040 | * parameters are modified and fixed into so. | ||
| 1041 | * | ||
| 1042 | * @param src_params The parameters of the candidate surface. | ||
| 1043 | * @param dst_params The parameters of the destination surface. | ||
| 1044 | * @param src_gpu_addr The starting address of the candidate surface. | ||
| 1045 | * @param dst_gpu_addr The starting address of the destination surface. | ||
| 1046 | **/ | ||
| 1047 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | ||
| 1048 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | ||
| 1049 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); | ||
| 1050 | auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); | ||
| 1051 | if (deduced_src.Failed() || deduced_dst.Failed()) { | ||
| 1052 | return; | ||
| 1053 | } | 903 | } |
| 904 | return false; | ||
| 905 | }; | ||
| 906 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 907 | return image_id; | ||
| 908 | } | ||
| 1054 | 909 | ||
| 1055 | const bool incomplete_src = deduced_src.Incomplete(); | 910 | template <class P> |
| 1056 | const bool incomplete_dst = deduced_dst.Incomplete(); | 911 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 912 | RelaxedOptions options) { | ||
| 913 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 914 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 915 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 916 | const Image& image = slot_images[image_id]; | ||
| 917 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 918 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 919 | if (is_new) { | ||
| 920 | it->second = slot_image_allocs.insert(); | ||
| 921 | } | ||
| 922 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 923 | return image_id; | ||
| 924 | } | ||
| 1057 | 925 | ||
| 1058 | if (incomplete_src && incomplete_dst) { | 926 | template <class P> |
| 927 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 928 | ImageInfo new_info = info; | ||
| 929 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 930 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 931 | std::vector<ImageId> overlap_ids; | ||
| 932 | std::vector<ImageId> left_aliased_ids; | ||
| 933 | std::vector<ImageId> right_aliased_ids; | ||
| 934 | ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 935 | if (info.type != overlap.info.type) { | ||
| 1059 | return; | 936 | return; |
| 1060 | } | 937 | } |
| 1061 | 938 | if (info.type == ImageType::Linear) { | |
| 1062 | const bool any_incomplete = incomplete_src || incomplete_dst; | 939 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { |
| 1063 | 940 | // Alias linear images with the same pitch | |
| 1064 | if (!any_incomplete) { | 941 | left_aliased_ids.push_back(overlap_id); |
| 1065 | if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { | ||
| 1066 | return; | ||
| 1067 | } | ||
| 1068 | } else { | ||
| 1069 | if (incomplete_src && !(deduced_dst.IsDepth())) { | ||
| 1070 | return; | ||
| 1071 | } | ||
| 1072 | |||
| 1073 | if (incomplete_dst && !(deduced_src.IsDepth())) { | ||
| 1074 | return; | ||
| 1075 | } | 942 | } |
| 943 | return; | ||
| 1076 | } | 944 | } |
| 1077 | 945 | static constexpr bool strict_size = true; | |
| 1078 | const auto inherit_format = [](SurfaceParams& to, TSurface from) { | 946 | const std::optional<OverlapResult> solution = |
| 1079 | const SurfaceParams& params = from->GetSurfaceParams(); | 947 | ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views); |
| 1080 | to.pixel_format = params.pixel_format; | 948 | if (solution) { |
| 1081 | to.type = params.type; | 949 | gpu_addr = solution->gpu_addr; |
| 1082 | }; | 950 | cpu_addr = solution->cpu_addr; |
| 1083 | // Now we got the cases where one or both is Depth and the other is not known | 951 | new_info.resources = solution->resources; |
| 1084 | if (!incomplete_src) { | 952 | overlap_ids.push_back(overlap_id); |
| 1085 | inherit_format(src_params, deduced_src.surface); | 953 | return; |
| 1086 | } else { | ||
| 1087 | inherit_format(src_params, deduced_dst.surface); | ||
| 1088 | } | 954 | } |
| 1089 | if (!incomplete_dst) { | 955 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; |
| 1090 | inherit_format(dst_params, deduced_dst.surface); | 956 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); |
| 957 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) { | ||
| 958 | left_aliased_ids.push_back(overlap_id); | ||
| 959 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 960 | broken_views)) { | ||
| 961 | right_aliased_ids.push_back(overlap_id); | ||
| 962 | } | ||
| 963 | }); | ||
| 964 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 965 | Image& new_image = slot_images[new_image_id]; | ||
| 966 | |||
| 967 | // TODO: Only upload what we need | ||
| 968 | RefreshContents(new_image); | ||
| 969 | |||
| 970 | for (const ImageId overlap_id : overlap_ids) { | ||
| 971 | Image& overlap = slot_images[overlap_id]; | ||
| 972 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 973 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 1091 | } else { | 974 | } else { |
| 1092 | inherit_format(dst_params, deduced_src.surface); | 975 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); |
| 976 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 977 | runtime.CopyImage(new_image, overlap, copies); | ||
| 1093 | } | 978 | } |
| 979 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 980 | UntrackImage(overlap); | ||
| 981 | } | ||
| 982 | UnregisterImage(overlap_id); | ||
| 983 | DeleteImage(overlap_id); | ||
| 984 | } | ||
| 985 | ImageBase& new_image_base = new_image; | ||
| 986 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 987 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 988 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 989 | } | ||
| 990 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 991 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 992 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 1094 | } | 993 | } |
| 994 | RegisterImage(new_image_id); | ||
| 995 | return new_image_id; | ||
| 996 | } | ||
| 1095 | 997 | ||
| 1096 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | 998 | template <class P> |
| 1097 | bool preserve_contents) { | 999 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( |
| 1098 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | 1000 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { |
| 1099 | Register(new_surface); | 1001 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; |
| 1100 | if (preserve_contents) { | 1002 | const GPUVAddr dst_addr = dst.Address(); |
| 1101 | LoadSurface(new_surface); | 1003 | const GPUVAddr src_addr = src.Address(); |
| 1102 | } | 1004 | ImageInfo dst_info(dst); |
| 1103 | return {new_surface, new_surface->GetMainView()}; | 1005 | ImageInfo src_info(src); |
| 1006 | ImageId dst_id; | ||
| 1007 | ImageId src_id; | ||
| 1008 | do { | ||
| 1009 | has_deleted_images = false; | ||
| 1010 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 1011 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 1012 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 1013 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 1014 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 1015 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 1016 | continue; | ||
| 1017 | } | ||
| 1018 | if (!dst_id) { | ||
| 1019 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1020 | } | ||
| 1021 | if (!src_id) { | ||
| 1022 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 1023 | } | ||
| 1024 | } while (has_deleted_images); | ||
| 1025 | return BlitImages{ | ||
| 1026 | .dst_id = dst_id, | ||
| 1027 | .src_id = src_id, | ||
| 1028 | .dst_format = dst_info.format, | ||
| 1029 | .src_format = src_info.format, | ||
| 1030 | }; | ||
| 1031 | } | ||
| 1032 | |||
| 1033 | template <class P> | ||
| 1034 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 1035 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 1036 | return NULL_SAMPLER_ID; | ||
| 1037 | } | ||
| 1038 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 1039 | if (is_new) { | ||
| 1040 | pair->second = slot_samplers.insert(runtime, config); | ||
| 1104 | } | 1041 | } |
| 1042 | return pair->second; | ||
| 1043 | } | ||
| 1105 | 1044 | ||
| 1106 | void LoadSurface(const TSurface& surface) { | 1045 | template <class P> |
| 1107 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1046 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { |
| 1108 | surface->LoadBuffer(gpu_memory, staging_cache); | 1047 | const auto& regs = maxwell3d.regs; |
| 1109 | surface->UploadTexture(staging_cache.GetBuffer(0)); | 1048 | if (index >= regs.rt_control.count) { |
| 1110 | surface->MarkAsModified(false, Tick()); | 1049 | return ImageViewId{}; |
| 1050 | } | ||
| 1051 | const auto& rt = regs.rt[index]; | ||
| 1052 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 1053 | if (gpu_addr == 0) { | ||
| 1054 | return ImageViewId{}; | ||
| 1055 | } | ||
| 1056 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1057 | return ImageViewId{}; | ||
| 1111 | } | 1058 | } |
| 1059 | const ImageInfo info(regs, index); | ||
| 1060 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1061 | } | ||
| 1112 | 1062 | ||
| 1113 | void FlushSurface(const TSurface& surface) { | 1063 | template <class P> |
| 1114 | if (!surface->IsModified()) { | 1064 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { |
| 1115 | return; | 1065 | const auto& regs = maxwell3d.regs; |
| 1116 | } | 1066 | if (!regs.zeta_enable) { |
| 1117 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | 1067 | return ImageViewId{}; |
| 1118 | surface->DownloadTexture(staging_cache.GetBuffer(0)); | 1068 | } |
| 1119 | surface->FlushBuffer(gpu_memory, staging_cache); | 1069 | const GPUVAddr gpu_addr = regs.zeta.Address(); |
| 1120 | surface->MarkAsModified(false, Tick()); | 1070 | if (gpu_addr == 0) { |
| 1121 | } | 1071 | return ImageViewId{}; |
| 1122 | |||
| 1123 | void RegisterInnerCache(TSurface& surface) { | ||
| 1124 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 1125 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1126 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | ||
| 1127 | l1_cache[cpu_addr] = surface; | ||
| 1128 | while (start <= end) { | ||
| 1129 | registry[start].push_back(surface); | ||
| 1130 | start++; | ||
| 1131 | } | ||
| 1132 | } | 1072 | } |
| 1073 | const ImageInfo info(regs); | ||
| 1074 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1075 | } | ||
| 1133 | 1076 | ||
| 1134 | void UnregisterInnerCache(TSurface& surface) { | 1077 | template <class P> |
| 1135 | const VAddr cpu_addr = surface->GetCpuAddr(); | 1078 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 1136 | VAddr start = cpu_addr >> registry_page_bits; | 1079 | bool is_clear) { |
| 1137 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; | 1080 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; |
| 1138 | l1_cache.erase(cpu_addr); | 1081 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); |
| 1139 | while (start <= end) { | 1082 | if (!image_id) { |
| 1140 | auto& reg{registry[start]}; | 1083 | return NULL_IMAGE_VIEW_ID; |
| 1141 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | 1084 | } |
| 1142 | start++; | 1085 | Image& image = slot_images[image_id]; |
| 1143 | } | 1086 | const ImageViewType view_type = RenderTargetImageViewType(info); |
| 1087 | SubresourceBase base; | ||
| 1088 | if (image.info.type == ImageType::Linear) { | ||
| 1089 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 1090 | } else { | ||
| 1091 | base = image.TryFindBase(gpu_addr).value(); | ||
| 1144 | } | 1092 | } |
| 1093 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 1094 | const SubresourceRange range{ | ||
| 1095 | .base = base, | ||
| 1096 | .extent = {.levels = 1, .layers = layers}, | ||
| 1097 | }; | ||
| 1098 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 1099 | } | ||
| 1145 | 1100 | ||
| 1146 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1101 | template <class P> |
| 1147 | if (size == 0) { | 1102 | template <typename Func> |
| 1148 | return {}; | 1103 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { |
| 1104 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1105 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1106 | boost::container::small_vector<ImageId, 32> images; | ||
| 1107 | ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { | ||
| 1108 | const auto it = page_table.find(page); | ||
| 1109 | if (it == page_table.end()) { | ||
| 1110 | if constexpr (BOOL_BREAK) { | ||
| 1111 | return false; | ||
| 1112 | } else { | ||
| 1113 | return; | ||
| 1114 | } | ||
| 1149 | } | 1115 | } |
| 1150 | const VAddr cpu_addr_end = cpu_addr + size; | 1116 | for (const ImageId image_id : it->second) { |
| 1151 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1117 | Image& image = slot_images[image_id]; |
| 1152 | VectorSurface surfaces; | 1118 | if (True(image.flags & ImageFlagBits::Picked)) { |
| 1153 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { | ||
| 1154 | const auto it = registry.find(start); | ||
| 1155 | if (it == registry.end()) { | ||
| 1156 | continue; | 1119 | continue; |
| 1157 | } | 1120 | } |
| 1158 | for (auto& surface : it->second) { | 1121 | if (!image.Overlaps(cpu_addr, size)) { |
| 1159 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1122 | continue; |
| 1160 | continue; | 1123 | } |
| 1124 | image.flags |= ImageFlagBits::Picked; | ||
| 1125 | images.push_back(image_id); | ||
| 1126 | if constexpr (BOOL_BREAK) { | ||
| 1127 | if (func(image_id, image)) { | ||
| 1128 | return true; | ||
| 1161 | } | 1129 | } |
| 1162 | surface->MarkAsPicked(true); | 1130 | } else { |
| 1163 | surfaces.push_back(surface); | 1131 | func(image_id, image); |
| 1164 | } | 1132 | } |
| 1165 | } | 1133 | } |
| 1166 | for (auto& surface : surfaces) { | 1134 | if constexpr (BOOL_BREAK) { |
| 1167 | surface->MarkAsPicked(false); | 1135 | return false; |
| 1168 | } | 1136 | } |
| 1169 | return surfaces; | 1137 | }); |
| 1138 | for (const ImageId image_id : images) { | ||
| 1139 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1170 | } | 1140 | } |
| 1141 | } | ||
| 1171 | 1142 | ||
| 1172 | void ReserveSurface(const SurfaceParams& params, TSurface surface) { | 1143 | template <class P> |
| 1173 | surface_reserve[params].push_back(std::move(surface)); | 1144 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { |
| 1145 | Image& image = slot_images[image_id]; | ||
| 1146 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1147 | return image_view_id; | ||
| 1174 | } | 1148 | } |
| 1149 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1150 | image.InsertView(info, image_view_id); | ||
| 1151 | return image_view_id; | ||
| 1152 | } | ||
| 1153 | |||
| 1154 | template <class P> | ||
| 1155 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1156 | ImageBase& image = slot_images[image_id]; | ||
| 1157 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1158 | "Trying to register an already registered image"); | ||
| 1159 | image.flags |= ImageFlagBits::Registered; | ||
| 1160 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1161 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | ||
| 1162 | } | ||
| 1175 | 1163 | ||
| 1176 | TSurface TryGetReservedSurface(const SurfaceParams& params) { | 1164 | template <class P> |
| 1177 | auto search{surface_reserve.find(params)}; | 1165 | void TextureCache<P>::UnregisterImage(ImageId image_id) { |
| 1178 | if (search == surface_reserve.end()) { | 1166 | Image& image = slot_images[image_id]; |
| 1179 | return {}; | 1167 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), |
| 1168 | "Trying to unregister an already registered image"); | ||
| 1169 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1170 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||
| 1171 | const auto page_it = page_table.find(page); | ||
| 1172 | if (page_it == page_table.end()) { | ||
| 1173 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1174 | return; | ||
| 1180 | } | 1175 | } |
| 1181 | for (auto& surface : search->second) { | 1176 | std::vector<ImageId>& image_ids = page_it->second; |
| 1182 | if (!surface->IsRegistered()) { | 1177 | const auto vector_it = std::ranges::find(image_ids, image_id); |
| 1183 | return surface; | 1178 | if (vector_it == image_ids.end()) { |
| 1184 | } | 1179 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); |
| 1180 | return; | ||
| 1185 | } | 1181 | } |
| 1186 | return {}; | 1182 | image_ids.erase(vector_it); |
| 1187 | } | 1183 | }); |
| 1184 | } | ||
| 1188 | 1185 | ||
| 1189 | /// Try to do an image copy logging when formats are incompatible. | 1186 | template <class P> |
| 1190 | void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { | 1187 | void TextureCache<P>::TrackImage(ImageBase& image) { |
| 1191 | const SurfaceParams& src_params = src->GetSurfaceParams(); | 1188 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1192 | const SurfaceParams& dst_params = dst->GetSurfaceParams(); | 1189 | image.flags |= ImageFlagBits::Tracked; |
| 1193 | if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { | 1190 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); |
| 1194 | LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", | 1191 | } |
| 1195 | static_cast<int>(dst_params.pixel_format), | 1192 | |
| 1196 | static_cast<int>(src_params.pixel_format)); | 1193 | template <class P> |
| 1197 | return; | 1194 | void TextureCache<P>::UntrackImage(ImageBase& image) { |
| 1195 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||
| 1196 | image.flags &= ~ImageFlagBits::Tracked; | ||
| 1197 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | template <class P> | ||
| 1201 | void TextureCache<P>::DeleteImage(ImageId image_id) { | ||
| 1202 | ImageBase& image = slot_images[image_id]; | ||
| 1203 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 1204 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1205 | if (alloc_it == image_allocs_table.end()) { | ||
| 1206 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1207 | gpu_addr); | ||
| 1208 | return; | ||
| 1209 | } | ||
| 1210 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1211 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1212 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1213 | if (alloc_image_it == alloc_images.end()) { | ||
| 1214 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1215 | return; | ||
| 1216 | } | ||
| 1217 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1218 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1219 | |||
| 1220 | // Mark render targets as dirty | ||
| 1221 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1222 | dirty[Dirty::RenderTargets] = true; | ||
| 1223 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1224 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1225 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1226 | } | ||
| 1227 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1228 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1229 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1230 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1231 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1198 | } | 1232 | } |
| 1199 | ImageCopy(src, dst, copy); | ||
| 1200 | } | 1233 | } |
| 1234 | RemoveImageViewReferences(image_view_ids); | ||
| 1235 | RemoveFramebuffers(image_view_ids); | ||
| 1236 | |||
| 1237 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1238 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1239 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1240 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1241 | return other_alias.id == image_id; | ||
| 1242 | }); | ||
| 1243 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1244 | num_removed_aliases); | ||
| 1245 | } | ||
| 1246 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1247 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1248 | slot_image_views.erase(image_view_id); | ||
| 1249 | } | ||
| 1250 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1251 | slot_images.erase(image_id); | ||
| 1201 | 1252 | ||
| 1202 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { | 1253 | alloc_images.erase(alloc_image_it); |
| 1203 | return siblings_table[static_cast<std::size_t>(format)]; | 1254 | if (alloc_images.empty()) { |
| 1255 | image_allocs_table.erase(alloc_it); | ||
| 1204 | } | 1256 | } |
| 1257 | if constexpr (ENABLE_VALIDATION) { | ||
| 1258 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1259 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1260 | } | ||
| 1261 | graphics_image_table.Invalidate(); | ||
| 1262 | compute_image_table.Invalidate(); | ||
| 1263 | has_deleted_images = true; | ||
| 1264 | } | ||
| 1205 | 1265 | ||
| 1206 | /// Returns true the shader sampler entry is compatible with the TIC texture type. | 1266 | template <class P> |
| 1207 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | 1267 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { |
| 1208 | const VideoCommon::Shader::Sampler& entry) { | 1268 | auto it = image_views.begin(); |
| 1209 | const auto shader_type = entry.type; | 1269 | while (it != image_views.end()) { |
| 1210 | switch (tic_type) { | 1270 | const auto found = std::ranges::find(removed_views, it->second); |
| 1211 | case Tegra::Texture::TextureType::Texture1D: | 1271 | if (found != removed_views.end()) { |
| 1212 | case Tegra::Texture::TextureType::Texture1DArray: | 1272 | it = image_views.erase(it); |
| 1213 | return shader_type == Tegra::Shader::TextureType::Texture1D; | 1273 | } else { |
| 1214 | case Tegra::Texture::TextureType::Texture1DBuffer: | 1274 | ++it; |
| 1215 | // TODO(Rodrigo): Assume as valid for now | ||
| 1216 | return true; | ||
| 1217 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1218 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1219 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1220 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1221 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1222 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1223 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1224 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1225 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1226 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1227 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1228 | return true; | ||
| 1229 | } | ||
| 1230 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.is_array; | ||
| 1231 | } | 1275 | } |
| 1232 | UNREACHABLE(); | ||
| 1233 | return true; | ||
| 1234 | } | 1276 | } |
| 1277 | } | ||
| 1235 | 1278 | ||
| 1236 | struct FramebufferTargetInfo { | 1279 | template <class P> |
| 1237 | TSurface target; | 1280 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { |
| 1238 | TView view; | 1281 | auto it = framebuffers.begin(); |
| 1239 | }; | 1282 | while (it != framebuffers.end()) { |
| 1240 | 1283 | if (it->first.Contains(removed_views)) { | |
| 1241 | void AsyncFlushSurface(TSurface& surface) { | 1284 | it = framebuffers.erase(it); |
| 1242 | if (!uncommitted_flushes) { | 1285 | } else { |
| 1243 | uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | 1286 | ++it; |
| 1244 | } | 1287 | } |
| 1245 | uncommitted_flushes->push_back(surface); | ||
| 1246 | } | 1288 | } |
| 1289 | } | ||
| 1247 | 1290 | ||
| 1248 | VideoCore::RasterizerInterface& rasterizer; | 1291 | template <class P> |
| 1249 | Tegra::Engines::Maxwell3D& maxwell3d; | 1292 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { |
| 1250 | Tegra::MemoryManager& gpu_memory; | 1293 | image.flags |= ImageFlagBits::GpuModified; |
| 1251 | 1294 | image.modification_tick = ++modification_tick; | |
| 1252 | FormatLookupTable format_lookup_table; | 1295 | } |
| 1253 | FormatCompatibility format_compatibility; | ||
| 1254 | |||
| 1255 | u64 ticks{}; | ||
| 1256 | |||
| 1257 | // Guards the cache for protection conflicts. | ||
| 1258 | bool guard_render_targets{}; | ||
| 1259 | bool guard_samplers{}; | ||
| 1260 | |||
| 1261 | // The siblings table is for formats that can inter exchange with one another | ||
| 1262 | // without causing issues. This is only valid when a conflict occurs on a non | ||
| 1263 | // rendering use. | ||
| 1264 | std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; | ||
| 1265 | |||
| 1266 | // The internal Cache is different for the Texture Cache. It's based on buckets | ||
| 1267 | // of 1MB. This fits better for the purpose of this cache as textures are normaly | ||
| 1268 | // large in size. | ||
| 1269 | static constexpr u64 registry_page_bits{20}; | ||
| 1270 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||
| 1271 | std::unordered_map<VAddr, std::vector<TSurface>> registry; | ||
| 1272 | 1296 | ||
| 1273 | static constexpr u32 DEPTH_RT = 8; | 1297 | template <class P> |
| 1274 | static constexpr u32 NO_RT = 0xFFFFFFFF; | 1298 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 1299 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1300 | ImageBase& image = slot_images[image_id]; | ||
| 1301 | u64 most_recent_tick = image.modification_tick; | ||
| 1302 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1303 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1304 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1305 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1306 | aliased_images.push_back(&aliased); | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | if (aliased_images.empty()) { | ||
| 1310 | return; | ||
| 1311 | } | ||
| 1312 | image.modification_tick = most_recent_tick; | ||
| 1313 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1314 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1315 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1316 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1317 | }); | ||
| 1318 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1319 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1320 | } | ||
| 1321 | } | ||
| 1275 | 1322 | ||
| 1276 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 1323 | template <class P> |
| 1277 | // This avoids calculating size and other stuffs. | 1324 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { |
| 1278 | std::unordered_map<VAddr, TSurface> l1_cache; | 1325 | Image& image = slot_images[image_id]; |
| 1326 | if (invalidate) { | ||
| 1327 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | ||
| 1328 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1329 | TrackImage(image); | ||
| 1330 | } | ||
| 1331 | } else { | ||
| 1332 | RefreshContents(image); | ||
| 1333 | SynchronizeAliases(image_id); | ||
| 1334 | } | ||
| 1335 | if (is_modification) { | ||
| 1336 | MarkModification(image); | ||
| 1337 | } | ||
| 1338 | image.frame_tick = frame_tick; | ||
| 1339 | } | ||
| 1279 | 1340 | ||
| 1280 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | 1341 | template <class P> |
| 1281 | /// previously been used. This is to prevent surfaces from being constantly created and | 1342 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, |
| 1282 | /// destroyed when used with different surface parameters. | 1343 | bool invalidate) { |
| 1283 | std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; | 1344 | if (!image_view_id) { |
| 1284 | std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | 1345 | return; |
| 1285 | render_targets; | 1346 | } |
| 1286 | FramebufferTargetInfo depth_buffer; | 1347 | const ImageViewBase& image_view = slot_image_views[image_view_id]; |
| 1348 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1349 | } | ||
| 1287 | 1350 | ||
| 1288 | std::vector<TSurface> sampled_textures; | 1351 | template <class P> |
| 1352 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | ||
| 1353 | Image& dst = slot_images[dst_id]; | ||
| 1354 | Image& src = slot_images[src_id]; | ||
| 1355 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1356 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1357 | if (src_format_type == dst_format_type) { | ||
| 1358 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1359 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1360 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1361 | } | ||
| 1362 | } | ||
| 1363 | return runtime.CopyImage(dst, src, copies); | ||
| 1364 | } | ||
| 1365 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1366 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1367 | for (const ImageCopy& copy : copies) { | ||
| 1368 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1369 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1370 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1371 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1372 | |||
| 1373 | const SubresourceBase dst_base{ | ||
| 1374 | .level = copy.dst_subresource.base_level, | ||
| 1375 | .layer = copy.dst_subresource.base_layer, | ||
| 1376 | }; | ||
| 1377 | const SubresourceBase src_base{ | ||
| 1378 | .level = copy.src_subresource.base_level, | ||
| 1379 | .layer = copy.src_subresource.base_layer, | ||
| 1380 | }; | ||
| 1381 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1382 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1383 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1384 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1385 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1386 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1387 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1388 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1389 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1390 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1391 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1392 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1393 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1394 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1395 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1396 | }; | ||
| 1397 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1289 | 1398 | ||
| 1290 | /// This cache stores null surfaces in order to be used as a placeholder | 1399 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); |
| 1291 | /// for invalid texture calls. | 1400 | } |
| 1292 | std::unordered_map<u32, TSurface> invalid_cache; | 1401 | } |
| 1293 | std::vector<u8> invalid_memory; | ||
| 1294 | 1402 | ||
| 1295 | std::list<TSurface> marked_for_unregister; | 1403 | template <class P> |
| 1404 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { | ||
| 1405 | if (*old_id == new_id) { | ||
| 1406 | return; | ||
| 1407 | } | ||
| 1408 | if (*old_id) { | ||
| 1409 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1410 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1411 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1412 | } | ||
| 1413 | } | ||
| 1414 | *old_id = new_id; | ||
| 1415 | } | ||
| 1296 | 1416 | ||
| 1297 | std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | 1417 | template <class P> |
| 1298 | std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | 1418 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( |
| 1419 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1420 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1421 | const ImageBase& image = slot_images[image_id]; | ||
| 1422 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1423 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1424 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1425 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1426 | const u32 num_samples = image.info.num_samples; | ||
| 1427 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1428 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1429 | .color_buffer_ids = {color_view_id}, | ||
| 1430 | .depth_buffer_id = depth_view_id, | ||
| 1431 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1432 | }); | ||
| 1433 | return {framebuffer_id, view_id}; | ||
| 1434 | } | ||
| 1299 | 1435 | ||
| 1300 | StagingCache staging_cache; | 1436 | template <class P> |
| 1301 | std::recursive_mutex mutex; | 1437 | bool TextureCache<P>::IsFullClear(ImageViewId id) { |
| 1302 | }; | 1438 | if (!id) { |
| 1439 | return true; | ||
| 1440 | } | ||
| 1441 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1442 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1443 | const Extent3D size = image_view.size; | ||
| 1444 | const auto& regs = maxwell3d.regs; | ||
| 1445 | const auto& scissor = regs.scissor_test[0]; | ||
| 1446 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1447 | // Images with multiple resources can't be cleared in a single call | ||
| 1448 | return false; | ||
| 1449 | } | ||
| 1450 | if (regs.clear_flags.scissor == 0) { | ||
| 1451 | // If scissor testing is disabled, the clear is always full | ||
| 1452 | return true; | ||
| 1453 | } | ||
| 1454 | // Make sure the clear covers all texels in the subresource | ||
| 1455 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1456 | scissor.max_y >= size.height; | ||
| 1457 | } | ||
| 1303 | 1458 | ||
| 1304 | } // namespace VideoCommon | 1459 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h new file mode 100644 index 000000000..2ad2d72a6 --- /dev/null +++ b/src/video_core/texture_cache/types.h | |||
| @@ -0,0 +1,140 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_funcs.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/texture_cache/slot_vector.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | constexpr size_t NUM_RT = 8; | ||
| 14 | constexpr size_t MAX_MIP_LEVELS = 14; | ||
| 15 | |||
| 16 | constexpr SlotId CORRUPT_ID{0xfffffffe}; | ||
| 17 | |||
| 18 | using ImageId = SlotId; | ||
| 19 | using ImageViewId = SlotId; | ||
| 20 | using ImageAllocId = SlotId; | ||
| 21 | using SamplerId = SlotId; | ||
| 22 | using FramebufferId = SlotId; | ||
| 23 | |||
| 24 | enum class ImageType : u32 { | ||
| 25 | e1D, | ||
| 26 | e2D, | ||
| 27 | e3D, | ||
| 28 | Linear, | ||
| 29 | Buffer, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class ImageViewType : u32 { | ||
| 33 | e1D, | ||
| 34 | e2D, | ||
| 35 | Cube, | ||
| 36 | e3D, | ||
| 37 | e1DArray, | ||
| 38 | e2DArray, | ||
| 39 | CubeArray, | ||
| 40 | Rect, | ||
| 41 | Buffer, | ||
| 42 | }; | ||
| 43 | constexpr size_t NUM_IMAGE_VIEW_TYPES = 9; | ||
| 44 | |||
| 45 | enum class RelaxedOptions : u32 { | ||
| 46 | Size = 1 << 0, | ||
| 47 | Format = 1 << 1, | ||
| 48 | Samples = 1 << 2, | ||
| 49 | }; | ||
| 50 | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | ||
| 51 | |||
| 52 | struct Offset2D { | ||
| 53 | constexpr auto operator<=>(const Offset2D&) const noexcept = default; | ||
| 54 | |||
| 55 | s32 x; | ||
| 56 | s32 y; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Offset3D { | ||
| 60 | constexpr auto operator<=>(const Offset3D&) const noexcept = default; | ||
| 61 | |||
| 62 | s32 x; | ||
| 63 | s32 y; | ||
| 64 | s32 z; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct Extent2D { | ||
| 68 | constexpr auto operator<=>(const Extent2D&) const noexcept = default; | ||
| 69 | |||
| 70 | u32 width; | ||
| 71 | u32 height; | ||
| 72 | }; | ||
| 73 | |||
| 74 | struct Extent3D { | ||
| 75 | constexpr auto operator<=>(const Extent3D&) const noexcept = default; | ||
| 76 | |||
| 77 | u32 width; | ||
| 78 | u32 height; | ||
| 79 | u32 depth; | ||
| 80 | }; | ||
| 81 | |||
| 82 | struct SubresourceLayers { | ||
| 83 | s32 base_level = 0; | ||
| 84 | s32 base_layer = 0; | ||
| 85 | s32 num_layers = 1; | ||
| 86 | }; | ||
| 87 | |||
| 88 | struct SubresourceBase { | ||
| 89 | constexpr auto operator<=>(const SubresourceBase&) const noexcept = default; | ||
| 90 | |||
| 91 | s32 level = 0; | ||
| 92 | s32 layer = 0; | ||
| 93 | }; | ||
| 94 | |||
| 95 | struct SubresourceExtent { | ||
| 96 | constexpr auto operator<=>(const SubresourceExtent&) const noexcept = default; | ||
| 97 | |||
| 98 | s32 levels = 1; | ||
| 99 | s32 layers = 1; | ||
| 100 | }; | ||
| 101 | |||
| 102 | struct SubresourceRange { | ||
| 103 | constexpr auto operator<=>(const SubresourceRange&) const noexcept = default; | ||
| 104 | |||
| 105 | SubresourceBase base; | ||
| 106 | SubresourceExtent extent; | ||
| 107 | }; | ||
| 108 | |||
| 109 | struct ImageCopy { | ||
| 110 | SubresourceLayers src_subresource; | ||
| 111 | SubresourceLayers dst_subresource; | ||
| 112 | Offset3D src_offset; | ||
| 113 | Offset3D dst_offset; | ||
| 114 | Extent3D extent; | ||
| 115 | }; | ||
| 116 | |||
| 117 | struct BufferImageCopy { | ||
| 118 | size_t buffer_offset; | ||
| 119 | size_t buffer_size; | ||
| 120 | u32 buffer_row_length; | ||
| 121 | u32 buffer_image_height; | ||
| 122 | SubresourceLayers image_subresource; | ||
| 123 | Offset3D image_offset; | ||
| 124 | Extent3D image_extent; | ||
| 125 | }; | ||
| 126 | |||
| 127 | struct BufferCopy { | ||
| 128 | size_t src_offset; | ||
| 129 | size_t dst_offset; | ||
| 130 | size_t size; | ||
| 131 | }; | ||
| 132 | |||
| 133 | struct SwizzleParameters { | ||
| 134 | Extent3D num_tiles; | ||
| 135 | Extent3D block; | ||
| 136 | size_t buffer_offset; | ||
| 137 | s32 level; | ||
| 138 | }; | ||
| 139 | |||
| 140 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp new file mode 100644 index 000000000..279932778 --- /dev/null +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -0,0 +1,1233 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // This files contains code from Ryujinx | ||
| 6 | // A copy of the code can be obtained from https://github.com/Ryujinx/Ryujinx | ||
| 7 | // The sections using code from Ryujinx are marked with a link to the original version | ||
| 8 | |||
| 9 | // MIT License | ||
| 10 | // | ||
| 11 | // Copyright (c) Ryujinx Team and Contributors | ||
| 12 | // | ||
| 13 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and | ||
| 14 | // associated documentation files (the "Software"), to deal in the Software without restriction, | ||
| 15 | // including without limitation the rights to use, copy, modify, merge, publish, distribute, | ||
| 16 | // sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is | ||
| 17 | // furnished to do so, subject to the following conditions: | ||
| 18 | // | ||
| 19 | // The above copyright notice and this permission notice shall be included in all copies or | ||
| 20 | // substantial portions of the Software. | ||
| 21 | // | ||
| 22 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT | ||
| 23 | // NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
| 24 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, | ||
| 25 | // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
| 26 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
| 27 | // | ||
| 28 | |||
| 29 | #include <algorithm> | ||
| 30 | #include <array> | ||
| 31 | #include <numeric> | ||
| 32 | #include <optional> | ||
| 33 | #include <span> | ||
| 34 | #include <vector> | ||
| 35 | |||
| 36 | #include "common/alignment.h" | ||
| 37 | #include "common/assert.h" | ||
| 38 | #include "common/bit_util.h" | ||
| 39 | #include "common/common_types.h" | ||
| 40 | #include "common/div_ceil.h" | ||
| 41 | #include "video_core/compatible_formats.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/surface.h" | ||
| 45 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 46 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 47 | #include "video_core/texture_cache/formatter.h" | ||
| 48 | #include "video_core/texture_cache/samples_helper.h" | ||
| 49 | #include "video_core/texture_cache/util.h" | ||
| 50 | #include "video_core/textures/astc.h" | ||
| 51 | #include "video_core/textures/decoders.h" | ||
| 52 | |||
| 53 | namespace VideoCommon { | ||
| 54 | |||
| 55 | namespace { | ||
| 56 | |||
| 57 | using Tegra::Texture::GOB_SIZE; | ||
| 58 | using Tegra::Texture::GOB_SIZE_SHIFT; | ||
| 59 | using Tegra::Texture::GOB_SIZE_X; | ||
| 60 | using Tegra::Texture::GOB_SIZE_X_SHIFT; | ||
| 61 | using Tegra::Texture::GOB_SIZE_Y; | ||
| 62 | using Tegra::Texture::GOB_SIZE_Y_SHIFT; | ||
| 63 | using Tegra::Texture::GOB_SIZE_Z; | ||
| 64 | using Tegra::Texture::GOB_SIZE_Z_SHIFT; | ||
| 65 | using Tegra::Texture::MsaaMode; | ||
| 66 | using Tegra::Texture::SwizzleTexture; | ||
| 67 | using Tegra::Texture::TextureFormat; | ||
| 68 | using Tegra::Texture::TextureType; | ||
| 69 | using Tegra::Texture::TICEntry; | ||
| 70 | using Tegra::Texture::UnswizzleTexture; | ||
| 71 | using VideoCore::Surface::BytesPerBlock; | ||
| 72 | using VideoCore::Surface::DefaultBlockHeight; | ||
| 73 | using VideoCore::Surface::DefaultBlockWidth; | ||
| 74 | using VideoCore::Surface::IsCopyCompatible; | ||
| 75 | using VideoCore::Surface::IsPixelFormatASTC; | ||
| 76 | using VideoCore::Surface::IsViewCompatible; | ||
| 77 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 78 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 79 | using VideoCore::Surface::SurfaceType; | ||
| 80 | |||
| 81 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 82 | |||
| 83 | struct LevelInfo { | ||
| 84 | Extent3D size; | ||
| 85 | Extent3D block; | ||
| 86 | Extent2D tile_size; | ||
| 87 | u32 bpp_log2; | ||
| 88 | u32 tile_width_spacing; | ||
| 89 | }; | ||
| 90 | |||
| 91 | [[nodiscard]] constexpr u32 AdjustTileSize(u32 shift, u32 unit_factor, u32 dimension) { | ||
| 92 | if (shift == 0) { | ||
| 93 | return 0; | ||
| 94 | } | ||
| 95 | u32 x = unit_factor << (shift - 1); | ||
| 96 | if (x >= dimension) { | ||
| 97 | while (--shift) { | ||
| 98 | x >>= 1; | ||
| 99 | if (x < dimension) { | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | } | ||
| 104 | return shift; | ||
| 105 | } | ||
| 106 | |||
| 107 | [[nodiscard]] constexpr u32 AdjustMipSize(u32 size, u32 level) { | ||
| 108 | return std::max<u32>(size >> level, 1); | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] constexpr Extent3D AdjustMipSize(Extent3D size, s32 level) { | ||
| 112 | return Extent3D{ | ||
| 113 | .width = AdjustMipSize(size.width, level), | ||
| 114 | .height = AdjustMipSize(size.height, level), | ||
| 115 | .depth = AdjustMipSize(size.depth, level), | ||
| 116 | }; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] Extent3D AdjustSamplesSize(Extent3D size, s32 num_samples) { | ||
| 120 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 121 | return Extent3D{ | ||
| 122 | .width = size.width >> samples_x, | ||
| 123 | .height = size.height >> samples_y, | ||
| 124 | .depth = size.depth, | ||
| 125 | }; | ||
| 126 | } | ||
| 127 | |||
| 128 | template <u32 GOB_EXTENT> | ||
| 129 | [[nodiscard]] constexpr u32 AdjustMipBlockSize(u32 num_tiles, u32 block_size, u32 level) { | ||
| 130 | do { | ||
| 131 | while (block_size > 0 && num_tiles <= (1U << (block_size - 1)) * GOB_EXTENT) { | ||
| 132 | --block_size; | ||
| 133 | } | ||
| 134 | } while (level--); | ||
| 135 | return block_size; | ||
| 136 | } | ||
| 137 | |||
| 138 | [[nodiscard]] constexpr Extent3D AdjustMipBlockSize(Extent3D num_tiles, Extent3D block_size, | ||
| 139 | u32 level) { | ||
| 140 | return { | ||
| 141 | .width = AdjustMipBlockSize<GOB_SIZE_X>(num_tiles.width, block_size.width, level), | ||
| 142 | .height = AdjustMipBlockSize<GOB_SIZE_Y>(num_tiles.height, block_size.height, level), | ||
| 143 | .depth = AdjustMipBlockSize<GOB_SIZE_Z>(num_tiles.depth, block_size.depth, level), | ||
| 144 | }; | ||
| 145 | } | ||
| 146 | |||
| 147 | [[nodiscard]] constexpr Extent3D AdjustTileSize(Extent3D size, Extent2D tile_size) { | ||
| 148 | return { | ||
| 149 | .width = Common::DivCeil(size.width, tile_size.width), | ||
| 150 | .height = Common::DivCeil(size.height, tile_size.height), | ||
| 151 | .depth = size.depth, | ||
| 152 | }; | ||
| 153 | } | ||
| 154 | |||
| 155 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(u32 bytes_per_block) { | ||
| 156 | return std::countl_zero(bytes_per_block) ^ 0x1F; | ||
| 157 | } | ||
| 158 | |||
| 159 | [[nodiscard]] constexpr u32 BytesPerBlockLog2(PixelFormat format) { | ||
| 160 | return BytesPerBlockLog2(BytesPerBlock(format)); | ||
| 161 | } | ||
| 162 | |||
| 163 | [[nodiscard]] constexpr u32 NumBlocks(Extent3D size, Extent2D tile_size) { | ||
| 164 | const Extent3D num_blocks = AdjustTileSize(size, tile_size); | ||
| 165 | return num_blocks.width * num_blocks.height * num_blocks.depth; | ||
| 166 | } | ||
| 167 | |||
| 168 | [[nodiscard]] constexpr u32 AdjustSize(u32 size, u32 level, u32 block_size) { | ||
| 169 | return Common::DivCeil(AdjustMipSize(size, level), block_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | [[nodiscard]] constexpr u32 LayerSize(const TICEntry& config, PixelFormat format) { | ||
| 173 | return config.Width() * config.Height() * BytesPerBlock(format); | ||
| 174 | } | ||
| 175 | |||
| 176 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(TextureType type) { | ||
| 177 | switch (type) { | ||
| 178 | case TextureType::Texture2D: | ||
| 179 | case TextureType::Texture2DArray: | ||
| 180 | case TextureType::Texture2DNoMipmap: | ||
| 181 | case TextureType::Texture3D: | ||
| 182 | case TextureType::TextureCubeArray: | ||
| 183 | case TextureType::TextureCubemap: | ||
| 184 | return true; | ||
| 185 | case TextureType::Texture1D: | ||
| 186 | case TextureType::Texture1DArray: | ||
| 187 | case TextureType::Texture1DBuffer: | ||
| 188 | return false; | ||
| 189 | } | ||
| 190 | return false; | ||
| 191 | } | ||
| 192 | |||
| 193 | [[nodiscard]] constexpr bool HasTwoDimsPerLayer(ImageType type) { | ||
| 194 | switch (type) { | ||
| 195 | case ImageType::e2D: | ||
| 196 | case ImageType::e3D: | ||
| 197 | case ImageType::Linear: | ||
| 198 | return true; | ||
| 199 | case ImageType::e1D: | ||
| 200 | case ImageType::Buffer: | ||
| 201 | return false; | ||
| 202 | } | ||
| 203 | UNREACHABLE_MSG("Invalid image type={}", static_cast<int>(type)); | ||
| 204 | } | ||
| 205 | |||
| 206 | [[nodiscard]] constexpr std::pair<int, int> Samples(int num_samples) { | ||
| 207 | switch (num_samples) { | ||
| 208 | case 1: | ||
| 209 | return {1, 1}; | ||
| 210 | case 2: | ||
| 211 | return {2, 1}; | ||
| 212 | case 4: | ||
| 213 | return {2, 2}; | ||
| 214 | case 8: | ||
| 215 | return {4, 2}; | ||
| 216 | case 16: | ||
| 217 | return {4, 4}; | ||
| 218 | } | ||
| 219 | UNREACHABLE_MSG("Invalid number of samples={}", num_samples); | ||
| 220 | return {1, 1}; | ||
| 221 | } | ||
| 222 | |||
| 223 | [[nodiscard]] constexpr Extent2D DefaultBlockSize(PixelFormat format) { | ||
| 224 | return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; | ||
| 225 | } | ||
| 226 | |||
| 227 | [[nodiscard]] constexpr Extent3D NumLevelBlocks(const LevelInfo& info, u32 level) { | ||
| 228 | return Extent3D{ | ||
| 229 | .width = AdjustSize(info.size.width, level, info.tile_size.width) << info.bpp_log2, | ||
| 230 | .height = AdjustSize(info.size.height, level, info.tile_size.height), | ||
| 231 | .depth = AdjustMipSize(info.size.depth, level), | ||
| 232 | }; | ||
| 233 | } | ||
| 234 | |||
| 235 | [[nodiscard]] constexpr Extent3D TileShift(const LevelInfo& info, u32 level) { | ||
| 236 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 237 | return Extent3D{ | ||
| 238 | .width = AdjustTileSize(info.block.width, GOB_SIZE_X, blocks.width), | ||
| 239 | .height = AdjustTileSize(info.block.height, GOB_SIZE_Y, blocks.height), | ||
| 240 | .depth = AdjustTileSize(info.block.depth, GOB_SIZE_Z, blocks.depth), | ||
| 241 | }; | ||
| 242 | } | ||
| 243 | |||
| 244 | [[nodiscard]] constexpr Extent2D GobSize(u32 bpp_log2, u32 block_height, u32 tile_width_spacing) { | ||
| 245 | return Extent2D{ | ||
| 246 | .width = GOB_SIZE_X_SHIFT - bpp_log2 + tile_width_spacing, | ||
| 247 | .height = GOB_SIZE_Y_SHIFT + block_height, | ||
| 248 | }; | ||
| 249 | } | ||
| 250 | |||
| 251 | [[nodiscard]] constexpr bool IsSmallerThanGobSize(Extent3D num_tiles, Extent2D gob, | ||
| 252 | u32 block_depth) { | ||
| 253 | return num_tiles.width <= (1U << gob.width) || num_tiles.height <= (1U << gob.height) || | ||
| 254 | num_tiles.depth < (1U << block_depth); | ||
| 255 | } | ||
| 256 | |||
| 257 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, Extent2D gob, | ||
| 258 | u32 bpp_log2) { | ||
| 259 | if (IsSmallerThanGobSize(num_tiles, gob, block.depth)) { | ||
| 260 | return GOB_SIZE_X_SHIFT - bpp_log2; | ||
| 261 | } else { | ||
| 262 | return gob.width; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | [[nodiscard]] constexpr u32 StrideAlignment(Extent3D num_tiles, Extent3D block, u32 bpp_log2, | ||
| 267 | u32 tile_width_spacing) { | ||
| 268 | const Extent2D gob = GobSize(bpp_log2, block.height, tile_width_spacing); | ||
| 269 | return StrideAlignment(num_tiles, block, gob, bpp_log2); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[nodiscard]] constexpr Extent2D NumGobs(const LevelInfo& info, u32 level) { | ||
| 273 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 274 | const Extent2D gobs{ | ||
| 275 | .width = Common::DivCeilLog2(blocks.width, GOB_SIZE_X_SHIFT), | ||
| 276 | .height = Common::DivCeilLog2(blocks.height, GOB_SIZE_Y_SHIFT), | ||
| 277 | }; | ||
| 278 | const Extent2D gob = GobSize(info.bpp_log2, info.block.height, info.tile_width_spacing); | ||
| 279 | const bool is_small = IsSmallerThanGobSize(blocks, gob, info.block.depth); | ||
| 280 | const u32 alignment = is_small ? 0 : info.tile_width_spacing; | ||
| 281 | return Extent2D{ | ||
| 282 | .width = Common::AlignBits(gobs.width, alignment), | ||
| 283 | .height = gobs.height, | ||
| 284 | }; | ||
| 285 | } | ||
| 286 | |||
| 287 | [[nodiscard]] constexpr Extent3D LevelTiles(const LevelInfo& info, u32 level) { | ||
| 288 | const Extent3D blocks = NumLevelBlocks(info, level); | ||
| 289 | const Extent3D tile_shift = TileShift(info, level); | ||
| 290 | const Extent2D gobs = NumGobs(info, level); | ||
| 291 | return Extent3D{ | ||
| 292 | .width = Common::DivCeilLog2(gobs.width, tile_shift.width), | ||
| 293 | .height = Common::DivCeilLog2(gobs.height, tile_shift.height), | ||
| 294 | .depth = Common::DivCeilLog2(blocks.depth, tile_shift.depth), | ||
| 295 | }; | ||
| 296 | } | ||
| 297 | |||
| 298 | [[nodiscard]] constexpr u32 CalculateLevelSize(const LevelInfo& info, u32 level) { | ||
| 299 | const Extent3D tile_shift = TileShift(info, level); | ||
| 300 | const Extent3D tiles = LevelTiles(info, level); | ||
| 301 | const u32 num_tiles = tiles.width * tiles.height * tiles.depth; | ||
| 302 | const u32 shift = GOB_SIZE_SHIFT + tile_shift.width + tile_shift.height + tile_shift.depth; | ||
| 303 | return num_tiles << shift; | ||
| 304 | } | ||
| 305 | |||
| 306 | [[nodiscard]] constexpr std::array<u32, MAX_MIP_LEVELS> CalculateLevelSizes(const LevelInfo& info, | ||
| 307 | u32 num_levels) { | ||
| 308 | ASSERT(num_levels <= MAX_MIP_LEVELS); | ||
| 309 | std::array<u32, MAX_MIP_LEVELS> sizes{}; | ||
| 310 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 311 | sizes[level] = CalculateLevelSize(info, level); | ||
| 312 | } | ||
| 313 | return sizes; | ||
| 314 | } | ||
| 315 | |||
| 316 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(PixelFormat format, Extent3D size, Extent3D block, | ||
| 317 | u32 num_samples, u32 tile_width_spacing) { | ||
| 318 | const auto [samples_x, samples_y] = Samples(num_samples); | ||
| 319 | const u32 bytes_per_block = BytesPerBlock(format); | ||
| 320 | return { | ||
| 321 | .size = | ||
| 322 | { | ||
| 323 | .width = size.width * samples_x, | ||
| 324 | .height = size.height * samples_y, | ||
| 325 | .depth = size.depth, | ||
| 326 | }, | ||
| 327 | .block = block, | ||
| 328 | .tile_size = DefaultBlockSize(format), | ||
| 329 | .bpp_log2 = BytesPerBlockLog2(bytes_per_block), | ||
| 330 | .tile_width_spacing = tile_width_spacing, | ||
| 331 | }; | ||
| 332 | } | ||
| 333 | |||
| 334 | [[nodiscard]] constexpr LevelInfo MakeLevelInfo(const ImageInfo& info) { | ||
| 335 | return MakeLevelInfo(info.format, info.size, info.block, info.num_samples, | ||
| 336 | info.tile_width_spacing); | ||
| 337 | } | ||
| 338 | |||
| 339 | [[nodiscard]] constexpr u32 CalculateLevelOffset(PixelFormat format, Extent3D size, Extent3D block, | ||
| 340 | u32 num_samples, u32 tile_width_spacing, | ||
| 341 | u32 level) { | ||
| 342 | const LevelInfo info = MakeLevelInfo(format, size, block, num_samples, tile_width_spacing); | ||
| 343 | u32 offset = 0; | ||
| 344 | for (u32 current_level = 0; current_level < level; ++current_level) { | ||
| 345 | offset += CalculateLevelSize(info, current_level); | ||
| 346 | } | ||
| 347 | return offset; | ||
| 348 | } | ||
| 349 | |||
| 350 | [[nodiscard]] constexpr u32 AlignLayerSize(u32 size_bytes, Extent3D size, Extent3D block, | ||
| 351 | u32 tile_size_y, u32 tile_width_spacing) { | ||
| 352 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L134 | ||
| 353 | if (tile_width_spacing > 0) { | ||
| 354 | const u32 alignment_log2 = GOB_SIZE_SHIFT + tile_width_spacing + block.height + block.depth; | ||
| 355 | return Common::AlignBits(size_bytes, alignment_log2); | ||
| 356 | } | ||
| 357 | const u32 aligned_height = Common::AlignUp(size.height, tile_size_y); | ||
| 358 | while (block.height != 0 && aligned_height <= (1U << (block.height - 1)) * GOB_SIZE_Y) { | ||
| 359 | --block.height; | ||
| 360 | } | ||
| 361 | while (block.depth != 0 && size.depth <= (1U << (block.depth - 1))) { | ||
| 362 | --block.depth; | ||
| 363 | } | ||
| 364 | const u32 block_shift = GOB_SIZE_SHIFT + block.height + block.depth; | ||
| 365 | const u32 num_blocks = size_bytes >> block_shift; | ||
| 366 | if (size_bytes != num_blocks << block_shift) { | ||
| 367 | return (num_blocks + 1) << block_shift; | ||
| 368 | } | ||
| 369 | return size_bytes; | ||
| 370 | } | ||
| 371 | |||
| 372 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapEqualAddress(const ImageInfo& new_info, | ||
| 373 | const ImageBase& overlap, | ||
| 374 | bool strict_size) { | ||
| 375 | const ImageInfo& info = overlap.info; | ||
| 376 | if (!IsBlockLinearSizeCompatible(new_info, info, 0, 0, strict_size)) { | ||
| 377 | return std::nullopt; | ||
| 378 | } | ||
| 379 | if (new_info.block != info.block) { | ||
| 380 | return std::nullopt; | ||
| 381 | } | ||
| 382 | const SubresourceExtent resources = new_info.resources; | ||
| 383 | return SubresourceExtent{ | ||
| 384 | .levels = std::max(resources.levels, info.resources.levels), | ||
| 385 | .layers = std::max(resources.layers, info.resources.layers), | ||
| 386 | }; | ||
| 387 | } | ||
| 388 | |||
| 389 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | ||
| 390 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 391 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | ||
| 392 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | ||
| 393 | const auto it = std::ranges::find(slice_offsets, diff); | ||
| 394 | if (it == slice_offsets.end()) { | ||
| 395 | return std::nullopt; | ||
| 396 | } | ||
| 397 | const std::vector subresources = CalculateSliceSubresources(new_info); | ||
| 398 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | ||
| 399 | const ImageInfo& info = overlap.info; | ||
| 400 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 401 | return std::nullopt; | ||
| 402 | } | ||
| 403 | const u32 mip_depth = std::max(1U, new_info.size.depth << base.level); | ||
| 404 | if (mip_depth < info.size.depth + base.layer) { | ||
| 405 | return std::nullopt; | ||
| 406 | } | ||
| 407 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 408 | return std::nullopt; | ||
| 409 | } | ||
| 410 | return SubresourceExtent{ | ||
| 411 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 412 | .layers = 1, | ||
| 413 | }; | ||
| 414 | } | ||
| 415 | |||
| 416 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D( | ||
| 417 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | ||
| 418 | const u32 layer_stride = new_info.layer_stride; | ||
| 419 | const s32 new_size = layer_stride * new_info.resources.layers; | ||
| 420 | const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr); | ||
| 421 | if (diff > new_size) { | ||
| 422 | return std::nullopt; | ||
| 423 | } | ||
| 424 | const s32 base_layer = diff / layer_stride; | ||
| 425 | const s32 mip_offset = diff % layer_stride; | ||
| 426 | const std::array offsets = CalculateMipLevelOffsets(new_info); | ||
| 427 | const auto end = offsets.begin() + new_info.resources.levels; | ||
| 428 | const auto it = std::find(offsets.begin(), end, mip_offset); | ||
| 429 | if (it == end) { | ||
| 430 | // Mipmap is not aligned to any valid size | ||
| 431 | return std::nullopt; | ||
| 432 | } | ||
| 433 | const SubresourceBase base{ | ||
| 434 | .level = static_cast<s32>(std::distance(offsets.begin(), it)), | ||
| 435 | .layer = base_layer, | ||
| 436 | }; | ||
| 437 | const ImageInfo& info = overlap.info; | ||
| 438 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | ||
| 439 | return std::nullopt; | ||
| 440 | } | ||
| 441 | if (MipBlockSize(new_info, base.level) != info.block) { | ||
| 442 | return std::nullopt; | ||
| 443 | } | ||
| 444 | return SubresourceExtent{ | ||
| 445 | .levels = std::max(new_info.resources.levels, info.resources.levels + base.level), | ||
| 446 | .layers = std::max(new_info.resources.layers, info.resources.layers + base.layer), | ||
| 447 | }; | ||
| 448 | } | ||
| 449 | |||
| 450 | [[nodiscard]] std::optional<OverlapResult> ResolveOverlapRightAddress(const ImageInfo& new_info, | ||
| 451 | GPUVAddr gpu_addr, | ||
| 452 | VAddr cpu_addr, | ||
| 453 | const ImageBase& overlap, | ||
| 454 | bool strict_size) { | ||
| 455 | std::optional<SubresourceExtent> resources; | ||
| 456 | if (new_info.type != ImageType::e3D) { | ||
| 457 | resources = ResolveOverlapRightAddress2D(new_info, gpu_addr, overlap, strict_size); | ||
| 458 | } else { | ||
| 459 | resources = ResolveOverlapRightAddress3D(new_info, gpu_addr, overlap, strict_size); | ||
| 460 | } | ||
| 461 | if (!resources) { | ||
| 462 | return std::nullopt; | ||
| 463 | } | ||
| 464 | return OverlapResult{ | ||
| 465 | .gpu_addr = gpu_addr, | ||
| 466 | .cpu_addr = cpu_addr, | ||
| 467 | .resources = *resources, | ||
| 468 | }; | ||
| 469 | } | ||
| 470 | |||
/// Resolves an overlap in which the existing image begins at a lower GPU address
/// than the new image. The new image's address must land exactly on a (level, layer)
/// boundary of the existing image; on success the merged result is anchored at the
/// existing image's addresses and extended to cover both images' subresources.
[[nodiscard]] std::optional<OverlapResult> ResolveOverlapLeftAddress(const ImageInfo& new_info,
                                                                     GPUVAddr gpu_addr,
                                                                     VAddr cpu_addr,
                                                                     const ImageBase& overlap,
                                                                     bool strict_size) {
    // Locate which mip level/layer of the existing image the new address points at
    const std::optional<SubresourceBase> base = overlap.TryFindBase(gpu_addr);
    if (!base) {
        return std::nullopt;
    }
    const ImageInfo& info = overlap.info;
    // Dimensions of the existing image at that level must match the new image's base level
    if (!IsBlockLinearSizeCompatible(new_info, info, base->level, 0, strict_size)) {
        return std::nullopt;
    }
    // Block-linear GOB block configuration must also match at that level
    if (new_info.block != MipBlockSize(info, base->level)) {
        return std::nullopt;
    }
    const SubresourceExtent resources = new_info.resources;
    s32 layers = 1;
    if (info.type != ImageType::e3D) {
        // 3D images have a single layer; otherwise extend to cover both layer ranges
        layers = std::max(resources.layers, info.resources.layers + base->layer);
    }
    return OverlapResult{
        .gpu_addr = overlap.gpu_addr,
        .cpu_addr = overlap.cpu_addr,
        .resources =
            {
                // Shift the new image's levels by where it sits inside the existing image
                .levels = std::max(resources.levels + base->level, info.resources.levels),
                .layers = layers,
            },
    };
}
| 502 | |||
| 503 | [[nodiscard]] Extent2D PitchLinearAlignedSize(const ImageInfo& info) { | ||
| 504 | // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L212 | ||
| 505 | static constexpr u32 STRIDE_ALIGNMENT = 32; | ||
| 506 | ASSERT(info.type == ImageType::Linear); | ||
| 507 | const Extent2D num_tiles{ | ||
| 508 | .width = Common::DivCeil(info.size.width, DefaultBlockWidth(info.format)), | ||
| 509 | .height = Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)), | ||
| 510 | }; | ||
| 511 | const u32 width_alignment = STRIDE_ALIGNMENT / BytesPerBlock(info.format); | ||
| 512 | return Extent2D{ | ||
| 513 | .width = Common::AlignUp(num_tiles.width, width_alignment), | ||
| 514 | .height = num_tiles.height, | ||
| 515 | }; | ||
| 516 | } | ||
| 517 | |||
/// Returns the block-linear aligned size (in tiles) of a mip level, used for
/// relaxed size comparisons between block-linear images.
[[nodiscard]] Extent3D BlockLinearAlignedSize(const ImageInfo& info, u32 level) {
    // https://github.com/Ryujinx/Ryujinx/blob/1c9aba6de1520aea5480c032e0ff5664ac1bb36f/Ryujinx.Graphics.Texture/SizeCalculator.cs#L176
    ASSERT(info.type != ImageType::Linear);
    const Extent3D size = AdjustMipSize(info.size, level);
    // Convert texel dimensions into compressed-tile dimensions
    const Extent3D num_tiles{
        .width = Common::DivCeil(size.width, DefaultBlockWidth(info.format)),
        .height = Common::DivCeil(size.height, DefaultBlockHeight(info.format)),
        .depth = size.depth,
    };
    const u32 bpp_log2 = BytesPerBlockLog2(info.format);
    const u32 alignment = StrideAlignment(num_tiles, info.block, bpp_log2, info.tile_width_spacing);
    // NOTE(review): block size is taken at level 0, not `level` — presumably intentional
    // for alignment purposes, but worth confirming against the Ryujinx reference above.
    const Extent3D mip_block = AdjustMipBlockSize(num_tiles, info.block, 0);
    return Extent3D{
        // Align each axis up to its GOB/block granularity (shift counts in bits)
        .width = Common::AlignBits(num_tiles.width, alignment),
        .height = Common::AlignBits(num_tiles.height, GOB_SIZE_Y_SHIFT + mip_block.height),
        .depth = Common::AlignBits(num_tiles.depth, GOB_SIZE_Z_SHIFT + mip_block.depth),
    };
}
| 536 | |||
| 537 | [[nodiscard]] constexpr u32 NumBlocksPerLayer(const ImageInfo& info, Extent2D tile_size) noexcept { | ||
| 538 | u32 num_blocks = 0; | ||
| 539 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 540 | const Extent3D mip_size = AdjustMipSize(info.size, level); | ||
| 541 | num_blocks += NumBlocks(mip_size, tile_size); | ||
| 542 | } | ||
| 543 | return num_blocks; | ||
| 544 | } | ||
| 545 | |||
| 546 | [[nodiscard]] u32 NumSlices(const ImageInfo& info) noexcept { | ||
| 547 | ASSERT(info.type == ImageType::e3D); | ||
| 548 | u32 num_slices = 0; | ||
| 549 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 550 | num_slices += AdjustMipSize(info.size.depth, level); | ||
| 551 | } | ||
| 552 | return num_slices; | ||
| 553 | } | ||
| 554 | |||
/// Uploads host `memory` into a pitch-linear guest image one row at a time,
/// honoring the copy's x/y offsets. Only single-layer, single-level, depth-1
/// copies are supported (asserted below).
void SwizzlePitchLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                             const ImageInfo& info, const BufferImageCopy& copy,
                             std::span<const u8> memory) {
    ASSERT(copy.image_offset.z == 0);
    ASSERT(copy.image_extent.depth == 1);
    ASSERT(copy.image_subresource.base_level == 0);
    ASSERT(copy.image_subresource.base_layer == 0);
    ASSERT(copy.image_subresource.num_layers == 1);

    const u32 bytes_per_block = BytesPerBlock(info.format);
    // Number of bytes actually copied per row (the copied region, not the full pitch)
    const u32 row_length = copy.image_extent.width * bytes_per_block;
    const u32 guest_offset_x = copy.image_offset.x * bytes_per_block;

    for (u32 line = 0; line < copy.image_extent.height; ++line) {
        // NOTE(review): the host side is also advanced by info.pitch per line, which
        // assumes the host buffer rows use the guest pitch — confirm against callers.
        const u32 host_offset_y = line * info.pitch;
        const u32 guest_offset_y = (copy.image_offset.y + line) * info.pitch;
        const u32 guest_offset = guest_offset_x + guest_offset_y;
        gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, memory.data() + host_offset_y,
                                    row_length);
    }
}
| 576 | |||
/// Swizzles host `input` into a block-linear guest image, one mip level
/// (copy.image_subresource.base_level) across all layers. Offsets and partial
/// extents are unimplemented; the copy must cover the whole level.
void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                             const ImageInfo& info, const BufferImageCopy& copy,
                             std::span<const u8> input) {
    const Extent3D size = info.size;
    const LevelInfo level_info = MakeLevelInfo(info);
    const Extent2D tile_size = DefaultBlockSize(info.format);
    const u32 bytes_per_block = BytesPerBlock(info.format);

    const s32 level = copy.image_subresource.base_level;
    const Extent3D level_size = AdjustMipSize(size, level);
    const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
    // Linear (host-side) bytes consumed per layer of this level
    const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;

    UNIMPLEMENTED_IF(info.tile_width_spacing > 0);

    UNIMPLEMENTED_IF(copy.image_offset.x != 0);
    UNIMPLEMENTED_IF(copy.image_offset.y != 0);
    UNIMPLEMENTED_IF(copy.image_offset.z != 0);
    UNIMPLEMENTED_IF(copy.image_extent != level_size);

    const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
    const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);

    size_t host_offset = copy.buffer_offset;

    const u32 num_levels = info.resources.levels;
    const std::array sizes = CalculateLevelSizes(level_info, num_levels);
    // Guest byte offset of this level within a layer: sum of all preceding level sizes
    size_t guest_offset = std::reduce(sizes.begin(), sizes.begin() + level, 0);
    // Distance between consecutive layers in guest memory (aligned full-mip-chain size)
    const size_t layer_stride =
        AlignLayerSize(std::reduce(sizes.begin(), sizes.begin() + num_levels, 0), size,
                       level_info.block, tile_size.height, info.tile_width_spacing);
    const size_t subresource_size = sizes[level];

    // Scratch buffer reused for each layer's swizzled output
    const auto dst_data = std::make_unique<u8[]>(subresource_size);
    const std::span<u8> dst(dst_data.get(), subresource_size);

    for (s32 layer = 0; layer < info.resources.layers; ++layer) {
        const std::span<const u8> src = input.subspan(host_offset);
        SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
                       num_tiles.depth, block.height, block.depth);

        gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());

        host_offset += host_bytes_per_layer;
        guest_offset += layer_stride;
    }
    // The copy descriptor's buffer_size must exactly match what was consumed
    ASSERT(host_offset - copy.buffer_offset == copy.buffer_size);
}
| 625 | |||
| 626 | } // Anonymous namespace | ||
| 627 | |||
| 628 | u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept { | ||
| 629 | if (info.type == ImageType::Buffer) { | ||
| 630 | return info.size.width * BytesPerBlock(info.format); | ||
| 631 | } | ||
| 632 | if (info.type == ImageType::Linear) { | ||
| 633 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 634 | } | ||
| 635 | if (info.resources.layers > 1) { | ||
| 636 | ASSERT(info.layer_stride != 0); | ||
| 637 | return info.layer_stride * info.resources.layers; | ||
| 638 | } else { | ||
| 639 | return CalculateLayerSize(info); | ||
| 640 | } | ||
| 641 | } | ||
| 642 | |||
| 643 | u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { | ||
| 644 | if (info.type == ImageType::Buffer) { | ||
| 645 | return info.size.width * BytesPerBlock(info.format); | ||
| 646 | } | ||
| 647 | if (info.num_samples > 1) { | ||
| 648 | // Multisample images can't be uploaded or downloaded to the host | ||
| 649 | return 0; | ||
| 650 | } | ||
| 651 | if (info.type == ImageType::Linear) { | ||
| 652 | return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||
| 653 | } | ||
| 654 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 655 | return NumBlocksPerLayer(info, tile_size) * info.resources.layers * BytesPerBlock(info.format); | ||
| 656 | } | ||
| 657 | |||
| 658 | u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | ||
| 659 | if (info.type == ImageType::Buffer) { | ||
| 660 | return info.size.width * BytesPerBlock(info.format); | ||
| 661 | } | ||
| 662 | static constexpr Extent2D TILE_SIZE{1, 1}; | ||
| 663 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | ||
| 664 | } | ||
| 665 | |||
| 666 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | ||
| 667 | ASSERT(info.type != ImageType::Linear); | ||
| 668 | const u32 layer_size = CalculateLayerSize(info); | ||
| 669 | const Extent3D size = info.size; | ||
| 670 | const Extent3D block = info.block; | ||
| 671 | const u32 tile_size_y = DefaultBlockHeight(info.format); | ||
| 672 | return AlignLayerSize(layer_size, size, block, tile_size_y, info.tile_width_spacing); | ||
| 673 | } | ||
| 674 | |||
/// Returns the unaligned size in bytes of one layer of a block-linear image,
/// i.e. the offset one past its last mip level.
u32 CalculateLayerSize(const ImageInfo& info) noexcept {
    ASSERT(info.type != ImageType::Linear);
    return CalculateLevelOffset(info.format, info.size, info.block, info.num_samples,
                                info.tile_width_spacing, info.resources.levels);
}
| 680 | |||
| 681 | std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(const ImageInfo& info) noexcept { | ||
| 682 | ASSERT(info.resources.levels <= MAX_MIP_LEVELS); | ||
| 683 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 684 | std::array<u32, MAX_MIP_LEVELS> offsets{}; | ||
| 685 | u32 offset = 0; | ||
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 687 | offsets[level] = offset; | ||
| 688 | offset += CalculateLevelSize(level_info, level); | ||
| 689 | } | ||
| 690 | return offsets; | ||
| 691 | } | ||
| 692 | |||
/// Returns the guest byte offset of every depth slice of a 3D image, ordered by
/// mip level then slice index. Used to expose 3D slices as 2D views.
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
    ASSERT(info.type == ImageType::e3D);
    std::vector<u32> offsets;
    offsets.reserve(NumSlices(info));

    const LevelInfo level_info = MakeLevelInfo(info);
    u32 mip_offset = 0;
    for (s32 level = 0; level < info.resources.levels; ++level) {
        const Extent3D tile_shift = TileShift(level_info, level);
        const Extent3D tiles = LevelTiles(level_info, level);
        // Bytes per GOB column at this level, as a shift
        const u32 gob_size_shift = tile_shift.height + GOB_SIZE_SHIFT;
        const u32 slice_size = (tiles.width * tiles.height) << gob_size_shift;
        // Low depth bits address slices interleaved within a block; high bits
        // select whole depth blocks
        const u32 z_mask = (1U << tile_shift.depth) - 1;
        const u32 depth = AdjustMipSize(info.size.depth, level);
        for (u32 slice = 0; slice < depth; ++slice) {
            const u32 z_low = slice & z_mask;
            const u32 z_high = slice & ~z_mask;
            offsets.push_back(mip_offset + (z_low << gob_size_shift) + (z_high * slice_size));
        }
        mip_offset += CalculateLevelSize(level_info, level);
    }
    return offsets;
}
| 716 | |||
| 717 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | ||
| 718 | ASSERT(info.type == ImageType::e3D); | ||
| 719 | std::vector<SubresourceBase> subresources; | ||
| 720 | subresources.reserve(NumSlices(info)); | ||
| 721 | for (s32 level = 0; level < info.resources.levels; ++level) { | ||
| 722 | const s32 depth = AdjustMipSize(info.size.depth, level); | ||
| 723 | for (s32 slice = 0; slice < depth; ++slice) { | ||
| 724 | subresources.emplace_back(SubresourceBase{ | ||
| 725 | .level = level, | ||
| 726 | .layer = slice, | ||
| 727 | }); | ||
| 728 | } | ||
| 729 | } | ||
| 730 | return subresources; | ||
| 731 | } | ||
| 732 | |||
| 733 | u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level) { | ||
| 734 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 735 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 736 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 737 | const Extent3D block = AdjustMipBlockSize(num_tiles, info.block, level); | ||
| 738 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | ||
| 739 | return StrideAlignment(num_tiles, block, bpp_log2, info.tile_width_spacing); | ||
| 740 | } | ||
| 741 | |||
/// Translates a TIC (texture image control) descriptor's format and component
/// type fields into the video core's PixelFormat enumeration.
PixelFormat PixelFormatFromTIC(const TICEntry& config) noexcept {
    return PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
                                      config.a_type, config.srgb_conversion);
}
| 746 | |||
| 747 | ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | ||
| 748 | switch (info.type) { | ||
| 749 | case ImageType::e2D: | ||
| 750 | return info.resources.layers > 1 ? ImageViewType::e2DArray : ImageViewType::e2D; | ||
| 751 | case ImageType::e3D: | ||
| 752 | return ImageViewType::e2DArray; | ||
| 753 | case ImageType::Linear: | ||
| 754 | return ImageViewType::e2D; | ||
| 755 | default: | ||
| 756 | UNIMPLEMENTED_MSG("Unimplemented image type={}", static_cast<int>(info.type)); | ||
| 757 | return ImageViewType{}; | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
/// Builds per-level image copies that move the contents of `src` into `dst`
/// starting at subresource `base`. For 3D destinations the source's depth is
/// mapped onto the destination's z offset instead of layers.
std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src,
                                             SubresourceBase base) {
    ASSERT(dst.resources.levels >= src.resources.levels);
    ASSERT(dst.num_samples == src.num_samples);

    const bool is_dst_3d = dst.type == ImageType::e3D;
    if (is_dst_3d) {
        // 3D-into-3D copies are only supported for single-level sources
        ASSERT(src.type == ImageType::e3D);
        ASSERT(src.resources.levels == 1);
    }

    std::vector<ImageCopy> copies;
    copies.reserve(src.resources.levels);
    for (s32 level = 0; level < src.resources.levels; ++level) {
        ImageCopy& copy = copies.emplace_back();
        copy.src_subresource = SubresourceLayers{
            .base_level = level,
            .base_layer = 0,
            .num_layers = src.resources.layers,
        };
        copy.dst_subresource = SubresourceLayers{
            .base_level = base.level + level,
            // For 3D destinations the "layer" is expressed as a z offset below
            .base_layer = is_dst_3d ? 0 : base.layer,
            .num_layers = is_dst_3d ? 1 : src.resources.layers,
        };
        copy.src_offset = Offset3D{
            .x = 0,
            .y = 0,
            .z = 0,
        };
        copy.dst_offset = Offset3D{
            .x = 0,
            .y = 0,
            .z = is_dst_3d ? base.layer : 0,
        };
        // Extent comes from the destination's mip chain at the shifted level
        const Extent3D mip_size = AdjustMipSize(dst.size, base.level + level);
        copy.extent = AdjustSamplesSize(mip_size, dst.num_samples);
        if (is_dst_3d) {
            copy.extent.depth = src.size.depth;
        }
    }
    return copies;
}
| 804 | |||
| 805 | bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||
| 806 | if (config.Address() == 0) { | ||
| 807 | return false; | ||
| 808 | } | ||
| 809 | if (config.Address() > (u64(1) << 48)) { | ||
| 810 | return false; | ||
| 811 | } | ||
| 812 | return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); | ||
| 813 | } | ||
| 814 | |||
/// Reads the image from guest memory, unswizzles every level and layer into
/// `output` (linear host layout), and returns the buffer-to-image copies that
/// describe the produced data. Pitch-linear images are copied verbatim.
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                                            const ImageInfo& info, std::span<u8> output) {
    const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
    const u32 bpp_log2 = BytesPerBlockLog2(info.format);
    const Extent3D size = info.size;

    if (info.type == ImageType::Linear) {
        gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);

        // Pitch must be a whole number of blocks for the row length below to be exact
        ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
        return {{
            .buffer_offset = 0,
            .buffer_size = guest_size_bytes,
            .buffer_row_length = info.pitch >> bpp_log2,
            .buffer_image_height = size.height,
            .image_subresource =
                {
                    .base_level = 0,
                    .base_layer = 0,
                    .num_layers = 1,
                },
            .image_offset = {0, 0, 0},
            .image_extent = size,
        }};
    }
    // Stage the swizzled guest data into a temporary buffer
    const auto input_data = std::make_unique<u8[]>(guest_size_bytes);
    gpu_memory.ReadBlockUnsafe(gpu_addr, input_data.get(), guest_size_bytes);
    const std::span<const u8> input(input_data.get(), guest_size_bytes);

    const LevelInfo level_info = MakeLevelInfo(info);
    const s32 num_layers = info.resources.layers;
    const s32 num_levels = info.resources.levels;
    const Extent2D tile_size = DefaultBlockSize(info.format);
    const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
    const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
    // Aligned distance between consecutive layers in guest memory
    const u32 layer_size = std::reduce(level_sizes.begin(), level_sizes.begin() + num_levels, 0);
    const u32 layer_stride = AlignLayerSize(layer_size, size, level_info.block, tile_size.height,
                                            info.tile_width_spacing);
    size_t guest_offset = 0;
    u32 host_offset = 0;
    std::vector<BufferImageCopy> copies(num_levels);

    for (s32 level = 0; level < num_levels; ++level) {
        const Extent3D level_size = AdjustMipSize(size, level);
        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
        const u32 host_bytes_per_layer = num_blocks_per_layer << bpp_log2;
        // One copy per level, covering every layer of that level contiguously
        copies[level] = BufferImageCopy{
            .buffer_offset = host_offset,
            .buffer_size = static_cast<size_t>(host_bytes_per_layer) * num_layers,
            .buffer_row_length = Common::AlignUp(level_size.width, tile_size.width),
            .buffer_image_height = Common::AlignUp(level_size.height, tile_size.height),
            .image_subresource =
                {
                    .base_level = level,
                    .base_layer = 0,
                    .num_layers = info.resources.layers,
                },
            .image_offset = {0, 0, 0},
            .image_extent = level_size,
        };
        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
        const u32 stride_alignment = StrideAlignment(num_tiles, info.block, gob, bpp_log2);
        size_t guest_layer_offset = 0;

        // Host data is level-major (all layers of a level together); guest data is
        // layer-major, so each layer jumps by layer_stride on the guest side
        for (s32 layer = 0; layer < info.resources.layers; ++layer) {
            const std::span<u8> dst = output.subspan(host_offset);
            const std::span<const u8> src = input.subspan(guest_offset + guest_layer_offset);
            UnswizzleTexture(dst, src, 1U << bpp_log2, num_tiles.width, num_tiles.height,
                             num_tiles.depth, block.height, block.depth, stride_alignment);
            guest_layer_offset += layer_stride;
            host_offset += host_bytes_per_layer;
        }
        guest_offset += level_sizes[level];
    }
    return copies;
}
| 892 | |||
/// Reads a buffer-type image from guest memory into `output` and returns the
/// whole-buffer copy descriptor for the staging upload.
BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                            const ImageBase& image, std::span<u8> output) {
    gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
    return BufferCopy{
        .src_offset = 0,
        .dst_offset = 0,
        .size = image.guest_size_bytes,
    };
}
| 902 | |||
/// Decompresses block-compressed data (ASTC or BC4) from `input` into `output`
/// and rewrites each copy in `copies` in place to describe the decompressed
/// layout (1 byte-per-texel granularity, new buffer offsets).
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                  std::span<BufferImageCopy> copies) {
    u32 output_offset = 0;

    const Extent2D tile_size = DefaultBlockSize(info.format);
    for (BufferImageCopy& copy : copies) {
        const u32 level = copy.image_subresource.base_level;
        const Extent3D mip_size = AdjustMipSize(info.size, level);
        // Copies must be full-level, zero-offset, tile-aligned (as produced by
        // UnswizzleImage) for the in-place rewrite below to be valid
        ASSERT(copy.image_offset == Offset3D{});
        ASSERT(copy.image_subresource.base_layer == 0);
        ASSERT(copy.image_extent == mip_size);
        ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
        ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));

        if (IsPixelFormatASTC(info.format)) {
            ASSERT(copy.image_extent.depth == 1);
            Tegra::Texture::ASTC::Decompress(input.subspan(copy.buffer_offset),
                                             copy.image_extent.width, copy.image_extent.height,
                                             copy.image_subresource.num_layers, tile_size.width,
                                             tile_size.height, output.subspan(output_offset));
        } else {
            DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent,
                          output.subspan(output_offset));
        }
        // Point the copy at the decompressed data with texel-granular pitch
        copy.buffer_offset = output_offset;
        copy.buffer_row_length = mip_size.width;
        copy.buffer_image_height = mip_size.height;

        output_offset += copy.image_extent.width * copy.image_extent.height *
                         copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
    }
}
| 935 | |||
/// Builds the buffer-image copies needed to download the entire image (every
/// level and layer) into a tightly packed linear host buffer.
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
    const Extent3D size = info.size;
    const u32 bytes_per_block = BytesPerBlock(info.format);
    if (info.type == ImageType::Linear) {
        // Pitch must be block-aligned so row length is exact
        ASSERT(info.pitch % bytes_per_block == 0);
        return {{
            .buffer_offset = 0,
            .buffer_size = static_cast<size_t>(info.pitch) * size.height,
            .buffer_row_length = info.pitch / bytes_per_block,
            .buffer_image_height = size.height,
            .image_subresource =
                {
                    .base_level = 0,
                    .base_layer = 0,
                    .num_layers = 1,
                },
            .image_offset = {0, 0, 0},
            .image_extent = size,
        }};
    }
    UNIMPLEMENTED_IF(info.tile_width_spacing > 0);

    const s32 num_layers = info.resources.layers;
    const s32 num_levels = info.resources.levels;
    const Extent2D tile_size = DefaultBlockSize(info.format);

    u32 host_offset = 0;

    // One copy per level; each level covers all of its layers contiguously
    std::vector<BufferImageCopy> copies(num_levels);
    for (s32 level = 0; level < num_levels; ++level) {
        const Extent3D level_size = AdjustMipSize(size, level);
        const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
        const u32 host_bytes_per_level = num_blocks_per_layer * bytes_per_block * num_layers;
        copies[level] = BufferImageCopy{
            .buffer_offset = host_offset,
            .buffer_size = host_bytes_per_level,
            .buffer_row_length = level_size.width,
            .buffer_image_height = level_size.height,
            .image_subresource =
                {
                    .base_level = level,
                    .base_layer = 0,
                    .num_layers = info.resources.layers,
                },
            .image_offset = {0, 0, 0},
            .image_extent = level_size,
        };
        host_offset += host_bytes_per_level;
    }
    return copies;
}
| 987 | |||
/// Public wrapper exposing AdjustMipSize: the extent of `size` shrunk to mip
/// level `level`.
Extent3D MipSize(Extent3D size, u32 level) {
    return AdjustMipSize(size, level);
}
| 991 | |||
| 992 | Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | ||
| 993 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 994 | const Extent2D tile_size = DefaultBlockSize(info.format); | ||
| 995 | const Extent3D level_size = AdjustMipSize(info.size, level); | ||
| 996 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | ||
| 997 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | ||
| 998 | } | ||
| 999 | |||
/// Builds the per-level swizzle parameters needed to upload the entire image:
/// tile counts, block configuration, and the guest byte offset of each level.
/// Pitch-linear images produce a single empty-block entry.
std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
    const Extent2D tile_size = DefaultBlockSize(info.format);
    if (info.type == ImageType::Linear) {
        return std::vector{SwizzleParameters{
            .num_tiles = AdjustTileSize(info.size, tile_size),
            .block = {},
            .buffer_offset = 0,
            .level = 0,
        }};
    }
    const LevelInfo level_info = MakeLevelInfo(info);
    const Extent3D size = info.size;
    const s32 num_levels = info.resources.levels;

    // Running guest offset: levels are laid out back to back within a layer
    u32 guest_offset = 0;
    std::vector<SwizzleParameters> params(num_levels);
    for (s32 level = 0; level < num_levels; ++level) {
        const Extent3D level_size = AdjustMipSize(size, level);
        const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
        const Extent3D block = AdjustMipBlockSize(num_tiles, level_info.block, level);
        params[level] = SwizzleParameters{
            .num_tiles = num_tiles,
            .block = block,
            .buffer_offset = guest_offset,
            .level = level,
        };
        guest_offset += CalculateLevelSize(level_info, level);
    }
    return params;
}
| 1030 | |||
| 1031 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | ||
| 1032 | std::span<const BufferImageCopy> copies, std::span<const u8> memory) { | ||
| 1033 | const bool is_pitch_linear = info.type == ImageType::Linear; | ||
| 1034 | for (const BufferImageCopy& copy : copies) { | ||
| 1035 | if (is_pitch_linear) { | ||
| 1036 | SwizzlePitchLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1037 | } else { | ||
| 1038 | SwizzleBlockLinearImage(gpu_memory, gpu_addr, info, copy, memory); | ||
| 1039 | } | ||
| 1040 | } | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 lhs_level, | ||
| 1044 | u32 rhs_level, bool strict_size) noexcept { | ||
| 1045 | ASSERT(lhs.type != ImageType::Linear); | ||
| 1046 | ASSERT(rhs.type != ImageType::Linear); | ||
| 1047 | if (strict_size) { | ||
| 1048 | const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); | ||
| 1049 | const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); | ||
| 1050 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1051 | } else { | ||
| 1052 | const Extent3D lhs_size = BlockLinearAlignedSize(lhs, lhs_level); | ||
| 1053 | const Extent3D rhs_size = BlockLinearAlignedSize(rhs, rhs_level); | ||
| 1054 | return lhs_size.width == rhs_size.width && lhs_size.height == rhs_size.height; | ||
| 1055 | } | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { | ||
| 1059 | ASSERT(lhs.type == ImageType::Linear); | ||
| 1060 | ASSERT(rhs.type == ImageType::Linear); | ||
| 1061 | if (strict_size) { | ||
| 1062 | return lhs.size.width == rhs.size.width && lhs.size.height == rhs.size.height; | ||
| 1063 | } else { | ||
| 1064 | const Extent2D lhs_size = PitchLinearAlignedSize(lhs); | ||
| 1065 | const Extent2D rhs_size = PitchLinearAlignedSize(rhs); | ||
| 1066 | return lhs_size == rhs_size; | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
/// Tries to merge a new image with an existing overlapping image. Returns the
/// merged addresses and subresource extent, or std::nullopt when the two images
/// are incompatible (layer stride, format view, or layout mismatch).
std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
                                            VAddr cpu_addr, const ImageBase& overlap,
                                            bool strict_size, bool broken_views) {
    ASSERT(new_info.type != ImageType::Linear);
    ASSERT(overlap.info.type != ImageType::Linear);
    if (!IsLayerStrideCompatible(new_info, overlap.info)) {
        return std::nullopt;
    }
    if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
        return std::nullopt;
    }
    if (gpu_addr == overlap.gpu_addr) {
        // Same base address: only the subresource extents need to be reconciled
        const std::optional solution = ResolveOverlapEqualAddress(new_info, overlap, strict_size);
        if (!solution) {
            return std::nullopt;
        }
        return OverlapResult{
            .gpu_addr = gpu_addr,
            .cpu_addr = cpu_addr,
            .resources = *solution,
        };
    }
    if (overlap.gpu_addr > gpu_addr) {
        // Existing image starts after the new one
        return ResolveOverlapRightAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
    }
    // if overlap.gpu_addr < gpu_addr
    return ResolveOverlapLeftAddress(new_info, gpu_addr, cpu_addr, overlap, strict_size);
}
| 1098 | |||
| 1099 | bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) { | ||
| 1100 | // If either of the layer strides is zero, we can assume they are compatible | ||
| 1101 | // These images generally come from rendertargets | ||
| 1102 | if (lhs.layer_stride == 0) { | ||
| 1103 | return true; | ||
| 1104 | } | ||
| 1105 | if (rhs.layer_stride == 0) { | ||
| 1106 | return true; | ||
| 1107 | } | ||
| 1108 | // It's definitely compatible if the layer stride matches | ||
| 1109 | if (lhs.layer_stride == rhs.layer_stride) { | ||
| 1110 | return true; | ||
| 1111 | } | ||
| 1112 | // Although we also have to compare for cases where it can be unaligned | ||
| 1113 | // This can happen if the image doesn't have layers, so the stride is not aligned | ||
| 1114 | if (lhs.maybe_unaligned_layer_stride == rhs.maybe_unaligned_layer_stride) { | ||
| 1115 | return true; | ||
| 1116 | } | ||
| 1117 | return false; | ||
| 1118 | } | ||
| 1119 | |||
/// Determines whether `candidate` (at candidate_addr) is a subresource view of
/// `image`, returning the (level, layer) it starts at, or std::nullopt when any
/// compatibility check fails. `options` relaxes format/sample/size matching.
std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
                                               GPUVAddr candidate_addr, RelaxedOptions options,
                                               bool broken_views) {
    // The candidate's address must land exactly on a subresource boundary
    const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
    if (!base) {
        return std::nullopt;
    }
    const ImageInfo& existing = image.info;
    if (False(options & RelaxedOptions::Format)) {
        if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
            return std::nullopt;
        }
    }
    if (!IsLayerStrideCompatible(existing, candidate)) {
        return std::nullopt;
    }
    if (existing.type != candidate.type) {
        return std::nullopt;
    }
    if (False(options & RelaxedOptions::Samples)) {
        if (existing.num_samples != candidate.num_samples) {
            return std::nullopt;
        }
    }
    // The candidate's mip chain must fit inside the existing image's
    if (existing.resources.levels < candidate.resources.levels + base->level) {
        return std::nullopt;
    }
    if (existing.type == ImageType::e3D) {
        // For 3D images the layer field addresses depth slices
        // NOTE(review): `existing.size.depth << base->level` grows with the level;
        // a mip's depth normally shrinks — confirm this shift direction is intended.
        const u32 mip_depth = std::max(1U, existing.size.depth << base->level);
        if (mip_depth < candidate.size.depth + base->layer) {
            return std::nullopt;
        }
    } else {
        if (existing.resources.layers < candidate.resources.layers + base->layer) {
            return std::nullopt;
        }
    }
    const bool strict_size = False(options & RelaxedOptions::Size);
    if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) {
        return std::nullopt;
    }
    // TODO: compare block sizes
    return base;
}
| 1164 | |||
| 1165 | bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr, | ||
| 1166 | RelaxedOptions options, bool broken_views) { | ||
| 1167 | return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value(); | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||
| 1171 | const ImageBase* src) { | ||
| 1172 | if (src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1173 | src_info.format = src->info.format; | ||
| 1174 | } | ||
| 1175 | if (dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1176 | dst_info.format = dst->info.format; | ||
| 1177 | } | ||
| 1178 | if (!dst && src && GetFormatType(src->info.format) != SurfaceType::ColorTexture) { | ||
| 1179 | dst_info.format = src->info.format; | ||
| 1180 | } | ||
| 1181 | if (!src && dst && GetFormatType(dst->info.format) != SurfaceType::ColorTexture) { | ||
| 1182 | src_info.format = src->info.format; | ||
| 1183 | } | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | u32 MapSizeBytes(const ImageBase& image) { | ||
| 1187 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 1188 | return image.guest_size_bytes; | ||
| 1189 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 1190 | return image.converted_size_bytes; | ||
| 1191 | } else { | ||
| 1192 | return image.unswizzled_size_bytes; | ||
| 1193 | } | ||
| 1194 | } | ||
| 1195 | |||
// Shorthand for the compile-time checks below.
using P = PixelFormat;

// Compile-time sanity checks of the block-linear level size math.
static_assert(CalculateLevelSize(LevelInfo{{1920, 1080}, {0, 2, 0}, {1, 1}, 2, 0}, 0) == 0x7f8000);
static_assert(CalculateLevelSize(LevelInfo{{32, 32}, {0, 0, 4}, {1, 1}, 4, 0}, 0) == 0x4000);

// Level offsets for uncompressed and ASTC-compressed formats.
static_assert(CalculateLevelOffset(P::R8_SINT, {1920, 1080}, {0, 2}, 1, 0, 7) == 0x2afc00);
static_assert(CalculateLevelOffset(P::ASTC_2D_12X12_UNORM, {8192, 4096}, {0, 2}, 1, 0, 12) ==
              0x50d200);

// Full mip chain offsets for a 1024x1024 A8B8G8R8 image, level by level.
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 0) == 0);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 1) == 0x400000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 2) == 0x500000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 3) == 0x540000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 4) == 0x550000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 5) == 0x554000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 6) == 0x555000);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 7) == 0x555400);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 8) == 0x555600);
static_assert(CalculateLevelOffset(P::A8B8G8R8_UNORM, {1024, 1024}, {0, 4}, 1, 0, 9) == 0x555800);
| 1215 | |||
| 1216 | constexpr u32 ValidateLayerSize(PixelFormat format, u32 width, u32 height, u32 block_height, | ||
| 1217 | u32 tile_width_spacing, u32 level) { | ||
| 1218 | const Extent3D size{width, height, 1}; | ||
| 1219 | const Extent3D block{0, block_height, 0}; | ||
| 1220 | const u32 offset = CalculateLevelOffset(format, size, block, 1, tile_width_spacing, level); | ||
| 1221 | return AlignLayerSize(offset, size, block, DefaultBlockHeight(format), tile_width_spacing); | ||
| 1222 | } | ||
| 1223 | |||
// Compile-time sanity checks of the aligned layer size math.
static_assert(ValidateLayerSize(P::ASTC_2D_12X12_UNORM, 8192, 4096, 2, 0, 12) == 0x50d800);
static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 1024, 1024, 2, 0, 10) == 0x556000);
static_assert(ValidateLayerSize(P::BC3_UNORM, 128, 128, 2, 0, 8) == 0x6000);

// Non-zero tile_width_spacing cases for both uncompressed and compressed formats.
static_assert(ValidateLayerSize(P::A8B8G8R8_UNORM, 518, 572, 4, 3, 1) == 0x190000,
              "Tile width spacing is not working");
static_assert(ValidateLayerSize(P::BC5_UNORM, 1024, 1024, 3, 4, 11) == 0x160000,
              "Compressed tile width spacing is not working");
| 1232 | |||
| 1233 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h new file mode 100644 index 000000000..52a9207d6 --- /dev/null +++ b/src/video_core/texture_cache/util.h | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <span> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/surface.h" | ||
| 14 | #include "video_core/texture_cache/image_base.h" | ||
| 15 | #include "video_core/texture_cache/image_view_base.h" | ||
| 16 | #include "video_core/texture_cache/types.h" | ||
| 17 | #include "video_core/textures/texture.h" | ||
| 18 | |||
namespace VideoCommon {

using Tegra::Texture::TICEntry;

// Result of resolving two overlapping images into one merged image.
struct OverlapResult {
    GPUVAddr gpu_addr;           // GPU address of the merged image
    VAddr cpu_addr;              // CPU address of the merged image
    SubresourceExtent resources; // Levels/layers covered by the merged image
};

/// Size in bytes of the image as stored in guest (GPU) memory.
[[nodiscard]] u32 CalculateGuestSizeInBytes(const ImageInfo& info) noexcept;

/// Size in bytes of the image after unswizzling to a linear layout.
[[nodiscard]] u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept;

/// Size in bytes of the image after host-side format conversion.
[[nodiscard]] u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept;

/// Stride in bytes between consecutive layers.
[[nodiscard]] u32 CalculateLayerStride(const ImageInfo& info) noexcept;

/// Size in bytes of a single layer.
[[nodiscard]] u32 CalculateLayerSize(const ImageInfo& info) noexcept;

/// Byte offset of each mip level within a layer.
[[nodiscard]] std::array<u32, MAX_MIP_LEVELS> CalculateMipLevelOffsets(
    const ImageInfo& info) noexcept;

/// Byte offsets of each depth slice (3D images).
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);

/// Subresource (level/layer) description of each depth slice (3D images).
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);

/// Stride alignment required for the given mip level.
[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);

/// Host pixel format matching a texture descriptor (TIC) entry.
[[nodiscard]] VideoCore::Surface::PixelFormat PixelFormatFromTIC(
    const Tegra::Texture::TICEntry& config) noexcept;

/// Image view type to use when the image is bound as a render target.
[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;

/// Copies to shrink src's overlapping subresources into dst starting at base.
[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst,
                                                           const ImageInfo& src,
                                                           SubresourceBase base);

/// Whether the TIC entry points at mapped GPU memory.
[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);

/// Unswizzle the image from guest memory into output; returns the upload copies.
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
                                                          GPUVAddr gpu_addr, const ImageInfo& info,
                                                          std::span<u8> output);

/// Read the image's buffer contents into output; returns the buffer copy.
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
                                          const ImageBase& image, std::span<u8> output);

/// Convert input pixel data into output, updating copies to match the converted layout.
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
                  std::span<BufferImageCopy> copies);

/// Copies covering every subresource of the image, for a full download.
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);

/// Extent of the given mip level.
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);

/// Block linear block size of the given mip level.
[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);

/// Swizzle parameters covering every subresource, for a full upload.
[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info);

/// Swizzle linear memory into guest memory following the given copies.
void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
                  std::span<const BufferImageCopy> copies, std::span<const u8> memory);

/// Whether the two block linear images have compatible sizes at the given levels.
[[nodiscard]] bool IsBlockLinearSizeCompatible(const ImageInfo& new_info,
                                               const ImageInfo& overlap_info, u32 new_level,
                                               u32 overlap_level, bool strict_size) noexcept;

/// Whether the two pitch linear images have the same size.
[[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs,
                                         bool strict_size) noexcept;

/// Try to merge a new image with an overlapping existing image.
[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
                                                          GPUVAddr gpu_addr, VAddr cpu_addr,
                                                          const ImageBase& overlap,
                                                          bool strict_size, bool broken_views);

/// Whether the layer strides of the two images are compatible.
[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);

/// Locate the candidate as a subresource (level/layer) of an existing image.
[[nodiscard]] std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate,
                                                             const ImageBase& image,
                                                             GPUVAddr candidate_addr,
                                                             RelaxedOptions options,
                                                             bool broken_views);

/// Whether the candidate is a subresource of an existing image.
[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
                                 GPUVAddr candidate_addr, RelaxedOptions options,
                                 bool broken_views);

/// Deduce blit source/destination formats from the existing images, when present.
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
                      const ImageBase* src);

/// Number of bytes to map when staging the image's data.
[[nodiscard]] u32 MapSizeBytes(const ImageBase& image);

} // namespace VideoCommon