summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2018-10-14 14:42:38 -0400
committerGravatar GitHub2018-10-14 14:42:38 -0400
commitb82bbfba77ef1afe1cc4f8beb2c273c8b66092c8 (patch)
tree000e7462c1c946a96f7536140d29ea3237181913 /src
parentMerge pull request #1492 from lioncash/proc (diff)
parentShorten the implementation of 3D swizzle to only 3 functions (diff)
downloadyuzu-b82bbfba77ef1afe1cc4f8beb2c273c8b66092c8.tar.gz
yuzu-b82bbfba77ef1afe1cc4f8beb2c273c8b66092c8.tar.xz
yuzu-b82bbfba77ef1afe1cc4f8beb2c273c8b66092c8.zip
Merge pull request #1480 from FernandoS27/neue-swizzle
Introduce 3D Swizzle seamlessly
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/fermi_2d.cpp14
-rw-r--r--src/video_core/engines/maxwell_dma.cpp10
-rw-r--r--src/video_core/engines/maxwell_dma.h4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp43
-rw-r--r--src/video_core/textures/decoders.cpp191
-rw-r--r--src/video_core/textures/decoders.h15
-rw-r--r--src/video_core/textures/texture.h1
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp5
8 files changed, 176 insertions, 107 deletions
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 912e785b9..597b279b9 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -62,14 +62,16 @@ void Fermi2D::HandleSurfaceCopy() {
62 u8* dst_buffer = Memory::GetPointer(dest_cpu); 62 u8* dst_buffer = Memory::GetPointer(dest_cpu);
63 if (!regs.src.linear && regs.dst.linear) { 63 if (!regs.src.linear && regs.dst.linear) {
64 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 64 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
65 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, 65 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
66 dst_bytes_per_pixel, src_buffer, dst_buffer, true, 66 src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer,
67 regs.src.BlockHeight()); 67 dst_buffer, true, regs.src.BlockHeight(),
68 regs.src.BlockDepth());
68 } else { 69 } else {
69 // If the input is linear and the output is tiled, swizzle the input and copy it over. 70 // If the input is linear and the output is tiled, swizzle the input and copy it over.
70 Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, 71 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
71 dst_bytes_per_pixel, dst_buffer, src_buffer, false, 72 src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer,
72 regs.dst.BlockHeight()); 73 src_buffer, false, regs.dst.BlockHeight(),
74 regs.dst.BlockDepth());
73 } 75 }
74 } 76 }
75} 77}
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index aa7481b8c..bf2a21bb6 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -68,12 +68,14 @@ void MaxwellDMA::HandleCopy() {
68 68
69 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 69 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
70 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 70 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
71 Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer, 71 Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y,
72 dst_buffer, true, regs.src_params.BlockHeight()); 72 regs.src_params.size_z, 1, 1, src_buffer, dst_buffer, true,
73 regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
73 } else { 74 } else {
74 // If the input is linear and the output is tiled, swizzle the input and copy it over. 75 // If the input is linear and the output is tiled, swizzle the input and copy it over.
75 Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer, 76 Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y,
76 src_buffer, false, regs.dst_params.BlockHeight()); 77 regs.dst_params.size_z, 1, 1, dst_buffer, src_buffer, false,
78 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
77 } 79 }
78} 80}
79 81
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 311ccb616..df19e02e2 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -43,6 +43,10 @@ public:
43 u32 BlockHeight() const { 43 u32 BlockHeight() const {
44 return 1 << block_height; 44 return 1 << block_height;
45 } 45 }
46
47 u32 BlockDepth() const {
48 return 1 << block_depth;
49 }
46 }; 50 };
47 51
48 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); 52 static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 65a220c41..8fdb59b5b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -323,8 +323,8 @@ static bool IsFormatBCn(PixelFormat format) {
323} 323}
324 324
325template <bool morton_to_gl, PixelFormat format> 325template <bool morton_to_gl, PixelFormat format>
326void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size, 326void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer,
327 VAddr addr) { 327 std::size_t gl_buffer_size, VAddr addr) {
328 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; 328 constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
329 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); 329 constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
330 330
@@ -333,7 +333,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si
333 // pixel values. 333 // pixel values.
334 const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; 334 const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
335 const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( 335 const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
336 addr, tile_size, bytes_per_pixel, stride, height, block_height); 336 addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth);
337 const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; 337 const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())};
338 memcpy(gl_buffer, data.data(), size_to_copy); 338 memcpy(gl_buffer, data.data(), size_to_copy);
339 } else { 339 } else {
@@ -345,7 +345,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si
345 } 345 }
346} 346}
347 347
348static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), 348static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
349 SurfaceParams::MaxPixelFormat> 349 SurfaceParams::MaxPixelFormat>
350 morton_to_gl_fns = { 350 morton_to_gl_fns = {
351 // clang-format off 351 // clang-format off
@@ -403,7 +403,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr),
403 // clang-format on 403 // clang-format on
404}; 404};
405 405
406static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), 406static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr),
407 SurfaceParams::MaxPixelFormat> 407 SurfaceParams::MaxPixelFormat>
408 gl_to_morton_fns = { 408 gl_to_morton_fns = {
409 // clang-format off 409 // clang-format off
@@ -827,36 +827,23 @@ void CachedSurface::LoadGLBuffer() {
827 827
828 if (params.is_tiled) { 828 if (params.is_tiled) {
829 gl_buffer.resize(total_size); 829 gl_buffer.resize(total_size);
830 u32 depth = params.depth;
831 u32 block_depth = params.block_depth;
830 832
831 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 833 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
832 params.block_width, static_cast<u32>(params.target)); 834 params.block_width, static_cast<u32>(params.target));
833 ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}",
834 params.block_depth, static_cast<u32>(params.target));
835 835
836 // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do 836 if (params.target == SurfaceParams::SurfaceTarget::Texture2D) {
837 // this for 3D textures, etc. 837 // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
838 switch (params.target) { 838 depth = 1U;
839 case SurfaceParams::SurfaceTarget::Texture2D: 839 block_depth = 1U;
840 // Pass impl. to the fallback code below
841 break;
842 case SurfaceParams::SurfaceTarget::Texture2DArray:
843 case SurfaceParams::SurfaceTarget::TextureCubemap:
844 for (std::size_t index = 0; index < params.depth; ++index) {
845 const std::size_t offset{index * copy_size};
846 morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
847 params.width, params.block_height, params.height, gl_buffer.data() + offset,
848 copy_size, params.addr + offset);
849 }
850 break;
851 default:
852 LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
853 static_cast<u32>(params.target));
854 UNREACHABLE();
855 } 840 }
856 841
842 const std::size_t size = copy_size * depth;
843
857 morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( 844 morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)](
858 params.width, params.block_height, params.height, gl_buffer.data(), copy_size, 845 params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(),
859 params.addr); 846 size, params.addr);
860 } else { 847 } else {
861 const u8* const texture_src_data_end{texture_src_data + total_size}; 848 const u8* const texture_src_data_end{texture_src_data + total_size};
862 gl_buffer.assign(texture_src_data, texture_src_data_end); 849 gl_buffer.assign(texture_src_data, texture_src_data_end);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 0d2456b56..0fe91622e 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -40,72 +40,146 @@ struct alignas(64) SwizzleTable {
40constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>(); 40constexpr auto legacy_swizzle_table = SwizzleTable<8, 64, 1>();
41constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>(); 41constexpr auto fast_swizzle_table = SwizzleTable<8, 4, 16>();
42 42
43static void LegacySwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 43/**
44 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, 44 * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
45 u32 block_height) { 45 * Instead of going gob by gob, we map the coordinates inside a block and manage from
46 * those. Block_Width is assumed to be 1.
47 */
48void PreciseProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
49 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
50 const u32 y_end, const u32 z_end, const u32 tile_offset,
51 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
52 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
46 std::array<u8*, 2> data_ptrs; 53 std::array<u8*, 2> data_ptrs;
47 const std::size_t stride = width * bytes_per_pixel; 54 u32 z_address = tile_offset;
48 const std::size_t gobs_in_x = 64; 55 const u32 gob_size_x = 64;
49 const std::size_t gobs_in_y = 8; 56 const u32 gob_size_y = 8;
50 const std::size_t gobs_size = gobs_in_x * gobs_in_y; 57 const u32 gob_size_z = 1;
51 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x}; 58 const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
52 for (std::size_t y = 0; y < height; ++y) { 59 for (u32 z = z_start; z < z_end; z++) {
53 const std::size_t gob_y_address = 60 u32 y_address = z_address;
54 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs + 61 u32 pixel_base = layer_z * z + y_start * stride_x;
55 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size; 62 for (u32 y = y_start; y < y_end; y++) {
56 const auto& table = legacy_swizzle_table[y % gobs_in_y]; 63 const auto& table = legacy_swizzle_table[y % gob_size_y];
57 for (std::size_t x = 0; x < width; ++x) { 64 for (u32 x = x_start; x < x_end; x++) {
58 const std::size_t gob_address = 65 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]};
59 gob_y_address + (x * bytes_per_pixel / gobs_in_x) * gobs_size * block_height; 66 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base};
60 const std::size_t x2 = x * bytes_per_pixel; 67 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
61 const std::size_t swizzle_offset = gob_address + table[x2 % gobs_in_x]; 68 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
62 const std::size_t pixel_index = (x + y * width) * out_bytes_per_pixel; 69 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
63 70 }
64 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 71 pixel_base += stride_x;
65 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 72 if ((y + 1) % gob_size_y == 0)
66 73 y_address += gob_size;
67 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
68 } 74 }
75 z_address += xy_block_size;
69 } 76 }
70} 77}
71 78
72static void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 79/**
73 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, 80 * This function manages ALL the GOBs(Group of Bytes) Inside a single block.
74 u32 block_height) { 81 * Instead of going gob by gob, we map the coordinates inside a block and manage from
82 * those. Block_Width is assumed to be 1.
83 */
84void FastProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle,
85 const u32 x_start, const u32 y_start, const u32 z_start, const u32 x_end,
86 const u32 y_end, const u32 z_end, const u32 tile_offset,
87 const u32 xy_block_size, const u32 layer_z, const u32 stride_x,
88 const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) {
75 std::array<u8*, 2> data_ptrs; 89 std::array<u8*, 2> data_ptrs;
76 const std::size_t stride{width * bytes_per_pixel}; 90 u32 z_address = tile_offset;
77 const std::size_t gobs_in_x = 64; 91 const u32 x_startb = x_start * bytes_per_pixel;
78 const std::size_t gobs_in_y = 8; 92 const u32 x_endb = x_end * bytes_per_pixel;
79 const std::size_t gobs_size = gobs_in_x * gobs_in_y; 93 const u32 copy_size = 16;
80 const std::size_t image_width_in_gobs{(stride + gobs_in_x - 1) / gobs_in_x}; 94 const u32 gob_size_x = 64;
81 const std::size_t copy_size{16}; 95 const u32 gob_size_y = 8;
82 for (std::size_t y = 0; y < height; ++y) { 96 const u32 gob_size_z = 1;
83 const std::size_t initial_gob = 97 const u32 gob_size = gob_size_x * gob_size_y * gob_size_z;
84 (y / (gobs_in_y * block_height)) * gobs_size * block_height * image_width_in_gobs + 98 for (u32 z = z_start; z < z_end; z++) {
85 (y % (gobs_in_y * block_height) / gobs_in_y) * gobs_size; 99 u32 y_address = z_address;
86 const std::size_t pixel_base{y * width * out_bytes_per_pixel}; 100 u32 pixel_base = layer_z * z + y_start * stride_x;
87 const auto& table = fast_swizzle_table[y % gobs_in_y]; 101 for (u32 y = y_start; y < y_end; y++) {
88 for (std::size_t xb = 0; xb < stride; xb += copy_size) { 102 const auto& table = fast_swizzle_table[y % gob_size_y];
89 const std::size_t gob_address{initial_gob + 103 for (u32 xb = x_startb; xb < x_endb; xb += copy_size) {
90 (xb / gobs_in_x) * gobs_size * block_height}; 104 const u32 swizzle_offset{y_address + table[(xb / copy_size) % 4]};
91 const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; 105 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel;
92 const std::size_t out_x = xb * out_bytes_per_pixel / bytes_per_pixel; 106 const u32 pixel_index{out_x + pixel_base};
93 const std::size_t pixel_index{out_x + pixel_base}; 107 data_ptrs[unswizzle] = swizzled_data + swizzle_offset;
94 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; 108 data_ptrs[!unswizzle] = unswizzled_data + pixel_index;
95 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; 109 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size);
96 std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); 110 }
111 pixel_base += stride_x;
112 if ((y + 1) % gob_size_y == 0)
113 y_address += gob_size;
114 }
115 z_address += xy_block_size;
116 }
117}
118
119/**
120 * This function unswizzles or swizzles a texture by mapping Linear to BlockLinear Textue.
121 * The body of this function takes care of splitting the swizzled texture into blocks,
122 * and managing the extents of it. Once all the parameters of a single block are obtained,
123 * the function calls 'ProcessBlock' to process that particular Block.
124 *
125 * Documentation for the memory layout and decoding can be found at:
126 * https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html#blocklinear-surfaces
127 */
128template <bool fast>
129void SwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswizzle, const u32 width,
130 const u32 height, const u32 depth, const u32 bytes_per_pixel,
131 const u32 out_bytes_per_pixel, const u32 block_height, const u32 block_depth) {
132 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
133 const u32 stride_x = width * out_bytes_per_pixel;
134 const u32 layer_z = height * stride_x;
135 const u32 gob_x_bytes = 64;
136 const u32 gob_elements_x = gob_x_bytes / bytes_per_pixel;
137 const u32 gob_elements_y = 8;
138 const u32 gob_elements_z = 1;
139 const u32 block_x_elements = gob_elements_x;
140 const u32 block_y_elements = gob_elements_y * block_height;
141 const u32 block_z_elements = gob_elements_z * block_depth;
142 const u32 blocks_on_x = div_ceil(width, block_x_elements);
143 const u32 blocks_on_y = div_ceil(height, block_y_elements);
144 const u32 blocks_on_z = div_ceil(depth, block_z_elements);
145 const u32 blocks = blocks_on_x * blocks_on_y * blocks_on_z;
146 const u32 gob_size = gob_x_bytes * gob_elements_y * gob_elements_z;
147 const u32 xy_block_size = gob_size * block_height;
148 const u32 block_size = xy_block_size * block_depth;
149 u32 tile_offset = 0;
150 for (u32 zb = 0; zb < blocks_on_z; zb++) {
151 const u32 z_start = zb * block_z_elements;
152 const u32 z_end = std::min(depth, z_start + block_z_elements);
153 for (u32 yb = 0; yb < blocks_on_y; yb++) {
154 const u32 y_start = yb * block_y_elements;
155 const u32 y_end = std::min(height, y_start + block_y_elements);
156 for (u32 xb = 0; xb < blocks_on_x; xb++) {
157 const u32 x_start = xb * block_x_elements;
158 const u32 x_end = std::min(width, x_start + block_x_elements);
159 if (fast) {
160 FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
161 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
162 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
163 } else {
164 PreciseProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start,
165 z_start, x_end, y_end, z_end, tile_offset, xy_block_size,
166 layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel);
167 }
168 tile_offset += block_size;
169 }
97 } 170 }
98 } 171 }
99} 172}
100 173
101void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 174void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
102 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { 175 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
176 bool unswizzle, u32 block_height, u32 block_depth) {
103 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { 177 if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) {
104 FastSwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data, 178 SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
105 unswizzled_data, unswizzle, block_height); 179 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
106 } else { 180 } else {
107 LegacySwizzleData(width, height, bytes_per_pixel, out_bytes_per_pixel, swizzled_data, 181 SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth,
108 unswizzled_data, unswizzle, block_height); 182 bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth);
109 } 183 }
110} 184}
111 185
@@ -153,10 +227,11 @@ u32 BytesPerPixel(TextureFormat format) {
153} 227}
154 228
155std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, 229std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
156 u32 height, u32 block_height) { 230 u32 height, u32 depth, u32 block_height, u32 block_depth) {
157 std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); 231 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
158 CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, 232 CopySwizzledData(width / tile_size, height / tile_size, depth, bytes_per_pixel, bytes_per_pixel,
159 Memory::GetPointer(address), unswizzled_data.data(), true, block_height); 233 Memory::GetPointer(address), unswizzled_data.data(), true, block_height,
234 block_depth);
160 return unswizzled_data; 235 return unswizzled_data;
161} 236}
162 237
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 234d250af..aaf316947 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,17 +14,14 @@ namespace Tegra::Texture {
14 * Unswizzles a swizzled texture without changing its format. 14 * Unswizzles a swizzled texture without changing its format.
15 */ 15 */
16std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, 16std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width,
17 u32 height, u32 block_height = TICEntry::DefaultBlockHeight); 17 u32 height, u32 depth,
18 18 u32 block_height = TICEntry::DefaultBlockHeight,
19/** 19 u32 block_depth = TICEntry::DefaultBlockHeight);
20 * Unswizzles a swizzled depth texture without changing its format.
21 */
22std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height,
23 u32 block_height = TICEntry::DefaultBlockHeight);
24 20
25/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. 21/// Copies texture data from a buffer and performs swizzling/unswizzling as necessary.
26void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, 22void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel,
27 u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height); 23 u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
24 bool unswizzle, u32 block_height, u32 block_depth);
28 25
29/** 26/**
30 * Decodes an unswizzled texture into a A8R8G8B8 texture. 27 * Decodes an unswizzled texture into a A8R8G8B8 texture.
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 58d17abcb..5947bd2b9 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -141,6 +141,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
141 141
142struct TICEntry { 142struct TICEntry {
143 static constexpr u32 DefaultBlockHeight = 16; 143 static constexpr u32 DefaultBlockHeight = 16;
144 static constexpr u32 DefaultBlockDepth = 1;
144 145
145 union { 146 union {
146 u32 raw; 147 u32 raw;
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index cbcd5dd5f..44d423da2 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -386,8 +386,9 @@ void GraphicsSurfaceWidget::OnUpdate() {
386 386
387 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. 387 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
388 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. 388 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
389 auto unswizzled_data = Tegra::Texture::UnswizzleTexture( 389 auto unswizzled_data =
390 *address, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height); 390 Tegra::Texture::UnswizzleTexture(*address, 1, Tegra::Texture::BytesPerPixel(surface_format),
391 surface_width, surface_height, 1U);
391 392
392 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, 393 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
393 surface_width, surface_height); 394 surface_width, surface_height);