summaryrefslogtreecommitdiff
path: root/src/video_core/textures/decoders.cpp
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-07-30 01:38:44 -0300
committerGravatar ReinUsesLisp2020-08-10 20:45:03 -0300
commitf00641459e773636584c524143a3e05446ee296c (patch)
treefa6b31c0cbc7d52b8ac06ce6309d635bc2d0be45 /src/video_core/textures/decoders.cpp
parentMerge pull request #4496 from lioncash/ce-desig (diff)
downloadyuzu-f00641459e773636584c524143a3e05446ee296c.tar.gz
yuzu-f00641459e773636584c524143a3e05446ee296c.tar.xz
yuzu-f00641459e773636584c524143a3e05446ee296c.zip
textures/decoders: Fix block linear to pitch copies
There were two issues with block linear copies. First the swizzling was wrong and this commit reimplements them. The other issue was that these copies are generally used to download render targets from the GPU and yuzu was not downloading them from host GPU memory unless the extreme GPU accuracy setting was selected. This commit enables cached memory reads for all accuracy levels. - Fixes level thumbnails in Super Mario Maker 2.
Diffstat (limited to 'src/video_core/textures/decoders.cpp')
-rw-r--r--src/video_core/textures/decoders.cpp42
1 files changed, 24 insertions, 18 deletions
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 474ae620a..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -228,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
228 } 228 }
229} 229}
230 230
231void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 231void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
232 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 232 u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
233 u32 block_height_bit, u32 offset_x, u32 offset_y) { 233 const u32 stride = width * bytes_per_pixel;
234 const u32 block_height = 1U << block_height_bit; 234 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
235 for (u32 line = 0; line < subrect_height; ++line) { 235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 const u32 y2 = line + offset_y; 236
237 const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + 237 const u32 block_height_mask = (1U << block_height) - 1;
238 ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
239 const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; 239
240 for (u32 x = 0; x < subrect_width; ++x) { 240 for (u32 line = 0; line < line_count; ++line) {
241 const u32 x2 = (x + offset_x) * bytes_per_pixel; 241 const u32 src_y = line + origin_y;
242 const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; 242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; 243
244 const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; 244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 u8* dest_line = unswizzled_data + unswizzled_offset; 245 const u32 src_offset_y = (block_y >> block_height) * block_size +
246 u8* source_addr = swizzled_data + swizzled_offset; 246 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
247 for (u32 column = 0; column < line_length_in; ++column) {
248 const u32 src_x = (column + origin_x) * bytes_per_pixel;
249 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
250
251 const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
252 const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
247 253
248 std::memcpy(dest_line, source_addr, bytes_per_pixel); 254 std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
249 } 255 }
250 } 256 }
251} 257}
@@ -261,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
261 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); 267 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
262 268
263 const u32 block_height_mask = (1U << block_height) - 1; 269 const u32 block_height_mask = (1U << block_height) - 1;
264 const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); 270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
265 271
266 for (u32 line = 0; line < line_count; ++line) { 272 for (u32 line = 0; line < line_count; ++line) {
267 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];