summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-07-30 01:38:44 -0300
committerGravatar ReinUsesLisp2020-08-10 20:45:03 -0300
commitf00641459e773636584c524143a3e05446ee296c (patch)
treefa6b31c0cbc7d52b8ac06ce6309d635bc2d0be45 /src
parentMerge pull request #4496 from lioncash/ce-desig (diff)
downloadyuzu-f00641459e773636584c524143a3e05446ee296c.tar.gz
yuzu-f00641459e773636584c524143a3e05446ee296c.tar.xz
yuzu-f00641459e773636584c524143a3e05446ee296c.zip
textures/decoders: Fix block linear to pitch copies
There were two issues with block linear copies. First the swizzling was wrong and this commit reimplements them. The other issue was that these copies are generally used to download render targets from the GPU and yuzu was not downloading them from host GPU memory unless the extreme GPU accuracy setting was selected. This commit enables cached memory reads for all accuracy levels. - Fixes level thumbnails in Super Mario Maker 2.
Diffstat (limited to '')
-rw-r--r--src/video_core/engines/maxwell_dma.cpp21
-rw-r--r--src/video_core/textures/decoders.cpp42
-rw-r--r--src/video_core/textures/decoders.h5
3 files changed, 34 insertions, 34 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a2d3d7823..e88290754 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -94,7 +94,8 @@ void MaxwellDMA::CopyPitchToPitch() {
94} 94}
95 95
96void MaxwellDMA::CopyBlockLinearToPitch() { 96void MaxwellDMA::CopyBlockLinearToPitch() {
97 ASSERT(regs.src_params.block_size.depth == 0); 97 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
98 UNIMPLEMENTED_IF(regs.src_params.layer != 0);
98 99
99 // Optimized path for micro copies. 100 // Optimized path for micro copies.
100 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 101 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
@@ -123,17 +124,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
123 write_buffer.resize(dst_size); 124 write_buffer.resize(dst_size);
124 } 125 }
125 126
126 if (Settings::IsGPULevelExtreme()) { 127 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
127 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 128 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
128 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
129 } else {
130 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
131 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
132 }
133 129
134 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, 130 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel,
135 read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(), 131 block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(),
136 block_height, src_params.origin.x, src_params.origin.y); 132 read_buffer.data());
137 133
138 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 134 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
139} 135}
@@ -198,7 +194,6 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
198 if (read_buffer.size() < src_size) { 194 if (read_buffer.size() < src_size) {
199 read_buffer.resize(src_size); 195 read_buffer.resize(src_size);
200 } 196 }
201
202 if (write_buffer.size() < dst_size) { 197 if (write_buffer.size() < dst_size) {
203 write_buffer.resize(dst_size); 198 write_buffer.resize(dst_size);
204 } 199 }
@@ -212,8 +207,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
212 } 207 }
213 208
214 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, 209 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width,
215 bytes_per_pixel, read_buffer.data(), write_buffer.data(), 210 bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y,
216 regs.src_params.block_size.height, pos_x, pos_y); 211 write_buffer.data(), read_buffer.data());
217 212
218 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 213 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
219} 214}
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 474ae620a..16d46a018 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -228,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
228 } 228 }
229} 229}
230 230
231void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 231void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
232 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 232 u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) {
233 u32 block_height_bit, u32 offset_x, u32 offset_y) { 233 const u32 stride = width * bytes_per_pixel;
234 const u32 block_height = 1U << block_height_bit; 234 const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X;
235 for (u32 line = 0; line < subrect_height; ++line) { 235 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height);
236 const u32 y2 = line + offset_y; 236
237 const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + 237 const u32 block_height_mask = (1U << block_height) - 1;
238 ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; 238 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height;
239 const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; 239
240 for (u32 x = 0; x < subrect_width; ++x) { 240 for (u32 line = 0; line < line_count; ++line) {
241 const u32 x2 = (x + offset_x) * bytes_per_pixel; 241 const u32 src_y = line + origin_y;
242 const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; 242 const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y];
243 const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; 243
244 const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; 244 const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT;
245 u8* dest_line = unswizzled_data + unswizzled_offset; 245 const u32 src_offset_y = (block_y >> block_height) * block_size +
246 u8* source_addr = swizzled_data + swizzled_offset; 246 ((block_y & block_height_mask) << GOB_SIZE_SHIFT);
247 for (u32 column = 0; column < line_length_in; ++column) {
248 const u32 src_x = (column + origin_x) * bytes_per_pixel;
249 const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift;
250
251 const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X];
252 const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel;
247 253
248 std::memcpy(dest_line, source_addr, bytes_per_pixel); 254 std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel);
249 } 255 }
250 } 256 }
251} 257}
@@ -261,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt
261 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); 267 const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
262 268
263 const u32 block_height_mask = (1U << block_height) - 1; 269 const u32 block_height_mask = (1U << block_height) - 1;
264 const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); 270 const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth;
265 271
266 for (u32 line = 0; line < line_count; ++line) { 272 for (u32 line = 0; line < line_count; ++line) {
267 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; 273 const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y];
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index d6fe35d37..01e156bc8 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -48,9 +48,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
48 u32 block_height_bit, u32 offset_x, u32 offset_y); 48 u32 block_height_bit, u32 offset_x, u32 offset_y);
49 49
50/// Copies a tiled subrectangle into a linear surface. 50/// Copies a tiled subrectangle into a linear surface.
51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 51void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel,
52 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, 52 u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input);
53 u32 offset_x, u32 offset_y);
54 53
55/// @brief Swizzles a 2D array of pixels into a 3D texture 54/// @brief Swizzles a 2D array of pixels into a 3D texture
56/// @param line_length_in Number of pixels per line 55/// @param line_length_in Number of pixels per line