summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/engines/maxwell_dma.cpp40
-rw-r--r--src/video_core/textures/decoders.cpp14
-rw-r--r--src/video_core/textures/decoders.h3
3 files changed, 57 insertions, 0 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 22ca730bc..01d7df405 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() {
90 ASSERT(regs.exec.enable_2d == 1); 90 ASSERT(regs.exec.enable_2d == 1);
91 91
92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
93
93 ASSERT(regs.src_params.BlockDepth() == 0); 94 ASSERT(regs.src_params.BlockDepth() == 0);
95 // Optimized path for micro copies.
96 if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
97 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
98 const std::size_t src_size = Texture::GetGOBSize();
99 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
100 u32 pos_x = regs.src_params.pos_x;
101 u32 pos_y = regs.src_params.pos_y;
102 const u64 offset =
103 Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
104 regs.src_params.BlockDepth(), bytes_per_pixel);
105 const u32 x_in_gob = 64 / bytes_per_pixel;
106 pos_x = pos_x % x_in_gob;
107 pos_y = pos_y % 8;
108
109 if (read_buffer.size() < src_size) {
110 read_buffer.resize(src_size);
111 }
112
113 if (write_buffer.size() < dst_size) {
114 write_buffer.resize(dst_size);
115 }
116
117 if (Settings::IsGPULevelExtreme()) {
118 memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
119 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
120 } else {
121 memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
122 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
123 }
124
125 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
126 regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
127 write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
128 pos_y);
129
130 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
131
132 return;
133 }
94 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 134 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
95 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; 135 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
96 const std::size_t src_size = Texture::CalculateSize( 136 const std::size_t src_size = Texture::CalculateSize(
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index fae8638ec..548e4c3fe 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
382 } 382 }
383} 383}
384 384
385u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
386 u32 bytes_per_pixel) {
387 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
388 const u32 gobs_in_block = 1 << block_height;
389 const u32 y_blocks = gob_size_y << block_height;
390 const u32 x_per_gob = gob_size_x / bytes_per_pixel;
391 const u32 x_blocks = div_ceil(width, x_per_gob);
392 const u32 block_size = gob_size * gobs_in_block;
393 const u32 stride = block_size * x_blocks;
394 const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
395 const u32 relative_y = dst_y % y_blocks;
396 return base + (relative_y / gob_size_y) * gob_size;
397}
398
385} // namespace Tegra::Texture 399} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 9f2d6d308..e0ff83754 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -59,4 +59,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
59void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, 59void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
60 std::size_t copy_size, const u8* source_data, u8* swizzle_data); 60 std::size_t copy_size, const u8* source_data, u8* swizzle_data);
61 61
62u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
63 u32 bytes_per_pixel);
64
62} // namespace Tegra::Texture 65} // namespace Tegra::Texture