diff options
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 40 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/textures/decoders.h | 3 |
3 files changed, 57 insertions, 0 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 22ca730bc..01d7df405 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() { | |||
| 90 | ASSERT(regs.exec.enable_2d == 1); | 90 | ASSERT(regs.exec.enable_2d == 1); |
| 91 | 91 | ||
| 92 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 92 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
| 93 | |||
| 93 | ASSERT(regs.src_params.BlockDepth() == 0); | 94 | ASSERT(regs.src_params.BlockDepth() == 0); |
| 95 | // Optimized path for micro copies. | ||
| 96 | if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { | ||
| 97 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||
| 98 | const std::size_t src_size = Texture::GetGOBSize(); | ||
| 99 | const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||
| 100 | u32 pos_x = regs.src_params.pos_x; | ||
| 101 | u32 pos_y = regs.src_params.pos_y; | ||
| 102 | const u64 offset = | ||
| 103 | Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, | ||
| 104 | regs.src_params.BlockDepth(), bytes_per_pixel); | ||
| 105 | const u32 x_in_gob = 64 / bytes_per_pixel; | ||
| 106 | pos_x = pos_x % x_in_gob; | ||
| 107 | pos_y = pos_y % 8; | ||
| 108 | |||
| 109 | if (read_buffer.size() < src_size) { | ||
| 110 | read_buffer.resize(src_size); | ||
| 111 | } | ||
| 112 | |||
| 113 | if (write_buffer.size() < dst_size) { | ||
| 114 | write_buffer.resize(dst_size); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (Settings::IsGPULevelExtreme()) { | ||
| 118 | memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); | ||
| 119 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 120 | } else { | ||
| 121 | memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); | ||
| 122 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 123 | } | ||
| 124 | |||
| 125 | Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||
| 126 | regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), | ||
| 127 | write_buffer.data(), regs.src_params.BlockHeight(), pos_x, | ||
| 128 | pos_y); | ||
| 129 | |||
| 130 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||
| 131 | |||
| 132 | return; | ||
| 133 | } | ||
| 94 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. | 134 | // If the input is tiled and the output is linear, deswizzle the input and copy it over. |
| 95 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | 135 | const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |
| 96 | const std::size_t src_size = Texture::CalculateSize( | 136 | const std::size_t src_size = Texture::CalculateSize( |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index fae8638ec..548e4c3fe 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 382 | } | 382 | } |
| 383 | } | 383 | } |
| 384 | 384 | ||
| 385 | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 386 | u32 bytes_per_pixel) { | ||
| 387 | auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||
| 388 | const u32 gobs_in_block = 1 << block_height; | ||
| 389 | const u32 y_blocks = gob_size_y << block_height; | ||
| 390 | const u32 x_per_gob = gob_size_x / bytes_per_pixel; | ||
| 391 | const u32 x_blocks = div_ceil(width, x_per_gob); | ||
| 392 | const u32 block_size = gob_size * gobs_in_block; | ||
| 393 | const u32 stride = block_size * x_blocks; | ||
| 394 | const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | ||
| 395 | const u32 relative_y = dst_y % y_blocks; | ||
| 396 | return base + (relative_y / gob_size_y) * gob_size; | ||
| 397 | } | ||
| 398 | |||
| 385 | } // namespace Tegra::Texture | 399 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 9f2d6d308..e0ff83754 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -59,4 +59,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | 59 | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, |
| 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); | 60 | std::size_t copy_size, const u8* source_data, u8* swizzle_data); |
| 61 | 61 | ||
/// Returns the byte offset, from the start of a tiled surface, of the GOB that contains the
/// pixel at (dst_x, dst_y).
///
/// @param width           Surface width in pixels; used to derive the number of blocks per row.
/// @param height          Surface height; accepted for the interface but not used by the
///                        implementation.
/// @param dst_x           X coordinate of the pixel.
/// @param dst_y           Y coordinate (row) of the pixel.
/// @param block_height    log2 of the number of GOBs stacked vertically in one block.
/// @param bytes_per_pixel Size of one pixel in bytes; must be non-zero and no larger than the
///                        GOB width in bytes.
/// @return Offset in bytes of the start of the selected GOB.
| 62 | | | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||
| 63 | | | u32 bytes_per_pixel); | ||
| 64 | |||
| 62 | } // namespace Tegra::Texture | 65 | } // namespace Tegra::Texture |