summary | refs | log | tree | commit | diff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author: bunnei, 2022-08-14 02:36:36 -0700
committer: Fernando Sahmkow, 2022-10-06 21:00:53 +0200
commit: f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 (patch)
tree: 5156a04816d6556b8babe7d69301f18098b8dd1d /src/video_core/engines
parent: Maxwell3D: Add small_index_2 (diff)
download: yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.gz
yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.xz
yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.zip
DMA & InlineToMemory Engines Rework.
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/engine_upload.cpp 46
-rw-r--r-- src/video_core/engines/engine_upload.h 6
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 13
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 13
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 5
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 91
-rw-r--r-- src/video_core/engines/maxwell_dma.h 6
7 files changed, 127 insertions, 53 deletions
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 6ff5b1eca..a34819234 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -3,6 +3,7 @@
3 3
4#include <cstring> 4#include <cstring>
5 5
6#include "common/algorithm.h"
6#include "common/assert.h" 7#include "common/assert.h"
7#include "video_core/engines/engine_upload.h" 8#include "video_core/engines/engine_upload.h"
8#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
@@ -34,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) {
34 if (!is_last_call) { 35 if (!is_last_call) {
35 return; 36 return;
36 } 37 }
38 ProcessData(inner_buffer);
39}
40
41void State::ProcessData(const u32* data, size_t num_data) {
42 std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32));
43 ProcessData(read_buffer);
44}
45
46void State::ProcessData(std::span<const u8> read_buffer) {
37 const GPUVAddr address{regs.dest.Address()}; 47 const GPUVAddr address{regs.dest.Address()};
38 if (is_linear) { 48 if (is_linear) {
39 rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer); 49 if (regs.line_count == 1) {
50 rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
51 } else {
52 for (u32 line = 0; line < regs.line_count; ++line) {
53 const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch;
54 memory_manager.WriteBlockUnsafe(
55 dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in,
56 regs.line_length_in);
57 }
58 memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count);
59 }
40 } else { 60 } else {
41 UNIMPLEMENTED_IF(regs.dest.z != 0); 61 u32 width = regs.dest.width;
42 UNIMPLEMENTED_IF(regs.dest.depth != 1); 62 u32 x_elements = regs.line_length_in;
43 UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); 63 u32 x_offset = regs.dest.x;
44 UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); 64 const u32 bpp_shift = Common::FoldRight(
65 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
66 width, x_elements, x_offset, static_cast<u32>(address));
67 width >>= bpp_shift;
68 x_elements >>= bpp_shift;
69 x_offset >>= bpp_shift;
70 const u32 bytes_per_pixel = 1U << bpp_shift;
45 const std::size_t dst_size = Tegra::Texture::CalculateSize( 71 const std::size_t dst_size = Tegra::Texture::CalculateSize(
46 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); 72 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
73 regs.dest.BlockHeight(), regs.dest.BlockDepth());
47 tmp_buffer.resize(dst_size); 74 tmp_buffer.resize(dst_size);
48 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); 75 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
49 Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, 76 Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
50 regs.dest.BlockHeight(), copy_size, inner_buffer.data(), 77 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
51 tmp_buffer.data()); 78 x_elements, regs.line_count, regs.dest.BlockHeight(),
79 regs.dest.BlockDepth(), regs.line_length_in);
52 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); 80 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size);
53 } 81 }
54} 82}
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 94ff3314a..f08f6e36a 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -3,6 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <span>
6#include <vector> 7#include <vector>
7#include "common/bit_field.h" 8#include "common/bit_field.h"
8#include "common/common_types.h" 9#include "common/common_types.h"
@@ -33,7 +34,7 @@ struct Registers {
33 u32 width; 34 u32 width;
34 u32 height; 35 u32 height;
35 u32 depth; 36 u32 depth;
36 u32 z; 37 u32 layer;
37 u32 x; 38 u32 x;
38 u32 y; 39 u32 y;
39 40
@@ -62,11 +63,14 @@ public:
62 63
63 void ProcessExec(bool is_linear_); 64 void ProcessExec(bool is_linear_);
64 void ProcessData(u32 data, bool is_last_call); 65 void ProcessData(u32 data, bool is_last_call);
66 void ProcessData(const u32* data, size_t num_data);
65 67
66 /// Binds a rasterizer to this engine. 68 /// Binds a rasterizer to this engine.
67 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); 69 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
68 70
69private: 71private:
72 void ProcessData(std::span<const u8> read_buffer);
73
70 u32 write_offset = 0; 74 u32 write_offset = 0;
71 u32 copy_size = 0; 75 u32 copy_size = 0;
72 std::vector<u8> inner_buffer; 76 std::vector<u8> inner_buffer;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 5db254d94..7c50bdbe0 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -36,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
36 } 36 }
37 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 37 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
38 upload_state.ProcessData(method_argument, is_last_call); 38 upload_state.ProcessData(method_argument, is_last_call);
39 if (is_last_call) {
40 }
41 break; 39 break;
42 } 40 }
43 case KEPLER_COMPUTE_REG_INDEX(launch): 41 case KEPLER_COMPUTE_REG_INDEX(launch):
@@ -50,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
50 48
51void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 49void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
52 u32 methods_pending) { 50 u32 methods_pending) {
53 for (std::size_t i = 0; i < amount; i++) { 51 switch (method) {
54 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); 52 case KEPLER_COMPUTE_REG_INDEX(data_upload):
53 upload_state.ProcessData(base_start, static_cast<size_t>(amount));
54 return;
55 default:
56 for (std::size_t i = 0; i < amount; i++) {
57 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
58 }
59 break;
55 } 60 }
56} 61}
57 62
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index e2b029542..a3fbab1e5 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -33,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
33 } 33 }
34 case KEPLERMEMORY_REG_INDEX(data): { 34 case KEPLERMEMORY_REG_INDEX(data): {
35 upload_state.ProcessData(method_argument, is_last_call); 35 upload_state.ProcessData(method_argument, is_last_call);
36 if (is_last_call) {
37 }
38 break; 36 break;
39 } 37 }
40 } 38 }
@@ -42,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call
42 40
43void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 41void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
44 u32 methods_pending) { 42 u32 methods_pending) {
45 for (std::size_t i = 0; i < amount; i++) { 43 switch (method) {
46 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); 44 case KEPLERMEMORY_REG_INDEX(data):
45 upload_state.ProcessData(base_start, static_cast<size_t>(amount));
46 return;
47 default:
48 for (std::size_t i = 0; i < amount; i++) {
49 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
50 }
51 break;
47 } 52 }
48} 53}
49 54
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index add1ccebe..632052c53 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -239,8 +239,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
239 return upload_state.ProcessExec(regs.exec_upload.linear != 0); 239 return upload_state.ProcessExec(regs.exec_upload.linear != 0);
240 case MAXWELL3D_REG_INDEX(data_upload): 240 case MAXWELL3D_REG_INDEX(data_upload):
241 upload_state.ProcessData(argument, is_last_call); 241 upload_state.ProcessData(argument, is_last_call);
242 if (is_last_call) {
243 }
244 return; 242 return;
245 case MAXWELL3D_REG_INDEX(fragment_barrier): 243 case MAXWELL3D_REG_INDEX(fragment_barrier):
246 return rasterizer->FragmentBarrier(); 244 return rasterizer->FragmentBarrier();
@@ -316,6 +314,9 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
316 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: 314 case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15:
317 ProcessCBMultiData(base_start, amount); 315 ProcessCBMultiData(base_start, amount);
318 break; 316 break;
317 case MAXWELL3D_REG_INDEX(data_upload):
318 upload_state.ProcessData(base_start, static_cast<size_t>(amount));
319 return;
319 default: 320 default:
320 for (std::size_t i = 0; i < amount; i++) { 321 for (std::size_t i = 0; i < amount; i++) {
321 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); 322 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 0efe58282..a12a95ce2 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -1,6 +1,7 @@
1// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/algorithm.h"
4#include "common/assert.h" 5#include "common/assert.h"
5#include "common/logging/log.h" 6#include "common/logging/log.h"
6#include "common/microprofile.h" 7#include "common/microprofile.h"
@@ -54,8 +55,6 @@ void MaxwellDMA::Launch() {
54 const LaunchDMA& launch = regs.launch_dma; 55 const LaunchDMA& launch = regs.launch_dma;
55 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); 56 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
56 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); 57 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
57 ASSERT(regs.dst_params.origin.x == 0);
58 ASSERT(regs.dst_params.origin.y == 0);
59 58
60 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; 59 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
61 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; 60 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
@@ -121,12 +120,13 @@ void MaxwellDMA::CopyPitchToPitch() {
121 120
122void MaxwellDMA::CopyBlockLinearToPitch() { 121void MaxwellDMA::CopyBlockLinearToPitch() {
123 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); 122 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
124 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
125 UNIMPLEMENTED_IF(regs.src_params.layer != 0); 123 UNIMPLEMENTED_IF(regs.src_params.layer != 0);
126 124
125 const bool is_remapping = regs.launch_dma.remap_enable != 0;
126
127 // Optimized path for micro copies. 127 // Optimized path for micro copies.
128 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 128 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
129 if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && 129 if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X &&
130 regs.src_params.height > GOB_SIZE_Y) { 130 regs.src_params.height > GOB_SIZE_Y) {
131 FastCopyBlockLinearToPitch(); 131 FastCopyBlockLinearToPitch();
132 return; 132 return;
@@ -134,10 +134,27 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
134 134
135 // Deswizzle the input and copy it over. 135 // Deswizzle the input and copy it over.
136 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); 136 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
137 const u32 bytes_per_pixel =
138 regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
139 const Parameters& src_params = regs.src_params; 137 const Parameters& src_params = regs.src_params;
140 const u32 width = src_params.width; 138
139 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
140 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
141
142 const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
143
144 u32 width = src_params.width;
145 u32 x_elements = regs.line_length_in;
146 u32 x_offset = src_params.origin.x;
147 u32 bpp_shift = 0U;
148 if (!is_remapping) {
149 bpp_shift = Common::FoldRight(
150 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
151 width, x_elements, x_offset, static_cast<u32>(regs.offset_in));
152 width >>= bpp_shift;
153 x_elements >>= bpp_shift;
154 x_offset >>= bpp_shift;
155 }
156
157 const u32 bytes_per_pixel = base_bpp << bpp_shift;
141 const u32 height = src_params.height; 158 const u32 height = src_params.height;
142 const u32 depth = src_params.depth; 159 const u32 depth = src_params.depth;
143 const u32 block_height = src_params.block_size.height; 160 const u32 block_height = src_params.block_size.height;
@@ -155,30 +172,46 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
155 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 172 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
156 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); 173 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
157 174
158 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, 175 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
159 block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), 176 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
160 read_buffer.data()); 177 regs.pitch_out);
161 178
162 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 179 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
163} 180}
164 181
165void MaxwellDMA::CopyPitchToBlockLinear() { 182void MaxwellDMA::CopyPitchToBlockLinear() {
166 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); 183 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
184 UNIMPLEMENTED_IF(regs.dst_params.layer != 0);
167 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); 185 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
168 186
187 const bool is_remapping = regs.launch_dma.remap_enable != 0;
188 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
189 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
190
169 const auto& dst_params = regs.dst_params; 191 const auto& dst_params = regs.dst_params;
170 const u32 bytes_per_pixel = 192
171 regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1; 193 const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
172 const u32 width = dst_params.width; 194
195 u32 width = dst_params.width;
196 u32 x_elements = regs.line_length_in;
197 u32 x_offset = dst_params.origin.x;
198 u32 bpp_shift = 0U;
199 if (!is_remapping) {
200 bpp_shift = Common::FoldRight(
201 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); },
202 width, x_elements, x_offset, static_cast<u32>(regs.offset_out));
203 width >>= bpp_shift;
204 x_elements >>= bpp_shift;
205 x_offset >>= bpp_shift;
206 }
207
208 const u32 bytes_per_pixel = base_bpp << bpp_shift;
173 const u32 height = dst_params.height; 209 const u32 height = dst_params.height;
174 const u32 depth = dst_params.depth; 210 const u32 depth = dst_params.depth;
175 const u32 block_height = dst_params.block_size.height; 211 const u32 block_height = dst_params.block_size.height;
176 const u32 block_depth = dst_params.block_size.depth; 212 const u32 block_depth = dst_params.block_size.depth;
177 const size_t dst_size = 213 const size_t dst_size =
178 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 214 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
179 const size_t dst_layer_size =
180 CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
181
182 const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; 215 const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
183 216
184 if (read_buffer.size() < src_size) { 217 if (read_buffer.size() < src_size) {
@@ -188,32 +221,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
188 write_buffer.resize(dst_size); 221 write_buffer.resize(dst_size);
189 } 222 }
190 223
224 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
191 if (Settings::IsGPULevelExtreme()) { 225 if (Settings::IsGPULevelExtreme()) {
192 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
193 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); 226 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
194 } else { 227 } else {
195 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
196 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); 228 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
197 } 229 }
198 230
199 // If the input is linear and the output is tiled, swizzle the input and copy it over. 231 // If the input is linear and the output is tiled, swizzle the input and copy it over.
200 if (regs.dst_params.block_size.depth > 0) { 232 SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
201 ASSERT(dst_params.layer == 0); 233 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
202 SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, 234 regs.pitch_in);
203 bytes_per_pixel, block_height, block_depth, dst_params.origin.x,
204 dst_params.origin.y, write_buffer.data(), read_buffer.data());
205 } else {
206 SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel,
207 write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(),
208 block_height, dst_params.origin.x, dst_params.origin.y);
209 }
210 235
211 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 236 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
212} 237}
213 238
214void MaxwellDMA::FastCopyBlockLinearToPitch() { 239void MaxwellDMA::FastCopyBlockLinearToPitch() {
215 const u32 bytes_per_pixel = 240 const u32 bytes_per_pixel = 1U;
216 regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1;
217 const size_t src_size = GOB_SIZE; 241 const size_t src_size = GOB_SIZE;
218 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 242 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
219 u32 pos_x = regs.src_params.origin.x; 243 u32 pos_x = regs.src_params.origin.x;
@@ -239,9 +263,10 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
239 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); 263 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
240 } 264 }
241 265
242 UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, 266 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
243 bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, 267 regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
244 write_buffer.data(), read_buffer.data()); 268 regs.src_params.block_size.height, regs.src_params.block_size.depth,
269 regs.pitch_out);
245 270
246 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 271 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
247} 272}
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 074bac92c..9c5d567a6 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -189,10 +189,16 @@ public:
189 BitField<4, 3, Swizzle> dst_y; 189 BitField<4, 3, Swizzle> dst_y;
190 BitField<8, 3, Swizzle> dst_z; 190 BitField<8, 3, Swizzle> dst_z;
191 BitField<12, 3, Swizzle> dst_w; 191 BitField<12, 3, Swizzle> dst_w;
192 BitField<0, 12, u32> dst_components_raw;
192 BitField<16, 2, u32> component_size_minus_one; 193 BitField<16, 2, u32> component_size_minus_one;
193 BitField<20, 2, u32> num_src_components_minus_one; 194 BitField<20, 2, u32> num_src_components_minus_one;
194 BitField<24, 2, u32> num_dst_components_minus_one; 195 BitField<24, 2, u32> num_dst_components_minus_one;
195 }; 196 };
197
198 Swizzle GetComponent(size_t i) {
199 const u32 raw = dst_components_raw;
200 return static_cast<Swizzle>((raw >> (i * 3)) & 0x7);
201 }
196 }; 202 };
197 static_assert(sizeof(RemapConst) == 12); 203 static_assert(sizeof(RemapConst) == 12);
198 204