summary | refs | log | tree | commit | diff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2021-07-10 18:19:10 +0200
committer: Fernando Sahmkow, 2021-07-11 01:33:17 +0200
commit: be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602 (patch)
tree: 1e32f15d4c4ad9917c6fc86ddad53fdd85603859 /src/video_core/engines
parent: Merge pull request #6557 from FernandoS27/staceys-mom-has-got-it-goin-on (diff)
download: yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.gz
          yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.xz
          yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.zip
accelerateDMA: Accelerate Buffer Copies.
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 36
-rw-r--r-- src/video_core/engines/maxwell_dma.h 17
2 files changed, 43 insertions, 10 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 2ee980bab..24481952b 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -21,6 +21,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
21 21
22MaxwellDMA::~MaxwellDMA() = default; 22MaxwellDMA::~MaxwellDMA() = default;
23 23
24void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
25 rasterizer = rasterizer_;
26}
27
24void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { 28void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
25 ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); 29 ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
26 30
@@ -44,7 +48,6 @@ void MaxwellDMA::Launch() {
44 48
45 // TODO(Subv): Perform more research and implement all features of this engine. 49 // TODO(Subv): Perform more research and implement all features of this engine.
46 const LaunchDMA& launch = regs.launch_dma; 50 const LaunchDMA& launch = regs.launch_dma;
47 ASSERT(launch.remap_enable == 0);
48 ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); 51 ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
49 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); 52 ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
50 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); 53 ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
@@ -77,11 +80,29 @@ void MaxwellDMA::CopyPitchToPitch() {
77 // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D 80 // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
78 // buffer of length `line_length_in`. 81 // buffer of length `line_length_in`.
79 // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). 82 // Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
83 auto& accelerate = rasterizer->AccessAccelerateDMA();
80 if (!regs.launch_dma.multi_line_enable) { 84 if (!regs.launch_dma.multi_line_enable) {
81 memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); 85 const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 &&
86 regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
87 // TODO: allow multisized components.
88 if (is_buffer_clear) {
89 ASSERT(regs.remap_const.component_size_minus_one == 3);
90 std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
91 memory_manager.WriteBlock(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
92 regs.line_length_in * sizeof(u32));
93 return;
94 }
95 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
96 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
97 std::vector<u8> tmp_buffer(regs.line_length_in);
98 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
99 memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
100 }
82 return; 101 return;
83 } 102 }
84 103
104 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
105
85 // Perform a line-by-line copy. 106 // Perform a line-by-line copy.
86 // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. 107 // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
87 // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. 108 // There is no need to manually flush/invalidate the regions because CopyBlock does that for us.
@@ -105,6 +126,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
105 } 126 }
106 127
107 // Deswizzle the input and copy it over. 128 // Deswizzle the input and copy it over.
129 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
108 const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; 130 const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in;
109 const Parameters& src_params = regs.src_params; 131 const Parameters& src_params = regs.src_params;
110 const u32 width = src_params.width; 132 const u32 width = src_params.width;
@@ -134,6 +156,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
134 156
135void MaxwellDMA::CopyPitchToBlockLinear() { 157void MaxwellDMA::CopyPitchToBlockLinear() {
136 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); 158 UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
159 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
137 160
138 const auto& dst_params = regs.dst_params; 161 const auto& dst_params = regs.dst_params;
139 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; 162 const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
@@ -156,13 +179,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
156 write_buffer.resize(dst_size); 179 write_buffer.resize(dst_size);
157 } 180 }
158 181
159 if (Settings::IsGPULevelExtreme()) { 182 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
160 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 183 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
161 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
162 } else {
163 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size);
164 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
165 }
166 184
167 // If the input is linear and the output is tiled, swizzle the input and copy it over. 185 // If the input is linear and the output is tiled, swizzle the input and copy it over.
168 if (regs.dst_params.block_size.depth > 0) { 186 if (regs.dst_params.block_size.depth > 0) {
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c77f02a22..4ed0d0996 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -21,8 +21,18 @@ namespace Tegra {
21class MemoryManager; 21class MemoryManager;
22} 22}
23 23
24namespace VideoCore {
25class RasterizerInterface;
26}
27
24namespace Tegra::Engines { 28namespace Tegra::Engines {
25 29
30class AccelerateDMAInterface {
31public:
32 /// Write the value to the register identified by method.
33 virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
34};
35
26/** 36/**
27 * This engine is known as gk104_copy. Documentation can be found in: 37 * This engine is known as gk104_copy. Documentation can be found in:
28 * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h 38 * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
@@ -187,6 +197,8 @@ public:
187 }; 197 };
188 static_assert(sizeof(RemapConst) == 12); 198 static_assert(sizeof(RemapConst) == 12);
189 199
200 void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
201
190 explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); 202 explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
191 ~MaxwellDMA() override; 203 ~MaxwellDMA() override;
192 204
@@ -213,6 +225,7 @@ private:
213 Core::System& system; 225 Core::System& system;
214 226
215 MemoryManager& memory_manager; 227 MemoryManager& memory_manager;
228 VideoCore::RasterizerInterface* rasterizer;
216 229
217 std::vector<u8> read_buffer; 230 std::vector<u8> read_buffer;
218 std::vector<u8> write_buffer; 231 std::vector<u8> write_buffer;
@@ -240,7 +253,9 @@ private:
240 u32 pitch_out; 253 u32 pitch_out;
241 u32 line_length_in; 254 u32 line_length_in;
242 u32 line_count; 255 u32 line_count;
243 u32 reserved06[0xb8]; 256 u32 reserved06[0xb6];
257 u32 remap_consta_value;
258 u32 remap_constb_value;
244 RemapConst remap_const; 259 RemapConst remap_const;
245 Parameters dst_params; 260 Parameters dst_params;
246 u32 reserved07[0x1]; 261 u32 reserved07[0x1];