diff options
| author | 2021-07-10 18:19:10 +0200 | |
|---|---|---|
| committer | 2021-07-11 01:33:17 +0200 | |
| commit | be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602 (patch) | |
| tree | 1e32f15d4c4ad9917c6fc86ddad53fdd85603859 /src/video_core/engines | |
| parent | Merge pull request #6557 from FernandoS27/staceys-mom-has-got-it-goin-on (diff) | |
| download | yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.gz yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.tar.xz yuzu-be1a3f7a0fb330b7cc5ac007ccb2cb73d4795602.zip | |
accelerateDMA: Accelerate Buffer Copies.
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 36 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 17 |
2 files changed, 43 insertions, 10 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2ee980bab..24481952b 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -21,6 +21,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) | |||
| 21 | 21 | ||
| 22 | MaxwellDMA::~MaxwellDMA() = default; | 22 | MaxwellDMA::~MaxwellDMA() = default; |
| 23 | 23 | ||
| 24 | void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||
| 25 | rasterizer = rasterizer_; | ||
| 26 | } | ||
| 27 | |||
| 24 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 28 | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| 25 | ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); | 29 | ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); |
| 26 | 30 | ||
| @@ -44,7 +48,6 @@ void MaxwellDMA::Launch() { | |||
| 44 | 48 | ||
| 45 | // TODO(Subv): Perform more research and implement all features of this engine. | 49 | // TODO(Subv): Perform more research and implement all features of this engine. |
| 46 | const LaunchDMA& launch = regs.launch_dma; | 50 | const LaunchDMA& launch = regs.launch_dma; |
| 47 | ASSERT(launch.remap_enable == 0); | ||
| 48 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); | 51 | ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); |
| 49 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | 52 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 50 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | 53 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| @@ -77,11 +80,29 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 77 | // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D | 80 | // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D |
| 78 | // buffer of length `line_length_in`. | 81 | // buffer of length `line_length_in`. |
| 79 | // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). | 82 | // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). |
| 83 | auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||
| 80 | if (!regs.launch_dma.multi_line_enable) { | 84 | if (!regs.launch_dma.multi_line_enable) { |
| 81 | memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); | 85 | const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 && |
| 86 | regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; | ||
| 87 | // TODO: allow multisized components. | ||
| 88 | if (is_buffer_clear) { | ||
| 89 | ASSERT(regs.remap_const.component_size_minus_one == 3); | ||
| 90 | std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | ||
| 91 | memory_manager.WriteBlock(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()), | ||
| 92 | regs.line_length_in * sizeof(u32)); | ||
| 93 | return; | ||
| 94 | } | ||
| 95 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 96 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||
| 97 | std::vector<u8> tmp_buffer(regs.line_length_in); | ||
| 98 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); | ||
| 99 | memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); | ||
| 100 | } | ||
| 82 | return; | 101 | return; |
| 83 | } | 102 | } |
| 84 | 103 | ||
| 104 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 105 | |||
| 85 | // Perform a line-by-line copy. | 106 | // Perform a line-by-line copy. |
| 86 | // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. | 107 | // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. |
| 87 | // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. | 108 | // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. |
| @@ -105,6 +126,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 105 | } | 126 | } |
| 106 | 127 | ||
| 107 | // Deswizzle the input and copy it over. | 128 | // Deswizzle the input and copy it over. |
| 129 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 108 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; | 130 | const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |
| 109 | const Parameters& src_params = regs.src_params; | 131 | const Parameters& src_params = regs.src_params; |
| 110 | const u32 width = src_params.width; | 132 | const u32 width = src_params.width; |
| @@ -134,6 +156,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 134 | 156 | ||
| 135 | void MaxwellDMA::CopyPitchToBlockLinear() { | 157 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 136 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | 158 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); |
| 159 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||
| 137 | 160 | ||
| 138 | const auto& dst_params = regs.dst_params; | 161 | const auto& dst_params = regs.dst_params; |
| 139 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | 162 | const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |
| @@ -156,13 +179,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 156 | write_buffer.resize(dst_size); | 179 | write_buffer.resize(dst_size); |
| 157 | } | 180 | } |
| 158 | 181 | ||
| 159 | if (Settings::IsGPULevelExtreme()) { | 182 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 160 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 183 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 161 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 162 | } else { | ||
| 163 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); | ||
| 164 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 165 | } | ||
| 166 | 184 | ||
| 167 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 185 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 168 | if (regs.dst_params.block_size.depth > 0) { | 186 | if (regs.dst_params.block_size.depth > 0) { |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c77f02a22..4ed0d0996 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -21,8 +21,18 @@ namespace Tegra { | |||
| 21 | class MemoryManager; | 21 | class MemoryManager; |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | namespace VideoCore { | ||
| 25 | class RasterizerInterface; | ||
| 26 | } | ||
| 27 | |||
| 24 | namespace Tegra::Engines { | 28 | namespace Tegra::Engines { |
| 25 | 29 | ||
| 30 | class AccelerateDMAInterface { | ||
| 31 | public: | ||
| 32 | /// Copies `amount` bytes from src_address to dest_address; returns false when the caller must fall back to copying through the memory manager itself. | ||
| 33 | virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; | ||
| 34 | }; | ||
| 35 | |||
| 26 | /** | 36 | /** |
| 27 | * This engine is known as gk104_copy. Documentation can be found in: | 37 | * This engine is known as gk104_copy. Documentation can be found in: |
| 28 | * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h | 38 | * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h |
| @@ -187,6 +197,8 @@ public: | |||
| 187 | }; | 197 | }; |
| 188 | static_assert(sizeof(RemapConst) == 12); | 198 | static_assert(sizeof(RemapConst) == 12); |
| 189 | 199 | ||
| 200 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||
| 201 | |||
| 190 | explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); | 202 | explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); |
| 191 | ~MaxwellDMA() override; | 203 | ~MaxwellDMA() override; |
| 192 | 204 | ||
| @@ -213,6 +225,7 @@ private: | |||
| 213 | Core::System& system; | 225 | Core::System& system; |
| 214 | 226 | ||
| 215 | MemoryManager& memory_manager; | 227 | MemoryManager& memory_manager; |
| 228 | VideoCore::RasterizerInterface* rasterizer; | ||
| 216 | 229 | ||
| 217 | std::vector<u8> read_buffer; | 230 | std::vector<u8> read_buffer; |
| 218 | std::vector<u8> write_buffer; | 231 | std::vector<u8> write_buffer; |
| @@ -240,7 +253,9 @@ private: | |||
| 240 | u32 pitch_out; | 253 | u32 pitch_out; |
| 241 | u32 line_length_in; | 254 | u32 line_length_in; |
| 242 | u32 line_count; | 255 | u32 line_count; |
| 243 | u32 reserved06[0xb8]; | 256 | u32 reserved06[0xb6]; |
| 257 | u32 remap_consta_value; | ||
| 258 | u32 remap_constb_value; | ||
| 244 | RemapConst remap_const; | 259 | RemapConst remap_const; |
| 245 | Parameters dst_params; | 260 | Parameters dst_params; |
| 246 | u32 reserved07[0x1]; | 261 | u32 reserved07[0x1]; |