diff options
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 67 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 3 |
2 files changed, 69 insertions, 1 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 1bf6ca2dd..334429514 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -62,7 +62,8 @@ void MaxwellDMA::Launch() { | |||
| 62 | 62 | ||
| 63 | if (!is_src_pitch && !is_dst_pitch) { | 63 | if (!is_src_pitch && !is_dst_pitch) { |
| 64 | // If both the source and the destination are in block layout, assert. | 64 | // If both the source and the destination are in block layout, assert. |
| 65 | UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented"); | 65 | CopyBlockLinearToBlockLinear(); |
| 66 | ReleaseSemaphore(); | ||
| 66 | return; | 67 | return; |
| 67 | } | 68 | } |
| 68 | 69 | ||
| @@ -291,6 +292,70 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 291 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 292 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 292 | } | 293 | } |
| 293 | 294 | ||
| 295 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||
| 296 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||
| 297 | |||
| 298 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 299 | |||
| 300 | // Deswizzle the input and copy it over. | ||
| 301 | const Parameters& src = regs.src_params; | ||
| 302 | const Parameters& dst = regs.dst_params; | ||
| 303 | |||
| 304 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 305 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 306 | |||
| 307 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | ||
| 308 | |||
| 309 | u32 src_width = src.width; | ||
| 310 | u32 dst_width = dst.width; | ||
| 311 | u32 x_elements = regs.line_length_in; | ||
| 312 | u32 src_x_offset = src.origin.x; | ||
| 313 | u32 dst_x_offset = dst.origin.x; | ||
| 314 | u32 bpp_shift = 0U; | ||
| 315 | if (!is_remapping) { | ||
| 316 | bpp_shift = Common::FoldRight( | ||
| 317 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 318 | src_width, dst_width, x_elements, src_x_offset, dst_x_offset, | ||
| 319 | static_cast<u32>(regs.offset_in), static_cast<u32>(regs.offset_out)); | ||
| 320 | src_width >>= bpp_shift; | ||
| 321 | dst_width >>= bpp_shift; | ||
| 322 | x_elements >>= bpp_shift; | ||
| 323 | src_x_offset >>= bpp_shift; | ||
| 324 | dst_x_offset >>= bpp_shift; | ||
| 325 | } | ||
| 326 | |||
| 327 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 328 | const size_t src_size = CalculateSize(true, bytes_per_pixel, src_width, src.height, src.depth, | ||
| 329 | src.block_size.height, src.block_size.depth); | ||
| 330 | const size_t dst_size = CalculateSize(true, bytes_per_pixel, dst_width, dst.height, dst.depth, | ||
| 331 | dst.block_size.height, dst.block_size.depth); | ||
| 332 | |||
| 333 | const u32 pitch = x_elements * bytes_per_pixel; | ||
| 334 | const size_t mid_buffer_size = pitch * regs.line_count; | ||
| 335 | |||
| 336 | if (read_buffer.size() < src_size) { | ||
| 337 | read_buffer.resize(src_size); | ||
| 338 | } | ||
| 339 | if (write_buffer.size() < dst_size) { | ||
| 340 | write_buffer.resize(dst_size); | ||
| 341 | } | ||
| 342 | |||
| 343 | intermediate_buffer.resize(mid_buffer_size); | ||
| 344 | |||
| 345 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 346 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 347 | |||
| 348 | UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, | ||
| 349 | src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, | ||
| 350 | src.block_size.height, src.block_size.depth, pitch); | ||
| 351 | |||
| 352 | SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, | ||
| 353 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | ||
| 354 | dst.block_size.height, dst.block_size.depth, pitch); | ||
| 355 | |||
| 356 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 357 | } | ||
| 358 | |||
| 294 | void MaxwellDMA::ReleaseSemaphore() { | 359 | void MaxwellDMA::ReleaseSemaphore() { |
| 295 | const auto type = regs.launch_dma.semaphore_type; | 360 | const auto type = regs.launch_dma.semaphore_type; |
| 296 | const GPUVAddr address = regs.semaphore.address; | 361 | const GPUVAddr address = regs.semaphore.address; |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 953e34adc..d40d3d302 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -223,6 +223,8 @@ private: | |||
| 223 | 223 | ||
| 224 | void CopyPitchToBlockLinear(); | 224 | void CopyPitchToBlockLinear(); |
| 225 | 225 | ||
| 226 | void CopyBlockLinearToBlockLinear(); | ||
| 227 | |||
| 226 | void FastCopyBlockLinearToPitch(); | 228 | void FastCopyBlockLinearToPitch(); |
| 227 | 229 | ||
| 228 | void ReleaseSemaphore(); | 230 | void ReleaseSemaphore(); |
| @@ -234,6 +236,7 @@ private: | |||
| 234 | 236 | ||
| 235 | std::vector<u8> read_buffer; | 237 | std::vector<u8> read_buffer; |
| 236 | std::vector<u8> write_buffer; | 238 | std::vector<u8> write_buffer; |
| 239 | std::vector<u8> intermediate_buffer; | ||
| 237 | 240 | ||
| 238 | static constexpr std::size_t NUM_REGS = 0x800; | 241 | static constexpr std::size_t NUM_REGS = 0x800; |
| 239 | struct Regs { | 242 | struct Regs { |