diff options
| author | 2022-08-14 02:36:36 -0700 | |
|---|---|---|
| committer | 2022-10-06 21:00:53 +0200 | |
| commit | f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 (patch) | |
| tree | 5156a04816d6556b8babe7d69301f18098b8dd1d /src/video_core/engines | |
| parent | Maxwell3D: Add small_index_2 (diff) | |
| download | yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.gz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.tar.xz yuzu-f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6.zip | |
DMA & InlineToMemory Engines Rework.
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 46 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.h | 6 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 6 |
7 files changed, 127 insertions, 53 deletions
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 6ff5b1eca..a34819234 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <cstring> | 4 | #include <cstring> |
| 5 | 5 | ||
| 6 | #include "common/algorithm.h" | ||
| 6 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 7 | #include "video_core/engines/engine_upload.h" | 8 | #include "video_core/engines/engine_upload.h" |
| 8 | #include "video_core/memory_manager.h" | 9 | #include "video_core/memory_manager.h" |
| @@ -34,21 +35,48 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | |||
| 34 | if (!is_last_call) { | 35 | if (!is_last_call) { |
| 35 | return; | 36 | return; |
| 36 | } | 37 | } |
| 38 | ProcessData(inner_buffer); | ||
| 39 | } | ||
| 40 | |||
| 41 | void State::ProcessData(const u32* data, size_t num_data) { | ||
| 42 | std::span<const u8> read_buffer(reinterpret_cast<const u8*>(data), num_data * sizeof(u32)); | ||
| 43 | ProcessData(read_buffer); | ||
| 44 | } | ||
| 45 | |||
| 46 | void State::ProcessData(std::span<const u8> read_buffer) { | ||
| 37 | const GPUVAddr address{regs.dest.Address()}; | 47 | const GPUVAddr address{regs.dest.Address()}; |
| 38 | if (is_linear) { | 48 | if (is_linear) { |
| 39 | rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer); | 49 | if (regs.line_count == 1) { |
| 50 | rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); | ||
| 51 | } else { | ||
| 52 | for (u32 line = 0; line < regs.line_count; ++line) { | ||
| 53 | const GPUVAddr dest_line = address + static_cast<size_t>(line) * regs.dest.pitch; | ||
| 54 | memory_manager.WriteBlockUnsafe( | ||
| 55 | dest_line, read_buffer.data() + static_cast<size_t>(line) * regs.line_length_in, | ||
| 56 | regs.line_length_in); | ||
| 57 | } | ||
| 58 | memory_manager.InvalidateRegion(address, regs.dest.pitch * regs.line_count); | ||
| 59 | } | ||
| 40 | } else { | 60 | } else { |
| 41 | UNIMPLEMENTED_IF(regs.dest.z != 0); | 61 | u32 width = regs.dest.width; |
| 42 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | 62 | u32 x_elements = regs.line_length_in; |
| 43 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); | 63 | u32 x_offset = regs.dest.x; |
| 44 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); | 64 | const u32 bpp_shift = Common::FoldRight( |
| 65 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 66 | width, x_elements, x_offset, static_cast<u32>(address)); | ||
| 67 | width >>= bpp_shift; | ||
| 68 | x_elements >>= bpp_shift; | ||
| 69 | x_offset >>= bpp_shift; | ||
| 70 | const u32 bytes_per_pixel = 1U << bpp_shift; | ||
| 45 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 71 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 46 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); | 72 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 73 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | ||
| 47 | tmp_buffer.resize(dst_size); | 74 | tmp_buffer.resize(dst_size); |
| 48 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 75 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 49 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | 76 | Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, |
| 50 | regs.dest.BlockHeight(), copy_size, inner_buffer.data(), | 77 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |
| 51 | tmp_buffer.data()); | 78 | x_elements, regs.line_count, regs.dest.BlockHeight(), |
| 79 | regs.dest.BlockDepth(), regs.line_length_in); | ||
| 52 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | 80 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); |
| 53 | } | 81 | } |
| 54 | } | 82 | } |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 94ff3314a..f08f6e36a 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <span> | ||
| 6 | #include <vector> | 7 | #include <vector> |
| 7 | #include "common/bit_field.h" | 8 | #include "common/bit_field.h" |
| 8 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| @@ -33,7 +34,7 @@ struct Registers { | |||
| 33 | u32 width; | 34 | u32 width; |
| 34 | u32 height; | 35 | u32 height; |
| 35 | u32 depth; | 36 | u32 depth; |
| 36 | u32 z; | 37 | u32 layer; |
| 37 | u32 x; | 38 | u32 x; |
| 38 | u32 y; | 39 | u32 y; |
| 39 | 40 | ||
| @@ -62,11 +63,14 @@ public: | |||
| 62 | 63 | ||
| 63 | void ProcessExec(bool is_linear_); | 64 | void ProcessExec(bool is_linear_); |
| 64 | void ProcessData(u32 data, bool is_last_call); | 65 | void ProcessData(u32 data, bool is_last_call); |
| 66 | void ProcessData(const u32* data, size_t num_data); | ||
| 65 | 67 | ||
| 66 | /// Binds a rasterizer to this engine. | 68 | /// Binds a rasterizer to this engine. |
| 67 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | 69 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 68 | 70 | ||
| 69 | private: | 71 | private: |
| 72 | void ProcessData(std::span<const u8> read_buffer); | ||
| 73 | |||
| 70 | u32 write_offset = 0; | 74 | u32 write_offset = 0; |
| 71 | u32 copy_size = 0; | 75 | u32 copy_size = 0; |
| 72 | std::vector<u8> inner_buffer; | 76 | std::vector<u8> inner_buffer; |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 5db254d94..7c50bdbe0 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -36,8 +36,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 36 | } | 36 | } |
| 37 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 37 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 38 | upload_state.ProcessData(method_argument, is_last_call); | 38 | upload_state.ProcessData(method_argument, is_last_call); |
| 39 | if (is_last_call) { | ||
| 40 | } | ||
| 41 | break; | 39 | break; |
| 42 | } | 40 | } |
| 43 | case KEPLER_COMPUTE_REG_INDEX(launch): | 41 | case KEPLER_COMPUTE_REG_INDEX(launch): |
| @@ -50,8 +48,15 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 50 | 48 | ||
| 51 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 49 | void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 52 | u32 methods_pending) { | 50 | u32 methods_pending) { |
| 53 | for (std::size_t i = 0; i < amount; i++) { | 51 | switch (method) { |
| 54 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 52 | case KEPLER_COMPUTE_REG_INDEX(data_upload): |
| 53 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 54 | return; | ||
| 55 | default: | ||
| 56 | for (std::size_t i = 0; i < amount; i++) { | ||
| 57 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||
| 58 | } | ||
| 59 | break; | ||
| 55 | } | 60 | } |
| 56 | } | 61 | } |
| 57 | 62 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index e2b029542..a3fbab1e5 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -33,8 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 33 | } | 33 | } |
| 34 | case KEPLERMEMORY_REG_INDEX(data): { | 34 | case KEPLERMEMORY_REG_INDEX(data): { |
| 35 | upload_state.ProcessData(method_argument, is_last_call); | 35 | upload_state.ProcessData(method_argument, is_last_call); |
| 36 | if (is_last_call) { | ||
| 37 | } | ||
| 38 | break; | 36 | break; |
| 39 | } | 37 | } |
| 40 | } | 38 | } |
| @@ -42,8 +40,15 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 42 | 40 | ||
| 43 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | 41 | void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, |
| 44 | u32 methods_pending) { | 42 | u32 methods_pending) { |
| 45 | for (std::size_t i = 0; i < amount; i++) { | 43 | switch (method) { |
| 46 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 44 | case KEPLERMEMORY_REG_INDEX(data): |
| 45 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 46 | return; | ||
| 47 | default: | ||
| 48 | for (std::size_t i = 0; i < amount; i++) { | ||
| 49 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | ||
| 50 | } | ||
| 51 | break; | ||
| 47 | } | 52 | } |
| 48 | } | 53 | } |
| 49 | 54 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index add1ccebe..632052c53 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -239,8 +239,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 239 | return upload_state.ProcessExec(regs.exec_upload.linear != 0); | 239 | return upload_state.ProcessExec(regs.exec_upload.linear != 0); |
| 240 | case MAXWELL3D_REG_INDEX(data_upload): | 240 | case MAXWELL3D_REG_INDEX(data_upload): |
| 241 | upload_state.ProcessData(argument, is_last_call); | 241 | upload_state.ProcessData(argument, is_last_call); |
| 242 | if (is_last_call) { | ||
| 243 | } | ||
| 244 | return; | 242 | return; |
| 245 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 243 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| 246 | return rasterizer->FragmentBarrier(); | 244 | return rasterizer->FragmentBarrier(); |
| @@ -316,6 +314,9 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | |||
| 316 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: | 314 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data) + 15: |
| 317 | ProcessCBMultiData(base_start, amount); | 315 | ProcessCBMultiData(base_start, amount); |
| 318 | break; | 316 | break; |
| 317 | case MAXWELL3D_REG_INDEX(data_upload): | ||
| 318 | upload_state.ProcessData(base_start, static_cast<size_t>(amount)); | ||
| 319 | return; | ||
| 319 | default: | 320 | default: |
| 320 | for (std::size_t i = 0; i < amount; i++) { | 321 | for (std::size_t i = 0; i < amount; i++) { |
| 321 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); | 322 | CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0efe58282..a12a95ce2 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/algorithm.h" | ||
| 4 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 5 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 6 | #include "common/microprofile.h" | 7 | #include "common/microprofile.h" |
| @@ -54,8 +55,6 @@ void MaxwellDMA::Launch() { | |||
| 54 | const LaunchDMA& launch = regs.launch_dma; | 55 | const LaunchDMA& launch = regs.launch_dma; |
| 55 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | 56 | ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |
| 56 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | 57 | ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |
| 57 | ASSERT(regs.dst_params.origin.x == 0); | ||
| 58 | ASSERT(regs.dst_params.origin.y == 0); | ||
| 59 | 58 | ||
| 60 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 59 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 61 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 60 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| @@ -121,12 +120,13 @@ void MaxwellDMA::CopyPitchToPitch() { | |||
| 121 | 120 | ||
| 122 | void MaxwellDMA::CopyBlockLinearToPitch() { | 121 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 123 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | 122 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); |
| 124 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | ||
| 125 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); | 123 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); |
| 126 | 124 | ||
| 125 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 126 | |||
| 127 | // Optimized path for micro copies. | 127 | // Optimized path for micro copies. |
| 128 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 128 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 129 | if (dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && | 129 | if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && |
| 130 | regs.src_params.height > GOB_SIZE_Y) { | 130 | regs.src_params.height > GOB_SIZE_Y) { |
| 131 | FastCopyBlockLinearToPitch(); | 131 | FastCopyBlockLinearToPitch(); |
| 132 | return; | 132 | return; |
| @@ -134,10 +134,27 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 134 | 134 | ||
| 135 | // Deswizzle the input and copy it over. | 135 | // Deswizzle the input and copy it over. |
| 136 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | 136 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); |
| 137 | const u32 bytes_per_pixel = | ||
| 138 | regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; | ||
| 139 | const Parameters& src_params = regs.src_params; | 137 | const Parameters& src_params = regs.src_params; |
| 140 | const u32 width = src_params.width; | 138 | |
| 139 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 140 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 141 | |||
| 142 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | ||
| 143 | |||
| 144 | u32 width = src_params.width; | ||
| 145 | u32 x_elements = regs.line_length_in; | ||
| 146 | u32 x_offset = src_params.origin.x; | ||
| 147 | u32 bpp_shift = 0U; | ||
| 148 | if (!is_remapping) { | ||
| 149 | bpp_shift = Common::FoldRight( | ||
| 150 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 151 | width, x_elements, x_offset, static_cast<u32>(regs.offset_in)); | ||
| 152 | width >>= bpp_shift; | ||
| 153 | x_elements >>= bpp_shift; | ||
| 154 | x_offset >>= bpp_shift; | ||
| 155 | } | ||
| 156 | |||
| 157 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 141 | const u32 height = src_params.height; | 158 | const u32 height = src_params.height; |
| 142 | const u32 depth = src_params.depth; | 159 | const u32 depth = src_params.depth; |
| 143 | const u32 block_height = src_params.block_size.height; | 160 | const u32 block_height = src_params.block_size.height; |
| @@ -155,30 +172,46 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 155 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 172 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |
| 156 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 173 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 157 | 174 | ||
| 158 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, | 175 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 159 | block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), | 176 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 160 | read_buffer.data()); | 177 | regs.pitch_out); |
| 161 | 178 | ||
| 162 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 179 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 163 | } | 180 | } |
| 164 | 181 | ||
| 165 | void MaxwellDMA::CopyPitchToBlockLinear() { | 182 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| 166 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | 183 | UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); |
| 184 | UNIMPLEMENTED_IF(regs.dst_params.layer != 0); | ||
| 167 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | 185 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); |
| 168 | 186 | ||
| 187 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 188 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||
| 189 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||
| 190 | |||
| 169 | const auto& dst_params = regs.dst_params; | 191 | const auto& dst_params = regs.dst_params; |
| 170 | const u32 bytes_per_pixel = | 192 | |
| 171 | regs.launch_dma.remap_enable ? regs.pitch_in / regs.line_length_in : 1; | 193 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; |
| 172 | const u32 width = dst_params.width; | 194 | |
| 195 | u32 width = dst_params.width; | ||
| 196 | u32 x_elements = regs.line_length_in; | ||
| 197 | u32 x_offset = dst_params.origin.x; | ||
| 198 | u32 bpp_shift = 0U; | ||
| 199 | if (!is_remapping) { | ||
| 200 | bpp_shift = Common::FoldRight( | ||
| 201 | 4U, [](u32 x, u32 y) { return std::min(x, static_cast<u32>(std::countr_zero(y))); }, | ||
| 202 | width, x_elements, x_offset, static_cast<u32>(regs.offset_out)); | ||
| 203 | width >>= bpp_shift; | ||
| 204 | x_elements >>= bpp_shift; | ||
| 205 | x_offset >>= bpp_shift; | ||
| 206 | } | ||
| 207 | |||
| 208 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | ||
| 173 | const u32 height = dst_params.height; | 209 | const u32 height = dst_params.height; |
| 174 | const u32 depth = dst_params.depth; | 210 | const u32 depth = dst_params.depth; |
| 175 | const u32 block_height = dst_params.block_size.height; | 211 | const u32 block_height = dst_params.block_size.height; |
| 176 | const u32 block_depth = dst_params.block_size.depth; | 212 | const u32 block_depth = dst_params.block_size.depth; |
| 177 | const size_t dst_size = | 213 | const size_t dst_size = |
| 178 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 214 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 179 | const size_t dst_layer_size = | ||
| 180 | CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); | ||
| 181 | |||
| 182 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | 215 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; |
| 183 | 216 | ||
| 184 | if (read_buffer.size() < src_size) { | 217 | if (read_buffer.size() < src_size) { |
| @@ -188,32 +221,23 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 188 | write_buffer.resize(dst_size); | 221 | write_buffer.resize(dst_size); |
| 189 | } | 222 | } |
| 190 | 223 | ||
| 224 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 191 | if (Settings::IsGPULevelExtreme()) { | 225 | if (Settings::IsGPULevelExtreme()) { |
| 192 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||
| 193 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 226 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 194 | } else { | 227 | } else { |
| 195 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); | ||
| 196 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 228 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |
| 197 | } | 229 | } |
| 198 | 230 | ||
| 199 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 231 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 200 | if (regs.dst_params.block_size.depth > 0) { | 232 | SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 201 | ASSERT(dst_params.layer == 0); | 233 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 202 | SwizzleSliceToVoxel(regs.line_length_in, regs.line_count, regs.pitch_in, width, height, | 234 | regs.pitch_in); |
| 203 | bytes_per_pixel, block_height, block_depth, dst_params.origin.x, | ||
| 204 | dst_params.origin.y, write_buffer.data(), read_buffer.data()); | ||
| 205 | } else { | ||
| 206 | SwizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_in, width, bytes_per_pixel, | ||
| 207 | write_buffer.data() + dst_layer_size * dst_params.layer, read_buffer.data(), | ||
| 208 | block_height, dst_params.origin.x, dst_params.origin.y); | ||
| 209 | } | ||
| 210 | 235 | ||
| 211 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 236 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 212 | } | 237 | } |
| 213 | 238 | ||
| 214 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | 239 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| 215 | const u32 bytes_per_pixel = | 240 | const u32 bytes_per_pixel = 1U; |
| 216 | regs.launch_dma.remap_enable ? regs.pitch_out / regs.line_length_in : 1; | ||
| 217 | const size_t src_size = GOB_SIZE; | 241 | const size_t src_size = GOB_SIZE; |
| 218 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 242 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |
| 219 | u32 pos_x = regs.src_params.origin.x; | 243 | u32 pos_x = regs.src_params.origin.x; |
| @@ -239,9 +263,10 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 239 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 263 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |
| 240 | } | 264 | } |
| 241 | 265 | ||
| 242 | UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, | 266 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width, |
| 243 | bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, | 267 | regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, |
| 244 | write_buffer.data(), read_buffer.data()); | 268 | regs.src_params.block_size.height, regs.src_params.block_size.depth, |
| 269 | regs.pitch_out); | ||
| 245 | 270 | ||
| 246 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 271 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); |
| 247 | } | 272 | } |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 074bac92c..9c5d567a6 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -189,10 +189,16 @@ public: | |||
| 189 | BitField<4, 3, Swizzle> dst_y; | 189 | BitField<4, 3, Swizzle> dst_y; |
| 190 | BitField<8, 3, Swizzle> dst_z; | 190 | BitField<8, 3, Swizzle> dst_z; |
| 191 | BitField<12, 3, Swizzle> dst_w; | 191 | BitField<12, 3, Swizzle> dst_w; |
| 192 | BitField<0, 12, u32> dst_components_raw; | ||
| 192 | BitField<16, 2, u32> component_size_minus_one; | 193 | BitField<16, 2, u32> component_size_minus_one; |
| 193 | BitField<20, 2, u32> num_src_components_minus_one; | 194 | BitField<20, 2, u32> num_src_components_minus_one; |
| 194 | BitField<24, 2, u32> num_dst_components_minus_one; | 195 | BitField<24, 2, u32> num_dst_components_minus_one; |
| 195 | }; | 196 | }; |
| 197 | |||
| 198 | Swizzle GetComponent(size_t i) { | ||
| 199 | const u32 raw = dst_components_raw; | ||
| 200 | return static_cast<Swizzle>((raw >> (i * 3)) & 0x7); | ||
| 201 | } | ||
| 196 | }; | 202 | }; |
| 197 | static_assert(sizeof(RemapConst) == 12); | 203 | static_assert(sizeof(RemapConst) == 12); |
| 198 | 204 | ||