diff options
| author | 2023-03-05 15:30:47 +0100 | |
|---|---|---|
| committer | 2023-03-05 15:30:47 +0100 | |
| commit | fdae95efaa84fe1baeab0b4dd1435720cae0f88d (patch) | |
| tree | 383070bd0d4a33189f38423ceea9a5692d38ba09 /src/video_core/engines | |
| parent | Merge pull request #9884 from liamwhite/service-cleanup (diff) | |
| parent | Engines: Implement Accelerate DMA Texture. (diff) | |
| download | yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.tar.gz yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.tar.xz yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.zip | |
Merge pull request #9786 from FernandoS27/the-gaia-is-a-lie
YFC - Engines: Implement Accelerate DMA Texture.
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 107 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.h | 88 |
2 files changed, 117 insertions, 78 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 7762c7d96..e68850dc5 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -14,7 +14,13 @@ | |||
| 14 | #include "video_core/textures/decoders.h" | 14 | #include "video_core/textures/decoders.h" |
| 15 | 15 | ||
| 16 | MICROPROFILE_DECLARE(GPU_DMAEngine); | 16 | MICROPROFILE_DECLARE(GPU_DMAEngine); |
| 17 | MICROPROFILE_DECLARE(GPU_DMAEngineBL); | ||
| 18 | MICROPROFILE_DECLARE(GPU_DMAEngineLB); | ||
| 19 | MICROPROFILE_DECLARE(GPU_DMAEngineBB); | ||
| 17 | MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); | 20 | MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); |
| 21 | MICROPROFILE_DEFINE(GPU_DMAEngineBL, "GPU", "DMA Engine Block - Linear", MP_RGB(224, 224, 128)); | ||
| 22 | MICROPROFILE_DEFINE(GPU_DMAEngineLB, "GPU", "DMA Engine Linear - Block", MP_RGB(224, 224, 128)); | ||
| 23 | MICROPROFILE_DEFINE(GPU_DMAEngineBB, "GPU", "DMA Engine Block - Block", MP_RGB(224, 224, 128)); | ||
| 18 | 24 | ||
| 19 | namespace Tegra::Engines { | 25 | namespace Tegra::Engines { |
| 20 | 26 | ||
| @@ -72,6 +78,7 @@ void MaxwellDMA::Launch() { | |||
| 72 | memory_manager.FlushCaching(); | 78 | memory_manager.FlushCaching(); |
| 73 | if (!is_src_pitch && !is_dst_pitch) { | 79 | if (!is_src_pitch && !is_dst_pitch) { |
| 74 | // If both the source and the destination are in block layout, assert. | 80 | // If both the source and the destination are in block layout, assert. |
| 81 | MICROPROFILE_SCOPE(GPU_DMAEngineBB); | ||
| 75 | CopyBlockLinearToBlockLinear(); | 82 | CopyBlockLinearToBlockLinear(); |
| 76 | ReleaseSemaphore(); | 83 | ReleaseSemaphore(); |
| 77 | return; | 84 | return; |
| @@ -87,8 +94,10 @@ void MaxwellDMA::Launch() { | |||
| 87 | } | 94 | } |
| 88 | } else { | 95 | } else { |
| 89 | if (!is_src_pitch && is_dst_pitch) { | 96 | if (!is_src_pitch && is_dst_pitch) { |
| 97 | MICROPROFILE_SCOPE(GPU_DMAEngineBL); | ||
| 90 | CopyBlockLinearToPitch(); | 98 | CopyBlockLinearToPitch(); |
| 91 | } else { | 99 | } else { |
| 100 | MICROPROFILE_SCOPE(GPU_DMAEngineLB); | ||
| 92 | CopyPitchToBlockLinear(); | 101 | CopyPitchToBlockLinear(); |
| 93 | } | 102 | } |
| 94 | } | 103 | } |
| @@ -153,21 +162,35 @@ void MaxwellDMA::Launch() { | |||
| 153 | } | 162 | } |
| 154 | 163 | ||
| 155 | void MaxwellDMA::CopyBlockLinearToPitch() { | 164 | void MaxwellDMA::CopyBlockLinearToPitch() { |
| 156 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | 165 | UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); |
| 157 | UNIMPLEMENTED_IF(regs.src_params.layer != 0); | 166 | |
| 158 | 167 | u32 bytes_per_pixel = 1; | |
| 159 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | 168 | DMA::ImageOperand src_operand; |
| 160 | 169 | src_operand.bytes_per_pixel = bytes_per_pixel; | |
| 161 | // Optimized path for micro copies. | 170 | src_operand.params = regs.src_params; |
| 162 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 171 | src_operand.address = regs.offset_in; |
| 163 | if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && | 172 | |
| 164 | regs.src_params.height > GOB_SIZE_Y) { | 173 | DMA::BufferOperand dst_operand; |
| 165 | FastCopyBlockLinearToPitch(); | 174 | dst_operand.pitch = regs.pitch_out; |
| 175 | dst_operand.width = regs.line_length_in; | ||
| 176 | dst_operand.height = regs.line_count; | ||
| 177 | dst_operand.address = regs.offset_out; | ||
| 178 | DMA::ImageCopy copy_info{}; | ||
| 179 | copy_info.length_x = regs.line_length_in; | ||
| 180 | copy_info.length_y = regs.line_count; | ||
| 181 | auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||
| 182 | if (accelerate.ImageToBuffer(copy_info, src_operand, dst_operand)) { | ||
| 166 | return; | 183 | return; |
| 167 | } | 184 | } |
| 168 | 185 | ||
| 186 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||
| 187 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | ||
| 188 | UNIMPLEMENTED_IF(regs.src_params.block_size.depth == 0 && regs.src_params.depth != 1); | ||
| 189 | |||
| 169 | // Deswizzle the input and copy it over. | 190 | // Deswizzle the input and copy it over. |
| 170 | const Parameters& src_params = regs.src_params; | 191 | const DMA::Parameters& src_params = regs.src_params; |
| 192 | |||
| 193 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||
| 171 | 194 | ||
| 172 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | 195 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |
| 173 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | 196 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |
| @@ -187,7 +210,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 187 | x_offset >>= bpp_shift; | 210 | x_offset >>= bpp_shift; |
| 188 | } | 211 | } |
| 189 | 212 | ||
| 190 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | 213 | bytes_per_pixel = base_bpp << bpp_shift; |
| 191 | const u32 height = src_params.height; | 214 | const u32 height = src_params.height; |
| 192 | const u32 depth = src_params.depth; | 215 | const u32 depth = src_params.depth; |
| 193 | const u32 block_height = src_params.block_size.height; | 216 | const u32 block_height = src_params.block_size.height; |
| @@ -195,11 +218,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 195 | const size_t src_size = | 218 | const size_t src_size = |
| 196 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 219 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 197 | 220 | ||
| 221 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | ||
| 198 | read_buffer.resize_destructive(src_size); | 222 | read_buffer.resize_destructive(src_size); |
| 199 | write_buffer.resize_destructive(dst_size); | 223 | write_buffer.resize_destructive(dst_size); |
| 200 | 224 | ||
| 201 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 225 | memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); |
| 202 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 226 | memory_manager.ReadBlockUnsafe(dst_operand.address, write_buffer.data(), dst_size); |
| 203 | 227 | ||
| 204 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 228 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 205 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 229 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| @@ -216,6 +240,24 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 216 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | 240 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |
| 217 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | 241 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |
| 218 | 242 | ||
| 243 | u32 bytes_per_pixel = 1; | ||
| 244 | DMA::ImageOperand dst_operand; | ||
| 245 | dst_operand.bytes_per_pixel = bytes_per_pixel; | ||
| 246 | dst_operand.params = regs.dst_params; | ||
| 247 | dst_operand.address = regs.offset_out; | ||
| 248 | DMA::BufferOperand src_operand; | ||
| 249 | src_operand.pitch = regs.pitch_in; | ||
| 250 | src_operand.width = regs.line_length_in; | ||
| 251 | src_operand.height = regs.line_count; | ||
| 252 | src_operand.address = regs.offset_in; | ||
| 253 | DMA::ImageCopy copy_info{}; | ||
| 254 | copy_info.length_x = regs.line_length_in; | ||
| 255 | copy_info.length_y = regs.line_count; | ||
| 256 | auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||
| 257 | if (accelerate.BufferToImage(copy_info, src_operand, dst_operand)) { | ||
| 258 | return; | ||
| 259 | } | ||
| 260 | |||
| 219 | const auto& dst_params = regs.dst_params; | 261 | const auto& dst_params = regs.dst_params; |
| 220 | 262 | ||
| 221 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | 263 | const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; |
| @@ -233,7 +275,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 233 | x_offset >>= bpp_shift; | 275 | x_offset >>= bpp_shift; |
| 234 | } | 276 | } |
| 235 | 277 | ||
| 236 | const u32 bytes_per_pixel = base_bpp << bpp_shift; | 278 | bytes_per_pixel = base_bpp << bpp_shift; |
| 237 | const u32 height = dst_params.height; | 279 | const u32 height = dst_params.height; |
| 238 | const u32 depth = dst_params.depth; | 280 | const u32 depth = dst_params.depth; |
| 239 | const u32 block_height = dst_params.block_size.height; | 281 | const u32 block_height = dst_params.block_size.height; |
| @@ -260,45 +302,14 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 260 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | 302 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 261 | } | 303 | } |
| 262 | 304 | ||
| 263 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | ||
| 264 | const u32 bytes_per_pixel = 1U; | ||
| 265 | const size_t src_size = GOB_SIZE; | ||
| 266 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | ||
| 267 | u32 pos_x = regs.src_params.origin.x; | ||
| 268 | u32 pos_y = regs.src_params.origin.y; | ||
| 269 | const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, | ||
| 270 | regs.src_params.block_size.height, bytes_per_pixel); | ||
| 271 | const u32 x_in_gob = 64 / bytes_per_pixel; | ||
| 272 | pos_x = pos_x % x_in_gob; | ||
| 273 | pos_y = pos_y % 8; | ||
| 274 | |||
| 275 | read_buffer.resize_destructive(src_size); | ||
| 276 | write_buffer.resize_destructive(dst_size); | ||
| 277 | |||
| 278 | if (Settings::IsGPULevelExtreme()) { | ||
| 279 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); | ||
| 280 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||
| 281 | } else { | ||
| 282 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); | ||
| 283 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | ||
| 284 | } | ||
| 285 | |||
| 286 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width, | ||
| 287 | regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, | ||
| 288 | regs.src_params.block_size.height, regs.src_params.block_size.depth, | ||
| 289 | regs.pitch_out); | ||
| 290 | |||
| 291 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||
| 292 | } | ||
| 293 | |||
| 294 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | 305 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { |
| 295 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | 306 | UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); |
| 296 | 307 | ||
| 297 | const bool is_remapping = regs.launch_dma.remap_enable != 0; | 308 | const bool is_remapping = regs.launch_dma.remap_enable != 0; |
| 298 | 309 | ||
| 299 | // Deswizzle the input and copy it over. | 310 | // Deswizzle the input and copy it over. |
| 300 | const Parameters& src = regs.src_params; | 311 | const DMA::Parameters& src = regs.src_params; |
| 301 | const Parameters& dst = regs.dst_params; | 312 | const DMA::Parameters& dst = regs.dst_params; |
| 302 | 313 | ||
| 303 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | 314 | const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |
| 304 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | 315 | const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 0e594fa74..69e26cb32 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -24,6 +24,54 @@ namespace VideoCore { | |||
| 24 | class RasterizerInterface; | 24 | class RasterizerInterface; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | namespace Tegra { | ||
| 28 | namespace DMA { | ||
| 29 | |||
| 30 | union Origin { | ||
| 31 | BitField<0, 16, u32> x; | ||
| 32 | BitField<16, 16, u32> y; | ||
| 33 | }; | ||
| 34 | static_assert(sizeof(Origin) == 4); | ||
| 35 | |||
| 36 | struct ImageCopy { | ||
| 37 | u32 length_x{}; | ||
| 38 | u32 length_y{}; | ||
| 39 | }; | ||
| 40 | |||
| 41 | union BlockSize { | ||
| 42 | BitField<0, 4, u32> width; | ||
| 43 | BitField<4, 4, u32> height; | ||
| 44 | BitField<8, 4, u32> depth; | ||
| 45 | BitField<12, 4, u32> gob_height; | ||
| 46 | }; | ||
| 47 | static_assert(sizeof(BlockSize) == 4); | ||
| 48 | |||
| 49 | struct Parameters { | ||
| 50 | BlockSize block_size; | ||
| 51 | u32 width; | ||
| 52 | u32 height; | ||
| 53 | u32 depth; | ||
| 54 | u32 layer; | ||
| 55 | Origin origin; | ||
| 56 | }; | ||
| 57 | static_assert(sizeof(Parameters) == 24); | ||
| 58 | |||
| 59 | struct ImageOperand { | ||
| 60 | u32 bytes_per_pixel; | ||
| 61 | Parameters params; | ||
| 62 | GPUVAddr address; | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct BufferOperand { | ||
| 66 | u32 pitch; | ||
| 67 | u32 width; | ||
| 68 | u32 height; | ||
| 69 | GPUVAddr address; | ||
| 70 | }; | ||
| 71 | |||
| 72 | } // namespace DMA | ||
| 73 | } // namespace Tegra | ||
| 74 | |||
| 27 | namespace Tegra::Engines { | 75 | namespace Tegra::Engines { |
| 28 | 76 | ||
| 29 | class AccelerateDMAInterface { | 77 | class AccelerateDMAInterface { |
| @@ -32,6 +80,12 @@ public: | |||
| 32 | virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; | 80 | virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; |
| 33 | 81 | ||
| 34 | virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; | 82 | virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; |
| 83 | |||
| 84 | virtual bool ImageToBuffer(const DMA::ImageCopy& copy_info, const DMA::ImageOperand& src, | ||
| 85 | const DMA::BufferOperand& dst) = 0; | ||
| 86 | |||
| 87 | virtual bool BufferToImage(const DMA::ImageCopy& copy_info, const DMA::BufferOperand& src, | ||
| 88 | const DMA::ImageOperand& dst) = 0; | ||
| 35 | }; | 89 | }; |
| 36 | 90 | ||
| 37 | /** | 91 | /** |
| @@ -51,30 +105,6 @@ public: | |||
| 51 | } | 105 | } |
| 52 | }; | 106 | }; |
| 53 | 107 | ||
| 54 | union BlockSize { | ||
| 55 | BitField<0, 4, u32> width; | ||
| 56 | BitField<4, 4, u32> height; | ||
| 57 | BitField<8, 4, u32> depth; | ||
| 58 | BitField<12, 4, u32> gob_height; | ||
| 59 | }; | ||
| 60 | static_assert(sizeof(BlockSize) == 4); | ||
| 61 | |||
| 62 | union Origin { | ||
| 63 | BitField<0, 16, u32> x; | ||
| 64 | BitField<16, 16, u32> y; | ||
| 65 | }; | ||
| 66 | static_assert(sizeof(Origin) == 4); | ||
| 67 | |||
| 68 | struct Parameters { | ||
| 69 | BlockSize block_size; | ||
| 70 | u32 width; | ||
| 71 | u32 height; | ||
| 72 | u32 depth; | ||
| 73 | u32 layer; | ||
| 74 | Origin origin; | ||
| 75 | }; | ||
| 76 | static_assert(sizeof(Parameters) == 24); | ||
| 77 | |||
| 78 | struct Semaphore { | 108 | struct Semaphore { |
| 79 | PackedGPUVAddr address; | 109 | PackedGPUVAddr address; |
| 80 | u32 payload; | 110 | u32 payload; |
| @@ -227,8 +257,6 @@ private: | |||
| 227 | 257 | ||
| 228 | void CopyBlockLinearToBlockLinear(); | 258 | void CopyBlockLinearToBlockLinear(); |
| 229 | 259 | ||
| 230 | void FastCopyBlockLinearToPitch(); | ||
| 231 | |||
| 232 | void ReleaseSemaphore(); | 260 | void ReleaseSemaphore(); |
| 233 | 261 | ||
| 234 | void ConsumeSinkImpl() override; | 262 | void ConsumeSinkImpl() override; |
| @@ -261,17 +289,17 @@ private: | |||
| 261 | u32 reserved05[0x3f]; | 289 | u32 reserved05[0x3f]; |
| 262 | PackedGPUVAddr offset_in; | 290 | PackedGPUVAddr offset_in; |
| 263 | PackedGPUVAddr offset_out; | 291 | PackedGPUVAddr offset_out; |
| 264 | u32 pitch_in; | 292 | s32 pitch_in; |
| 265 | u32 pitch_out; | 293 | s32 pitch_out; |
| 266 | u32 line_length_in; | 294 | u32 line_length_in; |
| 267 | u32 line_count; | 295 | u32 line_count; |
| 268 | u32 reserved06[0xb6]; | 296 | u32 reserved06[0xb6]; |
| 269 | u32 remap_consta_value; | 297 | u32 remap_consta_value; |
| 270 | u32 remap_constb_value; | 298 | u32 remap_constb_value; |
| 271 | RemapConst remap_const; | 299 | RemapConst remap_const; |
| 272 | Parameters dst_params; | 300 | DMA::Parameters dst_params; |
| 273 | u32 reserved07[0x1]; | 301 | u32 reserved07[0x1]; |
| 274 | Parameters src_params; | 302 | DMA::Parameters src_params; |
| 275 | u32 reserved08[0x275]; | 303 | u32 reserved08[0x275]; |
| 276 | u32 pm_trigger_end; | 304 | u32 pm_trigger_end; |
| 277 | u32 reserved09[0x3ba]; | 305 | u32 reserved09[0x3ba]; |