summary | refs | log | tree | commit | diff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author: Fernando S, 2023-03-05 15:30:47 +0100
committer: GitHub, 2023-03-05 15:30:47 +0100
commit: fdae95efaa84fe1baeab0b4dd1435720cae0f88d (patch)
tree: 383070bd0d4a33189f38423ceea9a5692d38ba09 /src/video_core/engines
parent: Merge pull request #9884 from liamwhite/service-cleanup (diff)
parent: Engines: Implement Accelerate DMA Texture. (diff)
download: yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.tar.gz
yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.tar.xz
yuzu-fdae95efaa84fe1baeab0b4dd1435720cae0f88d.zip
Merge pull request #9786 from FernandoS27/the-gaia-is-a-lie
YFC - Engines: Implement Accelerate DMA Texture.
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 107
-rw-r--r-- src/video_core/engines/maxwell_dma.h 88
2 files changed, 117 insertions, 78 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 7762c7d96..e68850dc5 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -14,7 +14,13 @@
14#include "video_core/textures/decoders.h" 14#include "video_core/textures/decoders.h"
15 15
16MICROPROFILE_DECLARE(GPU_DMAEngine); 16MICROPROFILE_DECLARE(GPU_DMAEngine);
17MICROPROFILE_DECLARE(GPU_DMAEngineBL);
18MICROPROFILE_DECLARE(GPU_DMAEngineLB);
19MICROPROFILE_DECLARE(GPU_DMAEngineBB);
17MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); 20MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
21MICROPROFILE_DEFINE(GPU_DMAEngineBL, "GPU", "DMA Engine Block - Linear", MP_RGB(224, 224, 128));
22MICROPROFILE_DEFINE(GPU_DMAEngineLB, "GPU", "DMA Engine Linear - Block", MP_RGB(224, 224, 128));
23MICROPROFILE_DEFINE(GPU_DMAEngineBB, "GPU", "DMA Engine Block - Block", MP_RGB(224, 224, 128));
18 24
19namespace Tegra::Engines { 25namespace Tegra::Engines {
20 26
@@ -72,6 +78,7 @@ void MaxwellDMA::Launch() {
72 memory_manager.FlushCaching(); 78 memory_manager.FlushCaching();
73 if (!is_src_pitch && !is_dst_pitch) { 79 if (!is_src_pitch && !is_dst_pitch) {
74 // If both the source and the destination are in block layout, assert. 80 // If both the source and the destination are in block layout, assert.
81 MICROPROFILE_SCOPE(GPU_DMAEngineBB);
75 CopyBlockLinearToBlockLinear(); 82 CopyBlockLinearToBlockLinear();
76 ReleaseSemaphore(); 83 ReleaseSemaphore();
77 return; 84 return;
@@ -87,8 +94,10 @@ void MaxwellDMA::Launch() {
87 } 94 }
88 } else { 95 } else {
89 if (!is_src_pitch && is_dst_pitch) { 96 if (!is_src_pitch && is_dst_pitch) {
97 MICROPROFILE_SCOPE(GPU_DMAEngineBL);
90 CopyBlockLinearToPitch(); 98 CopyBlockLinearToPitch();
91 } else { 99 } else {
100 MICROPROFILE_SCOPE(GPU_DMAEngineLB);
92 CopyPitchToBlockLinear(); 101 CopyPitchToBlockLinear();
93 } 102 }
94 } 103 }
@@ -153,21 +162,35 @@ void MaxwellDMA::Launch() {
153} 162}
154 163
155void MaxwellDMA::CopyBlockLinearToPitch() { 164void MaxwellDMA::CopyBlockLinearToPitch() {
156 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); 165 UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0);
157 UNIMPLEMENTED_IF(regs.src_params.layer != 0); 166
158 167 u32 bytes_per_pixel = 1;
159 const bool is_remapping = regs.launch_dma.remap_enable != 0; 168 DMA::ImageOperand src_operand;
160 169 src_operand.bytes_per_pixel = bytes_per_pixel;
161 // Optimized path for micro copies. 170 src_operand.params = regs.src_params;
162 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 171 src_operand.address = regs.offset_in;
163 if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && 172
164 regs.src_params.height > GOB_SIZE_Y) { 173 DMA::BufferOperand dst_operand;
165 FastCopyBlockLinearToPitch(); 174 dst_operand.pitch = regs.pitch_out;
175 dst_operand.width = regs.line_length_in;
176 dst_operand.height = regs.line_count;
177 dst_operand.address = regs.offset_out;
178 DMA::ImageCopy copy_info{};
179 copy_info.length_x = regs.line_length_in;
180 copy_info.length_y = regs.line_count;
181 auto& accelerate = rasterizer->AccessAccelerateDMA();
182 if (accelerate.ImageToBuffer(copy_info, src_operand, dst_operand)) {
166 return; 183 return;
167 } 184 }
168 185
186 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
187 UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
188 UNIMPLEMENTED_IF(regs.src_params.block_size.depth == 0 && regs.src_params.depth != 1);
189
169 // Deswizzle the input and copy it over. 190 // Deswizzle the input and copy it over.
170 const Parameters& src_params = regs.src_params; 191 const DMA::Parameters& src_params = regs.src_params;
192
193 const bool is_remapping = regs.launch_dma.remap_enable != 0;
171 194
172 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; 195 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
173 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; 196 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
@@ -187,7 +210,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
187 x_offset >>= bpp_shift; 210 x_offset >>= bpp_shift;
188 } 211 }
189 212
190 const u32 bytes_per_pixel = base_bpp << bpp_shift; 213 bytes_per_pixel = base_bpp << bpp_shift;
191 const u32 height = src_params.height; 214 const u32 height = src_params.height;
192 const u32 depth = src_params.depth; 215 const u32 depth = src_params.depth;
193 const u32 block_height = src_params.block_size.height; 216 const u32 block_height = src_params.block_size.height;
@@ -195,11 +218,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
195 const size_t src_size = 218 const size_t src_size =
196 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 219 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
197 220
221 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
198 read_buffer.resize_destructive(src_size); 222 read_buffer.resize_destructive(src_size);
199 write_buffer.resize_destructive(dst_size); 223 write_buffer.resize_destructive(dst_size);
200 224
201 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 225 memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size);
202 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); 226 memory_manager.ReadBlockUnsafe(dst_operand.address, write_buffer.data(), dst_size);
203 227
204 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 228 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
205 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 229 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
@@ -216,6 +240,24 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
216 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; 240 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
217 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; 241 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
218 242
243 u32 bytes_per_pixel = 1;
244 DMA::ImageOperand dst_operand;
245 dst_operand.bytes_per_pixel = bytes_per_pixel;
246 dst_operand.params = regs.dst_params;
247 dst_operand.address = regs.offset_out;
248 DMA::BufferOperand src_operand;
249 src_operand.pitch = regs.pitch_in;
250 src_operand.width = regs.line_length_in;
251 src_operand.height = regs.line_count;
252 src_operand.address = regs.offset_in;
253 DMA::ImageCopy copy_info{};
254 copy_info.length_x = regs.line_length_in;
255 copy_info.length_y = regs.line_count;
256 auto& accelerate = rasterizer->AccessAccelerateDMA();
257 if (accelerate.BufferToImage(copy_info, src_operand, dst_operand)) {
258 return;
259 }
260
219 const auto& dst_params = regs.dst_params; 261 const auto& dst_params = regs.dst_params;
220 262
221 const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; 263 const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size;
@@ -233,7 +275,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
233 x_offset >>= bpp_shift; 275 x_offset >>= bpp_shift;
234 } 276 }
235 277
236 const u32 bytes_per_pixel = base_bpp << bpp_shift; 278 bytes_per_pixel = base_bpp << bpp_shift;
237 const u32 height = dst_params.height; 279 const u32 height = dst_params.height;
238 const u32 depth = dst_params.depth; 280 const u32 depth = dst_params.depth;
239 const u32 block_height = dst_params.block_size.height; 281 const u32 block_height = dst_params.block_size.height;
@@ -260,45 +302,14 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
260 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); 302 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
261} 303}
262 304
263void MaxwellDMA::FastCopyBlockLinearToPitch() {
264 const u32 bytes_per_pixel = 1U;
265 const size_t src_size = GOB_SIZE;
266 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
267 u32 pos_x = regs.src_params.origin.x;
268 u32 pos_y = regs.src_params.origin.y;
269 const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y,
270 regs.src_params.block_size.height, bytes_per_pixel);
271 const u32 x_in_gob = 64 / bytes_per_pixel;
272 pos_x = pos_x % x_in_gob;
273 pos_y = pos_y % 8;
274
275 read_buffer.resize_destructive(src_size);
276 write_buffer.resize_destructive(dst_size);
277
278 if (Settings::IsGPULevelExtreme()) {
279 memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size);
280 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
281 } else {
282 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size);
283 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
284 }
285
286 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width,
287 regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count,
288 regs.src_params.block_size.height, regs.src_params.block_size.depth,
289 regs.pitch_out);
290
291 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
292}
293
294void MaxwellDMA::CopyBlockLinearToBlockLinear() { 305void MaxwellDMA::CopyBlockLinearToBlockLinear() {
295 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); 306 UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
296 307
297 const bool is_remapping = regs.launch_dma.remap_enable != 0; 308 const bool is_remapping = regs.launch_dma.remap_enable != 0;
298 309
299 // Deswizzle the input and copy it over. 310 // Deswizzle the input and copy it over.
300 const Parameters& src = regs.src_params; 311 const DMA::Parameters& src = regs.src_params;
301 const Parameters& dst = regs.dst_params; 312 const DMA::Parameters& dst = regs.dst_params;
302 313
303 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; 314 const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1;
304 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; 315 const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 0e594fa74..69e26cb32 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -24,6 +24,54 @@ namespace VideoCore {
24class RasterizerInterface; 24class RasterizerInterface;
25} 25}
26 26
27namespace Tegra {
28namespace DMA {
29
30union Origin {
31 BitField<0, 16, u32> x;
32 BitField<16, 16, u32> y;
33};
34static_assert(sizeof(Origin) == 4);
35
36struct ImageCopy {
37 u32 length_x{};
38 u32 length_y{};
39};
40
41union BlockSize {
42 BitField<0, 4, u32> width;
43 BitField<4, 4, u32> height;
44 BitField<8, 4, u32> depth;
45 BitField<12, 4, u32> gob_height;
46};
47static_assert(sizeof(BlockSize) == 4);
48
49struct Parameters {
50 BlockSize block_size;
51 u32 width;
52 u32 height;
53 u32 depth;
54 u32 layer;
55 Origin origin;
56};
57static_assert(sizeof(Parameters) == 24);
58
59struct ImageOperand {
60 u32 bytes_per_pixel;
61 Parameters params;
62 GPUVAddr address;
63};
64
65struct BufferOperand {
66 u32 pitch;
67 u32 width;
68 u32 height;
69 GPUVAddr address;
70};
71
72} // namespace DMA
73} // namespace Tegra
74
27namespace Tegra::Engines { 75namespace Tegra::Engines {
28 76
29class AccelerateDMAInterface { 77class AccelerateDMAInterface {
@@ -32,6 +80,12 @@ public:
32 virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; 80 virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0;
33 81
34 virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; 82 virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0;
83
84 virtual bool ImageToBuffer(const DMA::ImageCopy& copy_info, const DMA::ImageOperand& src,
85 const DMA::BufferOperand& dst) = 0;
86
87 virtual bool BufferToImage(const DMA::ImageCopy& copy_info, const DMA::BufferOperand& src,
88 const DMA::ImageOperand& dst) = 0;
35}; 89};
36 90
37/** 91/**
@@ -51,30 +105,6 @@ public:
51 } 105 }
52 }; 106 };
53 107
54 union BlockSize {
55 BitField<0, 4, u32> width;
56 BitField<4, 4, u32> height;
57 BitField<8, 4, u32> depth;
58 BitField<12, 4, u32> gob_height;
59 };
60 static_assert(sizeof(BlockSize) == 4);
61
62 union Origin {
63 BitField<0, 16, u32> x;
64 BitField<16, 16, u32> y;
65 };
66 static_assert(sizeof(Origin) == 4);
67
68 struct Parameters {
69 BlockSize block_size;
70 u32 width;
71 u32 height;
72 u32 depth;
73 u32 layer;
74 Origin origin;
75 };
76 static_assert(sizeof(Parameters) == 24);
77
78 struct Semaphore { 108 struct Semaphore {
79 PackedGPUVAddr address; 109 PackedGPUVAddr address;
80 u32 payload; 110 u32 payload;
@@ -227,8 +257,6 @@ private:
227 257
228 void CopyBlockLinearToBlockLinear(); 258 void CopyBlockLinearToBlockLinear();
229 259
230 void FastCopyBlockLinearToPitch();
231
232 void ReleaseSemaphore(); 260 void ReleaseSemaphore();
233 261
234 void ConsumeSinkImpl() override; 262 void ConsumeSinkImpl() override;
@@ -261,17 +289,17 @@ private:
261 u32 reserved05[0x3f]; 289 u32 reserved05[0x3f];
262 PackedGPUVAddr offset_in; 290 PackedGPUVAddr offset_in;
263 PackedGPUVAddr offset_out; 291 PackedGPUVAddr offset_out;
264 u32 pitch_in; 292 s32 pitch_in;
265 u32 pitch_out; 293 s32 pitch_out;
266 u32 line_length_in; 294 u32 line_length_in;
267 u32 line_count; 295 u32 line_count;
268 u32 reserved06[0xb6]; 296 u32 reserved06[0xb6];
269 u32 remap_consta_value; 297 u32 remap_consta_value;
270 u32 remap_constb_value; 298 u32 remap_constb_value;
271 RemapConst remap_const; 299 RemapConst remap_const;
272 Parameters dst_params; 300 DMA::Parameters dst_params;
273 u32 reserved07[0x1]; 301 u32 reserved07[0x1];
274 Parameters src_params; 302 DMA::Parameters src_params;
275 u32 reserved08[0x275]; 303 u32 reserved08[0x275];
276 u32 pm_trigger_end; 304 u32 pm_trigger_end;
277 u32 reserved09[0x3ba]; 305 u32 reserved09[0x3ba];