summaryrefslogtreecommitdiff
path: root/src/video_core/engines
diff options
context:
space:
mode:
authorGravatar bunnei2020-05-05 17:12:42 -0400
committerGravatar GitHub2020-05-05 17:12:42 -0400
commit41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa (patch)
tree64c61fda0aaa076cd54c46e8c271e67888c79c61 /src/video_core/engines
parentMerge pull request #3881 from lioncash/mem-warning (diff)
parentUpdate src/video_core/gpu.cpp (diff)
downloadyuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.tar.gz
yuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.tar.xz
yuzu-41682e0888f7cb640787ab8d9a7e5c0ebb83d8fa.zip
Merge pull request #3815 from FernandoS27/command-list-2
GPU: More optimizations to GPU Command List Processing and DMA Copy Optimizations
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/engine_interface.h22
-rw-r--r--src/video_core/engines/fermi_2d.cpp10
-rw-r--r--src/video_core/engines/fermi_2d.h8
-rw-r--r--src/video_core/engines/kepler_compute.cpp13
-rw-r--r--src/video_core/engines/kepler_compute.h8
-rw-r--r--src/video_core/engines/kepler_memory.cpp13
-rw-r--r--src/video_core/engines/kepler_memory.h8
-rw-r--r--src/video_core/engines/maxwell_3d.cpp28
-rw-r--r--src/video_core/engines/maxwell_3d.h10
-rw-r--r--src/video_core/engines/maxwell_dma.cpp50
-rw-r--r--src/video_core/engines/maxwell_dma.h8
11 files changed, 122 insertions, 56 deletions
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
new file mode 100644
index 000000000..18a9db7e6
--- /dev/null
+++ b/src/video_core/engines/engine_interface.h
@@ -0,0 +1,22 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/common_types.h"
9
10namespace Tegra::Engines {
11
12class EngineInterface {
13public:
14 /// Write the value to the register identified by method.
15 virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
16
17 /// Write multiple values to the register identified by method.
18 virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
19 u32 methods_pending) = 0;
20};
21
22} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 8a47614d2..ff10ff40d 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -12,13 +12,13 @@ namespace Tegra::Engines {
12 12
13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
14 14
15void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { 15void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
16 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 16 ASSERT_MSG(method < Regs::NUM_REGS,
17 "Invalid Fermi2D register, increase the size of the Regs structure"); 17 "Invalid Fermi2D register, increase the size of the Regs structure");
18 18
19 regs.reg_array[method_call.method] = method_call.argument; 19 regs.reg_array[method] = method_argument;
20 20
21 switch (method_call.method) { 21 switch (method) {
22 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, 22 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
23 // so trigger on the second 32-bit write. 23 // so trigger on the second 32-bit write.
24 case FERMI2D_REG_INDEX(blit_src_y) + 1: { 24 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
@@ -30,7 +30,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
30 30
31void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { 31void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
32 for (std::size_t i = 0; i < amount; i++) { 32 for (std::size_t i = 0; i < amount; i++) {
33 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 33 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
34 } 34 }
35} 35}
36 36
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 939a5966d..8f37d053f 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,6 +10,7 @@
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/math_util.h" 12#include "common/math_util.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Tegra { 16namespace Tegra {
@@ -31,16 +32,17 @@ namespace Tegra::Engines {
31#define FERMI2D_REG_INDEX(field_name) \ 32#define FERMI2D_REG_INDEX(field_name) \
32 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) 33 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
33 34
34class Fermi2D final { 35class Fermi2D final : public EngineInterface {
35public: 36public:
36 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); 37 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
37 ~Fermi2D() = default; 38 ~Fermi2D() = default;
38 39
39 /// Write the value to the register identified by method. 40 /// Write the value to the register identified by method.
40 void CallMethod(const GPU::MethodCall& method_call); 41 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
41 42
42 /// Write multiple values to the register identified by method. 43 /// Write multiple values to the register identified by method.
43 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 44 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
45 u32 methods_pending) override;
44 46
45 enum class Origin : u32 { 47 enum class Origin : u32 {
46 Center = 0, 48 Center = 0,
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 00a12175f..f6237fc6a 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -24,20 +24,19 @@ KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterfac
24 24
25KeplerCompute::~KeplerCompute() = default; 25KeplerCompute::~KeplerCompute() = default;
26 26
27void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { 27void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
28 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 28 ASSERT_MSG(method < Regs::NUM_REGS,
29 "Invalid KeplerCompute register, increase the size of the Regs structure"); 29 "Invalid KeplerCompute register, increase the size of the Regs structure");
30 30
31 regs.reg_array[method_call.method] = method_call.argument; 31 regs.reg_array[method] = method_argument;
32 32
33 switch (method_call.method) { 33 switch (method) {
34 case KEPLER_COMPUTE_REG_INDEX(exec_upload): { 34 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
35 upload_state.ProcessExec(regs.exec_upload.linear != 0); 35 upload_state.ProcessExec(regs.exec_upload.linear != 0);
36 break; 36 break;
37 } 37 }
38 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 38 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
39 const bool is_last_call = method_call.IsLastCall(); 39 upload_state.ProcessData(method_argument, is_last_call);
40 upload_state.ProcessData(method_call.argument, is_last_call);
41 if (is_last_call) { 40 if (is_last_call) {
42 system.GPU().Maxwell3D().OnMemoryWrite(); 41 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 42 }
@@ -54,7 +53,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
54void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 53void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
55 u32 methods_pending) { 54 u32 methods_pending) {
56 for (std::size_t i = 0; i < amount; i++) { 55 for (std::size_t i = 0; i < amount; i++) {
57 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 56 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
58 } 57 }
59} 58}
60 59
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fe55fdfd0..18ceedfaf 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -11,6 +11,7 @@
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h" 13#include "video_core/engines/const_buffer_engine_interface.h"
14#include "video_core/engines/engine_interface.h"
14#include "video_core/engines/engine_upload.h" 15#include "video_core/engines/engine_upload.h"
15#include "video_core/engines/shader_type.h" 16#include "video_core/engines/shader_type.h"
16#include "video_core/gpu.h" 17#include "video_core/gpu.h"
@@ -39,7 +40,7 @@ namespace Tegra::Engines {
39#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 40#define KEPLER_COMPUTE_REG_INDEX(field_name) \
40 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 41 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
41 42
42class KeplerCompute final : public ConstBufferEngineInterface { 43class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
43public: 44public:
44 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 45 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
45 MemoryManager& memory_manager); 46 MemoryManager& memory_manager);
@@ -200,10 +201,11 @@ public:
200 "KeplerCompute LaunchParams has wrong size"); 201 "KeplerCompute LaunchParams has wrong size");
201 202
202 /// Write the value to the register identified by method. 203 /// Write the value to the register identified by method.
203 void CallMethod(const GPU::MethodCall& method_call); 204 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
204 205
205 /// Write multiple values to the register identified by method. 206 /// Write multiple values to the register identified by method.
206 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 207 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
208 u32 methods_pending) override;
207 209
208 Texture::FullTextureInfo GetTexture(std::size_t offset) const; 210 Texture::FullTextureInfo GetTexture(std::size_t offset) const;
209 211
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 586ff15dc..dc71b2eec 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -19,20 +19,19 @@ KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
19 19
20KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
21 21
22void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { 22void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
23 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 23 ASSERT_MSG(method < Regs::NUM_REGS,
24 "Invalid KeplerMemory register, increase the size of the Regs structure"); 24 "Invalid KeplerMemory register, increase the size of the Regs structure");
25 25
26 regs.reg_array[method_call.method] = method_call.argument; 26 regs.reg_array[method] = method_argument;
27 27
28 switch (method_call.method) { 28 switch (method) {
29 case KEPLERMEMORY_REG_INDEX(exec): { 29 case KEPLERMEMORY_REG_INDEX(exec): {
30 upload_state.ProcessExec(regs.exec.linear != 0); 30 upload_state.ProcessExec(regs.exec.linear != 0);
31 break; 31 break;
32 } 32 }
33 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
34 const bool is_last_call = method_call.IsLastCall(); 34 upload_state.ProcessData(method_argument, is_last_call);
35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 35 if (is_last_call) {
37 system.GPU().Maxwell3D().OnMemoryWrite(); 36 system.GPU().Maxwell3D().OnMemoryWrite();
38 } 37 }
@@ -44,7 +43,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
44void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 43void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
45 u32 methods_pending) { 44 u32 methods_pending) {
46 for (std::size_t i = 0; i < amount; i++) { 45 for (std::size_t i = 0; i < amount; i++) {
47 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 46 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
48 } 47 }
49} 48}
50 49
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index bb26fb030..5b7f71a00 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
14#include "video_core/gpu.h" 15#include "video_core/gpu.h"
15 16
@@ -32,16 +33,17 @@ namespace Tegra::Engines {
32#define KEPLERMEMORY_REG_INDEX(field_name) \ 33#define KEPLERMEMORY_REG_INDEX(field_name) \
33 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) 34 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
34 35
35class KeplerMemory final { 36class KeplerMemory final : public EngineInterface {
36public: 37public:
37 KeplerMemory(Core::System& system, MemoryManager& memory_manager); 38 KeplerMemory(Core::System& system, MemoryManager& memory_manager);
38 ~KeplerMemory(); 39 ~KeplerMemory();
39 40
40 /// Write the value to the register identified by method. 41 /// Write the value to the register identified by method.
41 void CallMethod(const GPU::MethodCall& method_call); 42 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
42 43
43 /// Write multiple values to the register identified by method. 44 /// Write multiple values to the register identified by method.
44 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 45 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
46 u32 methods_pending) override;
45 47
46 struct Regs { 48 struct Regs {
47 static constexpr size_t NUM_REGS = 0x7F; 49 static constexpr size_t NUM_REGS = 0x7F;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7db055ea0..33936e209 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -125,12 +125,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
125 } 125 }
126} 126}
127 127
128void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 128void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
129 const u32 method = method_call.method;
130
131 if (method == cb_data_state.current) { 129 if (method == cb_data_state.current) {
132 regs.reg_array[method] = method_call.argument; 130 regs.reg_array[method] = method_argument;
133 ProcessCBData(method_call.argument); 131 ProcessCBData(method_argument);
134 return; 132 return;
135 } else if (cb_data_state.current != null_cb_data) { 133 } else if (cb_data_state.current != null_cb_data) {
136 FinishCBData(); 134 FinishCBData();
@@ -153,10 +151,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
153 executing_macro = method; 151 executing_macro = method;
154 } 152 }
155 153
156 macro_params.push_back(method_call.argument); 154 macro_params.push_back(method_argument);
157 155
158 // Call the macro when there are no more parameters in the command buffer 156 // Call the macro when there are no more parameters in the command buffer
159 if (method_call.IsLastCall()) { 157 if (is_last_call) {
160 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); 158 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
161 macro_params.clear(); 159 macro_params.clear();
162 } 160 }
@@ -166,7 +164,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
166 ASSERT_MSG(method < Regs::NUM_REGS, 164 ASSERT_MSG(method < Regs::NUM_REGS,
167 "Invalid Maxwell3D register, increase the size of the Regs structure"); 165 "Invalid Maxwell3D register, increase the size of the Regs structure");
168 166
169 u32 arg = method_call.argument; 167 u32 arg = method_argument;
170 // Keep track of the register value in shadow_state when requested. 168 // Keep track of the register value in shadow_state when requested.
171 if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || 169 if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
172 shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { 170 shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
@@ -189,7 +187,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
189 break; 187 break;
190 } 188 }
191 case MAXWELL3D_REG_INDEX(shadow_ram_control): { 189 case MAXWELL3D_REG_INDEX(shadow_ram_control): {
192 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); 190 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
193 break; 191 break;
194 } 192 }
195 case MAXWELL3D_REG_INDEX(macros.data): { 193 case MAXWELL3D_REG_INDEX(macros.data): {
@@ -272,7 +270,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
272 break; 270 break;
273 } 271 }
274 case MAXWELL3D_REG_INDEX(data_upload): { 272 case MAXWELL3D_REG_INDEX(data_upload): {
275 const bool is_last_call = method_call.IsLastCall();
276 upload_state.ProcessData(arg, is_last_call); 273 upload_state.ProcessData(arg, is_last_call);
277 if (is_last_call) { 274 if (is_last_call) {
278 OnMemoryWrite(); 275 OnMemoryWrite();
@@ -330,7 +327,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
330 } 327 }
331 default: { 328 default: {
332 for (std::size_t i = 0; i < amount; i++) { 329 for (std::size_t i = 0; i < amount; i++) {
333 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 330 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
334 } 331 }
335 } 332 }
336 } 333 }
@@ -360,16 +357,15 @@ void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
360 StepInstance(expected_mode, count); 357 StepInstance(expected_mode, count);
361} 358}
362 359
363void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { 360void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
364 const u32 method = method_call.method;
365 if (mme_inline[method]) { 361 if (mme_inline[method]) {
366 regs.reg_array[method] = method_call.argument; 362 regs.reg_array[method] = method_argument;
367 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || 363 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
368 method == MAXWELL3D_REG_INDEX(index_array.count)) { 364 method == MAXWELL3D_REG_INDEX(index_array.count)) {
369 const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) 365 const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
370 ? MMEDrawMode::Array 366 ? MMEDrawMode::Array
371 : MMEDrawMode::Indexed; 367 : MMEDrawMode::Indexed;
372 StepInstance(expected_mode, method_call.argument); 368 StepInstance(expected_mode, method_argument);
373 } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { 369 } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
374 mme_draw.instance_mode = 370 mme_draw.instance_mode =
375 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); 371 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
@@ -381,7 +377,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
381 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 377 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
382 FlushMMEInlineDraw(); 378 FlushMMEInlineDraw();
383 } 379 }
384 CallMethod(method_call); 380 CallMethod(method, method_argument, true);
385 } 381 }
386} 382}
387 383
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 864924ff3..1a5df05ce 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -19,6 +19,7 @@
19#include "common/math_util.h" 19#include "common/math_util.h"
20#include "video_core/engines/const_buffer_engine_interface.h" 20#include "video_core/engines/const_buffer_engine_interface.h"
21#include "video_core/engines/const_buffer_info.h" 21#include "video_core/engines/const_buffer_info.h"
22#include "video_core/engines/engine_interface.h"
22#include "video_core/engines/engine_upload.h" 23#include "video_core/engines/engine_upload.h"
23#include "video_core/engines/shader_type.h" 24#include "video_core/engines/shader_type.h"
24#include "video_core/gpu.h" 25#include "video_core/gpu.h"
@@ -48,7 +49,7 @@ namespace Tegra::Engines {
48#define MAXWELL3D_REG_INDEX(field_name) \ 49#define MAXWELL3D_REG_INDEX(field_name) \
49 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 50 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
50 51
51class Maxwell3D final : public ConstBufferEngineInterface { 52class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
52public: 53public:
53 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 54 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
54 MemoryManager& memory_manager); 55 MemoryManager& memory_manager);
@@ -1360,13 +1361,14 @@ public:
1360 u32 GetRegisterValue(u32 method) const; 1361 u32 GetRegisterValue(u32 method) const;
1361 1362
1362 /// Write the value to the register identified by method. 1363 /// Write the value to the register identified by method.
1363 void CallMethod(const GPU::MethodCall& method_call); 1364 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
1364 1365
1365 /// Write multiple values to the register identified by method. 1366 /// Write multiple values to the register identified by method.
1366 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 1367 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
1368 u32 methods_pending) override;
1367 1369
1368 /// Write the value to the register identified by method. 1370 /// Write the value to the register identified by method.
1369 void CallMethodFromMME(const GPU::MethodCall& method_call); 1371 void CallMethodFromMME(u32 method, u32 method_argument);
1370 1372
1371 void FlushMMEInlineDraw(); 1373 void FlushMMEInlineDraw();
1372 1374
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6630005b0..01d7df405 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -17,16 +17,16 @@ namespace Tegra::Engines {
17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
18 : system{system}, memory_manager{memory_manager} {} 18 : system{system}, memory_manager{memory_manager} {}
19 19
20void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 20void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
21 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 21 ASSERT_MSG(method < Regs::NUM_REGS,
22 "Invalid MaxwellDMA register, increase the size of the Regs structure"); 22 "Invalid MaxwellDMA register, increase the size of the Regs structure");
23 23
24 regs.reg_array[method_call.method] = method_call.argument; 24 regs.reg_array[method] = method_argument;
25 25
26#define MAXWELLDMA_REG_INDEX(field_name) \ 26#define MAXWELLDMA_REG_INDEX(field_name) \
27 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) 27 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
28 28
29 switch (method_call.method) { 29 switch (method) {
30 case MAXWELLDMA_REG_INDEX(exec): { 30 case MAXWELLDMA_REG_INDEX(exec): {
31 HandleCopy(); 31 HandleCopy();
32 break; 32 break;
@@ -39,7 +39,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
39void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 39void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
40 u32 methods_pending) { 40 u32 methods_pending) {
41 for (std::size_t i = 0; i < amount; i++) { 41 for (std::size_t i = 0; i < amount; i++) {
42 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 42 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
43 } 43 }
44} 44}
45 45
@@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() {
90 ASSERT(regs.exec.enable_2d == 1); 90 ASSERT(regs.exec.enable_2d == 1);
91 91
92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
93
93 ASSERT(regs.src_params.BlockDepth() == 0); 94 ASSERT(regs.src_params.BlockDepth() == 0);
95 // Optimized path for micro copies.
96 if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
97 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
98 const std::size_t src_size = Texture::GetGOBSize();
99 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
100 u32 pos_x = regs.src_params.pos_x;
101 u32 pos_y = regs.src_params.pos_y;
102 const u64 offset =
103 Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
104 regs.src_params.BlockDepth(), bytes_per_pixel);
105 const u32 x_in_gob = 64 / bytes_per_pixel;
106 pos_x = pos_x % x_in_gob;
107 pos_y = pos_y % 8;
108
109 if (read_buffer.size() < src_size) {
110 read_buffer.resize(src_size);
111 }
112
113 if (write_buffer.size() < dst_size) {
114 write_buffer.resize(dst_size);
115 }
116
117 if (Settings::IsGPULevelExtreme()) {
118 memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
119 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
120 } else {
121 memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
122 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
123 }
124
125 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
126 regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
127 write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
128 pos_y);
129
130 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
131
132 return;
133 }
94 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 134 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
95 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; 135 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
96 const std::size_t src_size = Texture::CalculateSize( 136 const std::size_t src_size = Texture::CalculateSize(
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c43ed8194..502dd8509 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Core { 16namespace Core {
@@ -27,16 +28,17 @@ namespace Tegra::Engines {
27 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml 28 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
28 */ 29 */
29 30
30class MaxwellDMA final { 31class MaxwellDMA final : public EngineInterface {
31public: 32public:
32 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); 33 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
33 ~MaxwellDMA() = default; 34 ~MaxwellDMA() = default;
34 35
35 /// Write the value to the register identified by method. 36 /// Write the value to the register identified by method.
36 void CallMethod(const GPU::MethodCall& method_call); 37 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
37 38
38 /// Write multiple values to the register identified by method. 39 /// Write multiple values to the register identified by method.
39 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 40 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
41 u32 methods_pending) override;
40 42
41 struct Regs { 43 struct Regs {
42 static constexpr std::size_t NUM_REGS = 0x1D6; 44 static constexpr std::size_t NUM_REGS = 0x1D6;