summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/dma_pusher.cpp20
-rw-r--r--src/video_core/dma_pusher.h11
-rw-r--r--src/video_core/engines/engine_interface.h22
-rw-r--r--src/video_core/engines/fermi_2d.cpp10
-rw-r--r--src/video_core/engines/fermi_2d.h8
-rw-r--r--src/video_core/engines/kepler_compute.cpp13
-rw-r--r--src/video_core/engines/kepler_compute.h8
-rw-r--r--src/video_core/engines/kepler_memory.cpp13
-rw-r--r--src/video_core/engines/kepler_memory.h8
-rw-r--r--src/video_core/engines/maxwell_3d.cpp28
-rw-r--r--src/video_core/engines/maxwell_3d.h10
-rw-r--r--src/video_core/engines/maxwell_dma.cpp50
-rw-r--r--src/video_core/engines/maxwell_dma.h8
-rw-r--r--src/video_core/gpu.cpp34
-rw-r--r--src/video_core/macro_interpreter.cpp2
-rw-r--r--src/video_core/textures/decoders.cpp14
-rw-r--r--src/video_core/textures/decoders.h4
18 files changed, 198 insertions, 66 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ff53282c9..d23c53843 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -8,6 +8,7 @@ add_library(video_core STATIC
8 dma_pusher.h 8 dma_pusher.h
9 engines/const_buffer_engine_interface.h 9 engines/const_buffer_engine_interface.h
10 engines/const_buffer_info.h 10 engines/const_buffer_info.h
11 engines/engine_interface.h
11 engines/engine_upload.cpp 12 engines/engine_upload.cpp
12 engines/engine_upload.h 13 engines/engine_upload.h
13 engines/fermi_2d.cpp 14 engines/fermi_2d.cpp
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 16311f05e..bdc023d54 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -27,6 +27,8 @@ void DmaPusher::DispatchCalls() {
27 27
28 dma_pushbuffer_subindex = 0; 28 dma_pushbuffer_subindex = 0;
29 29
30 dma_state.is_last_call = true;
31
30 while (system.IsPoweredOn()) { 32 while (system.IsPoweredOn()) {
31 if (!Step()) { 33 if (!Step()) {
32 break; 34 break;
@@ -82,9 +84,11 @@ bool DmaPusher::Step() {
82 index); 84 index);
83 CallMultiMethod(&command_header.argument, max_write); 85 CallMultiMethod(&command_header.argument, max_write);
84 dma_state.method_count -= max_write; 86 dma_state.method_count -= max_write;
87 dma_state.is_last_call = true;
85 index += max_write; 88 index += max_write;
86 continue; 89 continue;
87 } else { 90 } else {
91 dma_state.is_last_call = dma_state.method_count <= 1;
88 CallMethod(command_header.argument); 92 CallMethod(command_header.argument);
89 } 93 }
90 94
@@ -144,12 +148,22 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
144} 148}
145 149
146void DmaPusher::CallMethod(u32 argument) const { 150void DmaPusher::CallMethod(u32 argument) const {
147 gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count}); 151 if (dma_state.method < non_puller_methods) {
152 gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
153 } else {
154 subchannels[dma_state.subchannel]->CallMethod(dma_state.method, argument,
155 dma_state.is_last_call);
156 }
148} 157}
149 158
150void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { 159void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
151 gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, 160 if (dma_state.method < non_puller_methods) {
152 dma_state.method_count); 161 gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
162 dma_state.method_count);
163 } else {
164 subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
165 num_methods, dma_state.method_count);
166 }
153} 167}
154 168
155} // namespace Tegra 169} // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 6cef71306..e8b714e94 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -4,11 +4,13 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
7#include <vector> 8#include <vector>
8#include <queue> 9#include <queue>
9 10
10#include "common/bit_field.h" 11#include "common/bit_field.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
12 14
13namespace Core { 15namespace Core {
14class System; 16class System;
@@ -69,7 +71,13 @@ public:
69 71
70 void DispatchCalls(); 72 void DispatchCalls();
71 73
74 void BindSubchannel(Tegra::Engines::EngineInterface* engine, u32 subchannel_id) {
75 subchannels[subchannel_id] = engine;
76 }
77
72private: 78private:
79 static constexpr u32 non_puller_methods = 0x40;
80 static constexpr u32 max_subchannels = 8;
73 bool Step(); 81 bool Step();
74 82
75 void SetState(const CommandHeader& command_header); 83 void SetState(const CommandHeader& command_header);
@@ -88,6 +96,7 @@ private:
88 u32 method_count; ///< Current method count 96 u32 method_count; ///< Current method count
89 u32 length_pending; ///< Large NI command length pending 97 u32 length_pending; ///< Large NI command length pending
90 bool non_incrementing; ///< Current command's NI flag 98 bool non_incrementing; ///< Current command's NI flag
99 bool is_last_call;
91 }; 100 };
92 101
93 DmaState dma_state{}; 102 DmaState dma_state{};
@@ -96,6 +105,8 @@ private:
96 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 105 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
97 bool ib_enable{true}; ///< IB mode enabled 106 bool ib_enable{true}; ///< IB mode enabled
98 107
108 std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
109
99 GPU& gpu; 110 GPU& gpu;
100 Core::System& system; 111 Core::System& system;
101}; 112};
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
new file mode 100644
index 000000000..18a9db7e6
--- /dev/null
+++ b/src/video_core/engines/engine_interface.h
@@ -0,0 +1,22 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/common_types.h"
9
10namespace Tegra::Engines {
11
12class EngineInterface {
13public:
14 /// Write the value to the register identified by method.
15 virtual void CallMethod(u32 method, u32 method_argument, bool is_last_call) = 0;
16
17 /// Write multiple values to the register identified by method.
18 virtual void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
19 u32 methods_pending) = 0;
20};
21
22} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 8a47614d2..ff10ff40d 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -12,13 +12,13 @@ namespace Tegra::Engines {
12 12
13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 13Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
14 14
15void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { 15void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
16 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 16 ASSERT_MSG(method < Regs::NUM_REGS,
17 "Invalid Fermi2D register, increase the size of the Regs structure"); 17 "Invalid Fermi2D register, increase the size of the Regs structure");
18 18
19 regs.reg_array[method_call.method] = method_call.argument; 19 regs.reg_array[method] = method_argument;
20 20
21 switch (method_call.method) { 21 switch (method) {
22 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit, 22 // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
23 // so trigger on the second 32-bit write. 23 // so trigger on the second 32-bit write.
24 case FERMI2D_REG_INDEX(blit_src_y) + 1: { 24 case FERMI2D_REG_INDEX(blit_src_y) + 1: {
@@ -30,7 +30,7 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
30 30
31void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { 31void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
32 for (std::size_t i = 0; i < amount; i++) { 32 for (std::size_t i = 0; i < amount; i++) {
33 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 33 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
34 } 34 }
35} 35}
36 36
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 939a5966d..8f37d053f 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -10,6 +10,7 @@
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/math_util.h" 12#include "common/math_util.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Tegra { 16namespace Tegra {
@@ -31,16 +32,17 @@ namespace Tegra::Engines {
31#define FERMI2D_REG_INDEX(field_name) \ 32#define FERMI2D_REG_INDEX(field_name) \
32 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) 33 (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
33 34
34class Fermi2D final { 35class Fermi2D final : public EngineInterface {
35public: 36public:
36 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); 37 explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
37 ~Fermi2D() = default; 38 ~Fermi2D() = default;
38 39
39 /// Write the value to the register identified by method. 40 /// Write the value to the register identified by method.
40 void CallMethod(const GPU::MethodCall& method_call); 41 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
41 42
42 /// Write multiple values to the register identified by method. 43 /// Write multiple values to the register identified by method.
43 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 44 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
45 u32 methods_pending) override;
44 46
45 enum class Origin : u32 { 47 enum class Origin : u32 {
46 Center = 0, 48 Center = 0,
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 00a12175f..f6237fc6a 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -24,20 +24,19 @@ KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterfac
24 24
25KeplerCompute::~KeplerCompute() = default; 25KeplerCompute::~KeplerCompute() = default;
26 26
27void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { 27void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
28 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 28 ASSERT_MSG(method < Regs::NUM_REGS,
29 "Invalid KeplerCompute register, increase the size of the Regs structure"); 29 "Invalid KeplerCompute register, increase the size of the Regs structure");
30 30
31 regs.reg_array[method_call.method] = method_call.argument; 31 regs.reg_array[method] = method_argument;
32 32
33 switch (method_call.method) { 33 switch (method) {
34 case KEPLER_COMPUTE_REG_INDEX(exec_upload): { 34 case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
35 upload_state.ProcessExec(regs.exec_upload.linear != 0); 35 upload_state.ProcessExec(regs.exec_upload.linear != 0);
36 break; 36 break;
37 } 37 }
38 case KEPLER_COMPUTE_REG_INDEX(data_upload): { 38 case KEPLER_COMPUTE_REG_INDEX(data_upload): {
39 const bool is_last_call = method_call.IsLastCall(); 39 upload_state.ProcessData(method_argument, is_last_call);
40 upload_state.ProcessData(method_call.argument, is_last_call);
41 if (is_last_call) { 40 if (is_last_call) {
42 system.GPU().Maxwell3D().OnMemoryWrite(); 41 system.GPU().Maxwell3D().OnMemoryWrite();
43 } 42 }
@@ -54,7 +53,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
54void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 53void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
55 u32 methods_pending) { 54 u32 methods_pending) {
56 for (std::size_t i = 0; i < amount; i++) { 55 for (std::size_t i = 0; i < amount; i++) {
57 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 56 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
58 } 57 }
59} 58}
60 59
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index fe55fdfd0..18ceedfaf 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -11,6 +11,7 @@
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h" 13#include "video_core/engines/const_buffer_engine_interface.h"
14#include "video_core/engines/engine_interface.h"
14#include "video_core/engines/engine_upload.h" 15#include "video_core/engines/engine_upload.h"
15#include "video_core/engines/shader_type.h" 16#include "video_core/engines/shader_type.h"
16#include "video_core/gpu.h" 17#include "video_core/gpu.h"
@@ -39,7 +40,7 @@ namespace Tegra::Engines {
39#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 40#define KEPLER_COMPUTE_REG_INDEX(field_name) \
40 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 41 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
41 42
42class KeplerCompute final : public ConstBufferEngineInterface { 43class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
43public: 44public:
44 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 45 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
45 MemoryManager& memory_manager); 46 MemoryManager& memory_manager);
@@ -200,10 +201,11 @@ public:
200 "KeplerCompute LaunchParams has wrong size"); 201 "KeplerCompute LaunchParams has wrong size");
201 202
202 /// Write the value to the register identified by method. 203 /// Write the value to the register identified by method.
203 void CallMethod(const GPU::MethodCall& method_call); 204 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
204 205
205 /// Write multiple values to the register identified by method. 206 /// Write multiple values to the register identified by method.
206 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 207 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
208 u32 methods_pending) override;
207 209
208 Texture::FullTextureInfo GetTexture(std::size_t offset) const; 210 Texture::FullTextureInfo GetTexture(std::size_t offset) const;
209 211
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 586ff15dc..dc71b2eec 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -19,20 +19,19 @@ KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
19 19
20KeplerMemory::~KeplerMemory() = default; 20KeplerMemory::~KeplerMemory() = default;
21 21
22void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { 22void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
23 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 23 ASSERT_MSG(method < Regs::NUM_REGS,
24 "Invalid KeplerMemory register, increase the size of the Regs structure"); 24 "Invalid KeplerMemory register, increase the size of the Regs structure");
25 25
26 regs.reg_array[method_call.method] = method_call.argument; 26 regs.reg_array[method] = method_argument;
27 27
28 switch (method_call.method) { 28 switch (method) {
29 case KEPLERMEMORY_REG_INDEX(exec): { 29 case KEPLERMEMORY_REG_INDEX(exec): {
30 upload_state.ProcessExec(regs.exec.linear != 0); 30 upload_state.ProcessExec(regs.exec.linear != 0);
31 break; 31 break;
32 } 32 }
33 case KEPLERMEMORY_REG_INDEX(data): { 33 case KEPLERMEMORY_REG_INDEX(data): {
34 const bool is_last_call = method_call.IsLastCall(); 34 upload_state.ProcessData(method_argument, is_last_call);
35 upload_state.ProcessData(method_call.argument, is_last_call);
36 if (is_last_call) { 35 if (is_last_call) {
37 system.GPU().Maxwell3D().OnMemoryWrite(); 36 system.GPU().Maxwell3D().OnMemoryWrite();
38 } 37 }
@@ -44,7 +43,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
44void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 43void KeplerMemory::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
45 u32 methods_pending) { 44 u32 methods_pending) {
46 for (std::size_t i = 0; i < amount; i++) { 45 for (std::size_t i = 0; i < amount; i++) {
47 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 46 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
48 } 47 }
49} 48}
50 49
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index bb26fb030..5b7f71a00 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
14#include "video_core/gpu.h" 15#include "video_core/gpu.h"
15 16
@@ -32,16 +33,17 @@ namespace Tegra::Engines {
32#define KEPLERMEMORY_REG_INDEX(field_name) \ 33#define KEPLERMEMORY_REG_INDEX(field_name) \
33 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) 34 (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32))
34 35
35class KeplerMemory final { 36class KeplerMemory final : public EngineInterface {
36public: 37public:
37 KeplerMemory(Core::System& system, MemoryManager& memory_manager); 38 KeplerMemory(Core::System& system, MemoryManager& memory_manager);
38 ~KeplerMemory(); 39 ~KeplerMemory();
39 40
40 /// Write the value to the register identified by method. 41 /// Write the value to the register identified by method.
41 void CallMethod(const GPU::MethodCall& method_call); 42 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
42 43
43 /// Write multiple values to the register identified by method. 44 /// Write multiple values to the register identified by method.
44 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 45 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
46 u32 methods_pending) override;
45 47
46 struct Regs { 48 struct Regs {
47 static constexpr size_t NUM_REGS = 0x7F; 49 static constexpr size_t NUM_REGS = 0x7F;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7db055ea0..33936e209 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -125,12 +125,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
125 } 125 }
126} 126}
127 127
128void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 128void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
129 const u32 method = method_call.method;
130
131 if (method == cb_data_state.current) { 129 if (method == cb_data_state.current) {
132 regs.reg_array[method] = method_call.argument; 130 regs.reg_array[method] = method_argument;
133 ProcessCBData(method_call.argument); 131 ProcessCBData(method_argument);
134 return; 132 return;
135 } else if (cb_data_state.current != null_cb_data) { 133 } else if (cb_data_state.current != null_cb_data) {
136 FinishCBData(); 134 FinishCBData();
@@ -153,10 +151,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
153 executing_macro = method; 151 executing_macro = method;
154 } 152 }
155 153
156 macro_params.push_back(method_call.argument); 154 macro_params.push_back(method_argument);
157 155
158 // Call the macro when there are no more parameters in the command buffer 156 // Call the macro when there are no more parameters in the command buffer
159 if (method_call.IsLastCall()) { 157 if (is_last_call) {
160 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); 158 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
161 macro_params.clear(); 159 macro_params.clear();
162 } 160 }
@@ -166,7 +164,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
166 ASSERT_MSG(method < Regs::NUM_REGS, 164 ASSERT_MSG(method < Regs::NUM_REGS,
167 "Invalid Maxwell3D register, increase the size of the Regs structure"); 165 "Invalid Maxwell3D register, increase the size of the Regs structure");
168 166
169 u32 arg = method_call.argument; 167 u32 arg = method_argument;
170 // Keep track of the register value in shadow_state when requested. 168 // Keep track of the register value in shadow_state when requested.
171 if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || 169 if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
172 shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { 170 shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
@@ -189,7 +187,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
189 break; 187 break;
190 } 188 }
191 case MAXWELL3D_REG_INDEX(shadow_ram_control): { 189 case MAXWELL3D_REG_INDEX(shadow_ram_control): {
192 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); 190 shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
193 break; 191 break;
194 } 192 }
195 case MAXWELL3D_REG_INDEX(macros.data): { 193 case MAXWELL3D_REG_INDEX(macros.data): {
@@ -272,7 +270,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
272 break; 270 break;
273 } 271 }
274 case MAXWELL3D_REG_INDEX(data_upload): { 272 case MAXWELL3D_REG_INDEX(data_upload): {
275 const bool is_last_call = method_call.IsLastCall();
276 upload_state.ProcessData(arg, is_last_call); 273 upload_state.ProcessData(arg, is_last_call);
277 if (is_last_call) { 274 if (is_last_call) {
278 OnMemoryWrite(); 275 OnMemoryWrite();
@@ -330,7 +327,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
330 } 327 }
331 default: { 328 default: {
332 for (std::size_t i = 0; i < amount; i++) { 329 for (std::size_t i = 0; i < amount; i++) {
333 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 330 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
334 } 331 }
335 } 332 }
336 } 333 }
@@ -360,16 +357,15 @@ void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) {
360 StepInstance(expected_mode, count); 357 StepInstance(expected_mode, count);
361} 358}
362 359
363void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { 360void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
364 const u32 method = method_call.method;
365 if (mme_inline[method]) { 361 if (mme_inline[method]) {
366 regs.reg_array[method] = method_call.argument; 362 regs.reg_array[method] = method_argument;
367 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) || 363 if (method == MAXWELL3D_REG_INDEX(vertex_buffer.count) ||
368 method == MAXWELL3D_REG_INDEX(index_array.count)) { 364 method == MAXWELL3D_REG_INDEX(index_array.count)) {
369 const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count) 365 const MMEDrawMode expected_mode = method == MAXWELL3D_REG_INDEX(vertex_buffer.count)
370 ? MMEDrawMode::Array 366 ? MMEDrawMode::Array
371 : MMEDrawMode::Indexed; 367 : MMEDrawMode::Indexed;
372 StepInstance(expected_mode, method_call.argument); 368 StepInstance(expected_mode, method_argument);
373 } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) { 369 } else if (method == MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)) {
374 mme_draw.instance_mode = 370 mme_draw.instance_mode =
375 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0); 371 (regs.draw.instance_next != 0) || (regs.draw.instance_cont != 0);
@@ -381,7 +377,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
381 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 377 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
382 FlushMMEInlineDraw(); 378 FlushMMEInlineDraw();
383 } 379 }
384 CallMethod(method_call); 380 CallMethod(method, method_argument, true);
385 } 381 }
386} 382}
387 383
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 864924ff3..1a5df05ce 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -19,6 +19,7 @@
19#include "common/math_util.h" 19#include "common/math_util.h"
20#include "video_core/engines/const_buffer_engine_interface.h" 20#include "video_core/engines/const_buffer_engine_interface.h"
21#include "video_core/engines/const_buffer_info.h" 21#include "video_core/engines/const_buffer_info.h"
22#include "video_core/engines/engine_interface.h"
22#include "video_core/engines/engine_upload.h" 23#include "video_core/engines/engine_upload.h"
23#include "video_core/engines/shader_type.h" 24#include "video_core/engines/shader_type.h"
24#include "video_core/gpu.h" 25#include "video_core/gpu.h"
@@ -48,7 +49,7 @@ namespace Tegra::Engines {
48#define MAXWELL3D_REG_INDEX(field_name) \ 49#define MAXWELL3D_REG_INDEX(field_name) \
49 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 50 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
50 51
51class Maxwell3D final : public ConstBufferEngineInterface { 52class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
52public: 53public:
53 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 54 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
54 MemoryManager& memory_manager); 55 MemoryManager& memory_manager);
@@ -1360,13 +1361,14 @@ public:
1360 u32 GetRegisterValue(u32 method) const; 1361 u32 GetRegisterValue(u32 method) const;
1361 1362
1362 /// Write the value to the register identified by method. 1363 /// Write the value to the register identified by method.
1363 void CallMethod(const GPU::MethodCall& method_call); 1364 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
1364 1365
1365 /// Write multiple values to the register identified by method. 1366 /// Write multiple values to the register identified by method.
1366 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 1367 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
1368 u32 methods_pending) override;
1367 1369
1368 /// Write the value to the register identified by method. 1370 /// Write the value to the register identified by method.
1369 void CallMethodFromMME(const GPU::MethodCall& method_call); 1371 void CallMethodFromMME(u32 method, u32 method_argument);
1370 1372
1371 void FlushMMEInlineDraw(); 1373 void FlushMMEInlineDraw();
1372 1374
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6630005b0..01d7df405 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -17,16 +17,16 @@ namespace Tegra::Engines {
17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager) 17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
18 : system{system}, memory_manager{memory_manager} {} 18 : system{system}, memory_manager{memory_manager} {}
19 19
20void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 20void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
21 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 21 ASSERT_MSG(method < Regs::NUM_REGS,
22 "Invalid MaxwellDMA register, increase the size of the Regs structure"); 22 "Invalid MaxwellDMA register, increase the size of the Regs structure");
23 23
24 regs.reg_array[method_call.method] = method_call.argument; 24 regs.reg_array[method] = method_argument;
25 25
26#define MAXWELLDMA_REG_INDEX(field_name) \ 26#define MAXWELLDMA_REG_INDEX(field_name) \
27 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) 27 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
28 28
29 switch (method_call.method) { 29 switch (method) {
30 case MAXWELLDMA_REG_INDEX(exec): { 30 case MAXWELLDMA_REG_INDEX(exec): {
31 HandleCopy(); 31 HandleCopy();
32 break; 32 break;
@@ -39,7 +39,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
39void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount, 39void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
40 u32 methods_pending) { 40 u32 methods_pending) {
41 for (std::size_t i = 0; i < amount; i++) { 41 for (std::size_t i = 0; i < amount; i++) {
42 CallMethod({method, base_start[i], 0, methods_pending - static_cast<u32>(i)}); 42 CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
43 } 43 }
44} 44}
45 45
@@ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() {
90 ASSERT(regs.exec.enable_2d == 1); 90 ASSERT(regs.exec.enable_2d == 1);
91 91
92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 92 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
93
93 ASSERT(regs.src_params.BlockDepth() == 0); 94 ASSERT(regs.src_params.BlockDepth() == 0);
95 // Optimized path for micro copies.
96 if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) {
97 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
98 const std::size_t src_size = Texture::GetGOBSize();
99 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
100 u32 pos_x = regs.src_params.pos_x;
101 u32 pos_y = regs.src_params.pos_y;
102 const u64 offset =
103 Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y,
104 regs.src_params.BlockDepth(), bytes_per_pixel);
105 const u32 x_in_gob = 64 / bytes_per_pixel;
106 pos_x = pos_x % x_in_gob;
107 pos_y = pos_y % 8;
108
109 if (read_buffer.size() < src_size) {
110 read_buffer.resize(src_size);
111 }
112
113 if (write_buffer.size() < dst_size) {
114 write_buffer.resize(dst_size);
115 }
116
117 if (Settings::IsGPULevelExtreme()) {
118 memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size);
119 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
120 } else {
121 memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size);
122 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
123 }
124
125 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
126 regs.src_params.size_x, bytes_per_pixel, read_buffer.data(),
127 write_buffer.data(), regs.src_params.BlockHeight(), pos_x,
128 pos_y);
129
130 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
131
132 return;
133 }
94 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 134 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
95 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; 135 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
96 const std::size_t src_size = Texture::CalculateSize( 136 const std::size_t src_size = Texture::CalculateSize(
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index c43ed8194..502dd8509 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/engine_interface.h"
13#include "video_core/gpu.h" 14#include "video_core/gpu.h"
14 15
15namespace Core { 16namespace Core {
@@ -27,16 +28,17 @@ namespace Tegra::Engines {
27 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml 28 * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml
28 */ 29 */
29 30
30class MaxwellDMA final { 31class MaxwellDMA final : public EngineInterface {
31public: 32public:
32 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager); 33 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
33 ~MaxwellDMA() = default; 34 ~MaxwellDMA() = default;
34 35
35 /// Write the value to the register identified by method. 36 /// Write the value to the register identified by method.
36 void CallMethod(const GPU::MethodCall& method_call); 37 void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
37 38
38 /// Write multiple values to the register identified by method. 39 /// Write multiple values to the register identified by method.
39 void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending); 40 void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
41 u32 methods_pending) override;
40 42
41 struct Regs { 43 struct Regs {
42 static constexpr std::size_t NUM_REGS = 0x1D6; 44 static constexpr std::size_t NUM_REGS = 0x1D6;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b87fd873d..8eb017f65 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -299,19 +299,21 @@ void GPU::CallEngineMethod(const MethodCall& method_call) {
299 299
300 switch (engine) { 300 switch (engine) {
301 case EngineID::FERMI_TWOD_A: 301 case EngineID::FERMI_TWOD_A:
302 fermi_2d->CallMethod(method_call); 302 fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
303 break; 303 break;
304 case EngineID::MAXWELL_B: 304 case EngineID::MAXWELL_B:
305 maxwell_3d->CallMethod(method_call); 305 maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
306 break; 306 break;
307 case EngineID::KEPLER_COMPUTE_B: 307 case EngineID::KEPLER_COMPUTE_B:
308 kepler_compute->CallMethod(method_call); 308 kepler_compute->CallMethod(method_call.method, method_call.argument,
309 method_call.IsLastCall());
309 break; 310 break;
310 case EngineID::MAXWELL_DMA_COPY_A: 311 case EngineID::MAXWELL_DMA_COPY_A:
311 maxwell_dma->CallMethod(method_call); 312 maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
312 break; 313 break;
313 case EngineID::KEPLER_INLINE_TO_MEMORY_B: 314 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
314 kepler_memory->CallMethod(method_call); 315 kepler_memory->CallMethod(method_call.method, method_call.argument,
316 method_call.IsLastCall());
315 break; 317 break;
316 default: 318 default:
317 UNIMPLEMENTED_MSG("Unimplemented engine"); 319 UNIMPLEMENTED_MSG("Unimplemented engine");
@@ -347,7 +349,27 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) {
347 // Bind the current subchannel to the desired engine id. 349 // Bind the current subchannel to the desired engine id.
348 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, 350 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
349 method_call.argument); 351 method_call.argument);
350 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument); 352 const auto engine_id = static_cast<EngineID>(method_call.argument);
353 bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
354 switch (engine_id) {
355 case EngineID::FERMI_TWOD_A:
356 dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
357 break;
358 case EngineID::MAXWELL_B:
359 dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
360 break;
361 case EngineID::KEPLER_COMPUTE_B:
362 dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
363 break;
364 case EngineID::MAXWELL_DMA_COPY_A:
365 dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
366 break;
367 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
368 dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
369 break;
370 default:
371 UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id));
372 }
351} 373}
352 374
353void GPU::ProcessSemaphoreTriggerMethod() { 375void GPU::ProcessSemaphoreTriggerMethod() {
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 42031d80a..947364928 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -328,7 +328,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) {
328} 328}
329 329
330void MacroInterpreter::Send(u32 value) { 330void MacroInterpreter::Send(u32 value) {
331 maxwell3d.CallMethodFromMME({method_address.address, value}); 331 maxwell3d.CallMethodFromMME(method_address.address, value);
332 // Increment the method address by the method increment. 332 // Increment the method address by the method increment.
333 method_address.address.Assign(method_address.address.Value() + 333 method_address.address.Assign(method_address.address.Value() +
334 method_address.increment.Value()); 334 method_address.increment.Value());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index fae8638ec..548e4c3fe 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
382 } 382 }
383} 383}
384 384
385u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
386 u32 bytes_per_pixel) {
387 auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); };
388 const u32 gobs_in_block = 1 << block_height;
389 const u32 y_blocks = gob_size_y << block_height;
390 const u32 x_per_gob = gob_size_x / bytes_per_pixel;
391 const u32 x_blocks = div_ceil(width, x_per_gob);
392 const u32 block_size = gob_size * gobs_in_block;
393 const u32 stride = block_size * x_blocks;
394 const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size;
395 const u32 relative_y = dst_y % y_blocks;
396 return base + (relative_y / gob_size_y) * gob_size;
397}
398
385} // namespace Tegra::Texture 399} // namespace Tegra::Texture
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 9f2d6d308..06f3ebf87 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -59,4 +59,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
59void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, 59void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
60 std::size_t copy_size, const u8* source_data, u8* swizzle_data); 60 std::size_t copy_size, const u8* source_data, u8* swizzle_data);
61 61
62/// Obtains the offset of the GOB that contains the position ('dst_x', 'dst_y')
63u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height,
64 u32 bytes_per_pixel);
65
62} // namespace Tegra::Texture 66} // namespace Tegra::Texture