summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2018-11-23 23:20:56 -0500
committer: bunnei, 2018-11-26 23:14:01 -0500
commit: abea6fa90c901d0b47487ed38d44511b18f0addf (patch)
tree: 65ba9fc6ff7609ea569ea1e3d05f91caa56ffa14 /src
parent: Merge pull request #1805 from lioncash/resource (diff)
downloadyuzu-abea6fa90c901d0b47487ed38d44511b18f0addf.tar.gz
yuzu-abea6fa90c901d0b47487ed38d44511b18f0addf.tar.xz
yuzu-abea6fa90c901d0b47487ed38d44511b18f0addf.zip
gpu: Rewrite GPU command list processing with DmaPusher class.
- More accurate impl., fixes Undertale (among other games).
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp 13
-rw-r--r-- src/video_core/CMakeLists.txt 4
-rw-r--r-- src/video_core/command_processor.h 53
-rw-r--r-- src/video_core/dma_pusher.cpp 110
-rw-r--r-- src/video_core/dma_pusher.h 95
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 8
-rw-r--r-- src/video_core/engines/fermi_2d.h 2
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 10
-rw-r--r-- src/video_core/engines/kepler_memory.h 3
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 53
-rw-r--r-- src/video_core/engines/maxwell_3d.h 2
-rw-r--r-- src/video_core/engines/maxwell_compute.cpp 8
-rw-r--r-- src/video_core/engines/maxwell_compute.h 3
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 8
-rw-r--r-- src/video_core/engines/maxwell_dma.h 2
-rw-r--r-- src/video_core/gpu.cpp 58
-rw-r--r-- src/video_core/gpu.h 27
-rw-r--r-- src/video_core/macro_interpreter.cpp 2
18 files changed, 353 insertions, 108 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 874d5e1c3..39a58b685 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -8,7 +8,6 @@
8#include "core/core.h" 8#include "core/core.h"
9#include "core/hle/service/nvdrv/devices/nvhost_gpu.h" 9#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
10#include "core/memory.h" 10#include "core/memory.h"
11#include "video_core/command_processor.h"
12#include "video_core/gpu.h" 11#include "video_core/gpu.h"
13#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
14 13
@@ -129,6 +128,14 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
129 return 0; 128 return 0;
130} 129}
131 130
131static void PushGPUEntries(const std::vector<Tegra::CommandListHeader>& entries) {
132 auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()};
133 for (const auto& entry : entries) {
134 dma_pusher.Push(entry);
135 }
136 dma_pusher.DispatchCalls();
137}
138
132u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { 139u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
133 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 140 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
134 UNIMPLEMENTED(); 141 UNIMPLEMENTED();
@@ -146,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
146 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
147 params.num_entries * sizeof(Tegra::CommandListHeader)); 154 params.num_entries * sizeof(Tegra::CommandListHeader));
148 155
149 Core::System::GetInstance().GPU().ProcessCommandLists(entries); 156 PushGPUEntries(entries);
150 157
151 params.fence_out.id = 0; 158 params.fence_out.id = 0;
152 params.fence_out.value = 0; 159 params.fence_out.value = 0;
@@ -167,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
167 Memory::ReadBlock(params.address, entries.data(), 174 Memory::ReadBlock(params.address, entries.data(),
168 params.num_entries * sizeof(Tegra::CommandListHeader)); 175 params.num_entries * sizeof(Tegra::CommandListHeader));
169 176
170 Core::System::GetInstance().GPU().ProcessCommandLists(entries); 177 PushGPUEntries(entries);
171 178
172 params.fence_out.id = 0; 179 params.fence_out.id = 0;
173 params.fence_out.value = 0; 180 params.fence_out.value = 0;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3f906a517..0406fbcd9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,6 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 command_processor.cpp 2 dma_pusher.cpp
3 command_processor.h 3 dma_pusher.h
4 debug_utils/debug_utils.cpp 4 debug_utils/debug_utils.cpp
5 debug_utils/debug_utils.h 5 debug_utils/debug_utils.h
6 engines/fermi_2d.cpp 6 engines/fermi_2d.cpp
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
deleted file mode 100644
index bd766e77a..000000000
--- a/src/video_core/command_processor.h
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "video_core/memory_manager.h"
11
12namespace Tegra {
13
14enum class SubmissionMode : u32 {
15 IncreasingOld = 0,
16 Increasing = 1,
17 NonIncreasingOld = 2,
18 NonIncreasing = 3,
19 Inline = 4,
20 IncreaseOnce = 5
21};
22
23struct CommandListHeader {
24 u32 entry0; // gpu_va_lo
25 union {
26 u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
27 BitField<0, 8, u32> gpu_va_hi;
28 BitField<8, 2, u32> unk1;
29 BitField<10, 21, u32> sz;
30 BitField<31, 1, u32> unk2;
31 };
32
33 GPUVAddr Address() const {
34 return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
35 }
36};
37static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
38
39union CommandHeader {
40 u32 hex;
41
42 BitField<0, 13, u32> method;
43 BitField<13, 3, u32> subchannel;
44
45 BitField<16, 13, u32> arg_count;
46 BitField<16, 13, u32> inline_data;
47
48 BitField<29, 3, SubmissionMode> mode;
49};
50static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
51static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
52
53} // namespace Tegra
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
new file mode 100644
index 000000000..9f85a7aca
--- /dev/null
+++ b/src/video_core/dma_pusher.cpp
@@ -0,0 +1,110 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/core.h"
6#include "core/memory.h"
7#include "video_core/dma_pusher.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/gpu.h"
10
11namespace Tegra {
12
13DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {}
14
15DmaPusher::~DmaPusher() = default;
16
17void DmaPusher::DispatchCalls() {
18 // On entering GPU code, assume all memory may be touched by the ARM core.
19 gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
20
21 while (Core::System::GetInstance().IsPoweredOn()) {
22 if (!Step()) {
23 break;
24 }
25 }
26}
27
28bool DmaPusher::Step() {
29 if (dma_get != dma_put) {
30 // Push buffer non-empty, read a word
31 const CommandHeader command_header{
32 Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))};
33
34 dma_get += sizeof(u32);
35
36 if (!non_main) {
37 dma_mget = dma_get;
38 }
39
40 // now, see if we're in the middle of a command
41 if (dma_state.length_pending) {
42 // Second word of long non-inc methods command - method count
43 dma_state.length_pending = 0;
44 dma_state.method_count = command_header.method_count_;
45 } else if (dma_state.method_count) {
46 // Data word of methods command
47 CallMethod(command_header.argument);
48
49 if (!dma_state.non_incrementing) {
50 dma_state.method++;
51 }
52
53 if (dma_increment_once) {
54 dma_state.non_incrementing = true;
55 }
56
57 dma_state.method_count--;
58 } else {
59 // No command active - this is the first word of a new one
60 switch (command_header.mode) {
61 case SubmissionMode::Increasing:
62 SetState(command_header);
63 dma_state.non_incrementing = false;
64 dma_increment_once = false;
65 break;
66 case SubmissionMode::NonIncreasing:
67 SetState(command_header);
68 dma_state.non_incrementing = true;
69 dma_increment_once = false;
70 break;
71 case SubmissionMode::Inline:
72 dma_state.method = command_header.method;
73 dma_state.subchannel = command_header.subchannel;
74 CallMethod(command_header.arg_count);
75 dma_state.non_incrementing = true;
76 dma_increment_once = false;
77 break;
78 case SubmissionMode::IncreaseOnce:
79 SetState(command_header);
80 dma_state.non_incrementing = false;
81 dma_increment_once = true;
82 break;
83 }
84 }
85 } else if (ib_enable && !dma_pushbuffer.empty()) {
86 // Current pushbuffer empty, but we have more IB entries to read
87 const CommandListHeader& command_list_header{dma_pushbuffer.front()};
88 dma_get = command_list_header.addr;
89 dma_put = dma_get + command_list_header.size * sizeof(u32);
90 non_main = command_list_header.is_non_main;
91 dma_pushbuffer.pop();
92 } else {
93 // Otherwise, pushbuffer empty and IB empty or nonexistent - nothing to do
94 return {};
95 }
96
97 return true;
98}
99
100void DmaPusher::SetState(const CommandHeader& command_header) {
101 dma_state.method = command_header.method;
102 dma_state.subchannel = command_header.subchannel;
103 dma_state.method_count = command_header.method_count;
104}
105
106void DmaPusher::CallMethod(u32 argument) const {
107 gpu.CallMethod({dma_state.method, argument, dma_state.subchannel, dma_state.method_count});
108}
109
110} // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
new file mode 100644
index 000000000..39d98e46e
--- /dev/null
+++ b/src/video_core/dma_pusher.h
@@ -0,0 +1,95 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <queue>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11#include "video_core/memory_manager.h"
12
13namespace Tegra {
14
15enum class SubmissionMode : u32 {
16 IncreasingOld = 0,
17 Increasing = 1,
18 NonIncreasingOld = 2,
19 NonIncreasing = 3,
20 Inline = 4,
21 IncreaseOnce = 5
22};
23
24struct CommandListHeader {
25 union {
26 u64 raw;
27 BitField<0, 40, GPUVAddr> addr;
28 BitField<41, 1, u64> is_non_main;
29 BitField<42, 21, u64> size;
30 };
31};
32static_assert(sizeof(CommandListHeader) == sizeof(u64), "CommandListHeader is incorrect size");
33
34union CommandHeader {
35 u32 argument;
36 BitField<0, 13, u32> method;
37 BitField<0, 24, u32> method_count_;
38 BitField<13, 3, u32> subchannel;
39 BitField<16, 13, u32> arg_count;
40 BitField<16, 13, u32> method_count;
41 BitField<29, 3, SubmissionMode> mode;
42};
43static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout");
44static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!");
45
46class GPU;
47
48/**
49 * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
50 * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
51 * into a "command stream" consisting of 32-bit words that make up "commands".
52 * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
53 * details on this implementation.
54 */
55class DmaPusher {
56public:
57 explicit DmaPusher(GPU& gpu);
58 ~DmaPusher();
59
60 void Push(const CommandListHeader& command_list_header) {
61 dma_pushbuffer.push(command_list_header);
62 }
63
64 void DispatchCalls();
65
66private:
67 bool Step();
68
69 void SetState(const CommandHeader& command_header);
70
71 void CallMethod(u32 argument) const;
72
73 GPU& gpu;
74
75 std::queue<CommandListHeader> dma_pushbuffer;
76
77 struct DmaState {
78 u32 method; ///< Current method
79 u32 subchannel; ///< Current subchannel
80 u32 method_count; ///< Current method count
81 u32 length_pending; ///< Large NI command length pending
82 bool non_incrementing; ///< Current command’s NI flag
83 };
84
85 DmaState dma_state{};
86 bool dma_increment_once{};
87
88 GPUVAddr dma_put{}; ///< pushbuffer current end address
89 GPUVAddr dma_get{}; ///< pushbuffer current read address
90 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
91 bool ib_enable{true}; ///< IB mode enabled
92 bool non_main{}; ///< non-main pushbuffer active
93};
94
95} // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 8d0700d13..dbea5bb5e 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -14,13 +14,13 @@ namespace Tegra::Engines {
14Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 14Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 15 : memory_manager(memory_manager), rasterizer{rasterizer} {}
16 16
17void Fermi2D::WriteReg(u32 method, u32 value) { 17void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method < Regs::NUM_REGS, 18 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
19 "Invalid Fermi2D register, increase the size of the Regs structure"); 19 "Invalid Fermi2D register, increase the size of the Regs structure");
20 20
21 regs.reg_array[method] = value; 21 regs.reg_array[method_call.method] = method_call.argument;
22 22
23 switch (method) { 23 switch (method_call.method) {
24 case FERMI2D_REG_INDEX(trigger): { 24 case FERMI2D_REG_INDEX(trigger): {
25 HandleSurfaceCopy(); 25 HandleSurfaceCopy();
26 break; 26 break;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 2a6e8bbbb..50009bf75 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -27,7 +27,7 @@ public:
27 ~Fermi2D() = default; 27 ~Fermi2D() = default;
28 28
29 /// Write the value to the register identified by method. 29 /// Write the value to the register identified by method.
30 void WriteReg(u32 method, u32 value); 30 void CallMethod(const GPU::MethodCall& method_call);
31 31
32 struct Regs { 32 struct Regs {
33 static constexpr std::size_t NUM_REGS = 0x258; 33 static constexpr std::size_t NUM_REGS = 0x258;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 2adbc9eaf..4880191fc 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -17,19 +17,19 @@ KeplerMemory::KeplerMemory(VideoCore::RasterizerInterface& rasterizer,
17 17
18KeplerMemory::~KeplerMemory() = default; 18KeplerMemory::~KeplerMemory() = default;
19 19
20void KeplerMemory::WriteReg(u32 method, u32 value) { 20void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
21 ASSERT_MSG(method < Regs::NUM_REGS, 21 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
22 "Invalid KeplerMemory register, increase the size of the Regs structure"); 22 "Invalid KeplerMemory register, increase the size of the Regs structure");
23 23
24 regs.reg_array[method] = value; 24 regs.reg_array[method_call.method] = method_call.argument;
25 25
26 switch (method) { 26 switch (method_call.method) {
27 case KEPLERMEMORY_REG_INDEX(exec): { 27 case KEPLERMEMORY_REG_INDEX(exec): {
28 state.write_offset = 0; 28 state.write_offset = 0;
29 break; 29 break;
30 } 30 }
31 case KEPLERMEMORY_REG_INDEX(data): { 31 case KEPLERMEMORY_REG_INDEX(data): {
32 ProcessData(value); 32 ProcessData(method_call.argument);
33 break; 33 break;
34 } 34 }
35 } 35 }
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index bf4a13cff..fe9ebc5b9 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -9,6 +9,7 @@
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13 14
14namespace VideoCore { 15namespace VideoCore {
@@ -26,7 +27,7 @@ public:
26 ~KeplerMemory(); 27 ~KeplerMemory();
27 28
28 /// Write the value to the register identified by method. 29 /// Write the value to the register identified by method.
29 void WriteReg(u32 method, u32 value); 30 void CallMethod(const GPU::MethodCall& method_call);
30 31
31 struct Regs { 32 struct Regs {
32 static constexpr size_t NUM_REGS = 0x7F; 33 static constexpr size_t NUM_REGS = 0x7F;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index f0a5470b9..b19b3a75a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -97,71 +97,74 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
97 macro_interpreter.Execute(search->second, std::move(parameters)); 97 macro_interpreter.Execute(search->second, std::move(parameters));
98} 98}
99 99
100void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { 100void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
101 auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); 101 auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
102 102
103 // It is an error to write to a register other than the current macro's ARG register before it 103 // It is an error to write to a register other than the current macro's ARG register before it
104 // has finished execution. 104 // has finished execution.
105 if (executing_macro != 0) { 105 if (executing_macro != 0) {
106 ASSERT(method == executing_macro + 1); 106 ASSERT(method_call.method == executing_macro + 1);
107 } 107 }
108 108
109 // Methods after 0xE00 are special, they're actually triggers for some microcode that was 109 // Methods after 0xE00 are special, they're actually triggers for some microcode that was
110 // uploaded to the GPU during initialization. 110 // uploaded to the GPU during initialization.
111 if (method >= MacroRegistersStart) { 111 if (method_call.method >= MacroRegistersStart) {
112 // We're trying to execute a macro 112 // We're trying to execute a macro
113 if (executing_macro == 0) { 113 if (executing_macro == 0) {
114 // A macro call must begin by writing the macro method's register, not its argument. 114 // A macro call must begin by writing the macro method's register, not its argument.
115 ASSERT_MSG((method % 2) == 0, 115 ASSERT_MSG((method_call.method % 2) == 0,
116 "Can't start macro execution by writing to the ARGS register"); 116 "Can't start macro execution by writing to the ARGS register");
117 executing_macro = method; 117 executing_macro = method_call.method;
118 } 118 }
119 119
120 macro_params.push_back(value); 120 macro_params.push_back(method_call.argument);
121 121
122 // Call the macro when there are no more parameters in the command buffer 122 // Call the macro when there are no more parameters in the command buffer
123 if (remaining_params == 0) { 123 if (method_call.IsLastCall()) {
124 CallMacroMethod(executing_macro, std::move(macro_params)); 124 CallMacroMethod(executing_macro, std::move(macro_params));
125 } 125 }
126 return; 126 return;
127 } 127 }
128 128
129 ASSERT_MSG(method < Regs::NUM_REGS, 129 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
130 "Invalid Maxwell3D register, increase the size of the Regs structure"); 130 "Invalid Maxwell3D register, increase the size of the Regs structure");
131 131
132 if (debug_context) { 132 if (debug_context) {
133 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); 133 debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
134 } 134 }
135 135
136 if (regs.reg_array[method] != value) { 136 if (regs.reg_array[method_call.method] != method_call.argument) {
137 regs.reg_array[method] = value; 137 regs.reg_array[method_call.method] = method_call.argument;
138 // Vertex format 138 // Vertex format
139 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && 139 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
140 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { 140 method_call.method <
141 MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
141 dirty_flags.vertex_attrib_format = true; 142 dirty_flags.vertex_attrib_format = true;
142 } 143 }
143 144
144 // Vertex buffer 145 // Vertex buffer
145 if (method >= MAXWELL3D_REG_INDEX(vertex_array) && 146 if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
146 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { 147 method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
147 dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
148 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
149 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
150 dirty_flags.vertex_array |= 148 dirty_flags.vertex_array |=
151 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); 149 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
152 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && 150 } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
153 method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { 151 method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
154 dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays)); 152 dirty_flags.vertex_array |=
153 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
154 } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
155 method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
156 dirty_flags.vertex_array |=
157 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
155 } 158 }
156 } 159 }
157 160
158 switch (method) { 161 switch (method_call.method) {
159 case MAXWELL3D_REG_INDEX(macros.data): { 162 case MAXWELL3D_REG_INDEX(macros.data): {
160 ProcessMacroUpload(value); 163 ProcessMacroUpload(method_call.argument);
161 break; 164 break;
162 } 165 }
163 case MAXWELL3D_REG_INDEX(macros.bind): { 166 case MAXWELL3D_REG_INDEX(macros.bind): {
164 ProcessMacroBind(value); 167 ProcessMacroBind(method_call.argument);
165 break; 168 break;
166 } 169 }
167 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): 170 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
@@ -180,7 +183,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
180 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 183 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
181 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 184 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
182 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 185 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
183 ProcessCBData(value); 186 ProcessCBData(method_call.argument);
184 break; 187 break;
185 } 188 }
186 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 189 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 9324d9710..84471f181 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1080,7 +1080,7 @@ public:
1080 u32 GetRegisterValue(u32 method) const; 1080 u32 GetRegisterValue(u32 method) const;
1081 1081
1082 /// Write the value to the register identified by method. 1082 /// Write the value to the register identified by method.
1083 void WriteReg(u32 method, u32 value, u32 remaining_params); 1083 void CallMethod(const GPU::MethodCall& method_call);
1084 1084
1085 /// Returns a list of enabled textures for the specified shader stage. 1085 /// Returns a list of enabled textures for the specified shader stage.
1086 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; 1086 std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
diff --git a/src/video_core/engines/maxwell_compute.cpp b/src/video_core/engines/maxwell_compute.cpp
index 8b5f08351..656db6a61 100644
--- a/src/video_core/engines/maxwell_compute.cpp
+++ b/src/video_core/engines/maxwell_compute.cpp
@@ -8,13 +8,13 @@
8 8
9namespace Tegra::Engines { 9namespace Tegra::Engines {
10 10
11void MaxwellCompute::WriteReg(u32 method, u32 value) { 11void MaxwellCompute::CallMethod(const GPU::MethodCall& method_call) {
12 ASSERT_MSG(method < Regs::NUM_REGS, 12 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
13 "Invalid MaxwellCompute register, increase the size of the Regs structure"); 13 "Invalid MaxwellCompute register, increase the size of the Regs structure");
14 14
15 regs.reg_array[method] = value; 15 regs.reg_array[method_call.method] = method_call.argument;
16 16
17 switch (method) { 17 switch (method_call.method) {
18 case MAXWELL_COMPUTE_REG_INDEX(compute): { 18 case MAXWELL_COMPUTE_REG_INDEX(compute): {
19 LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented"); 19 LOG_CRITICAL(HW_GPU, "Compute shaders are not implemented");
20 UNREACHABLE(); 20 UNREACHABLE();
diff --git a/src/video_core/engines/maxwell_compute.h b/src/video_core/engines/maxwell_compute.h
index 6ea934fb9..1d71f11bd 100644
--- a/src/video_core/engines/maxwell_compute.h
+++ b/src/video_core/engines/maxwell_compute.h
@@ -9,6 +9,7 @@
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_funcs.h" 10#include "common/common_funcs.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/gpu.h"
12 13
13namespace Tegra::Engines { 14namespace Tegra::Engines {
14 15
@@ -42,7 +43,7 @@ public:
42 "MaxwellCompute Regs has wrong size"); 43 "MaxwellCompute Regs has wrong size");
43 44
44 /// Write the value to the register identified by method. 45 /// Write the value to the register identified by method.
45 void WriteReg(u32 method, u32 value); 46 void CallMethod(const GPU::MethodCall& method_call);
46}; 47};
47 48
48#define ASSERT_REG_POSITION(field_name, position) \ 49#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a34e884fe..06462f570 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -14,16 +14,16 @@ namespace Tegra::Engines {
14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) 14MaxwellDMA::MaxwellDMA(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
15 : memory_manager(memory_manager), rasterizer{rasterizer} {} 15 : memory_manager(memory_manager), rasterizer{rasterizer} {}
16 16
17void MaxwellDMA::WriteReg(u32 method, u32 value) { 17void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
18 ASSERT_MSG(method < Regs::NUM_REGS, 18 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
19 "Invalid MaxwellDMA register, increase the size of the Regs structure"); 19 "Invalid MaxwellDMA register, increase the size of the Regs structure");
20 20
21 regs.reg_array[method] = value; 21 regs.reg_array[method_call.method] = method_call.argument;
22 22
23#define MAXWELLDMA_REG_INDEX(field_name) \ 23#define MAXWELLDMA_REG_INDEX(field_name) \
24 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32)) 24 (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
25 25
26 switch (method) { 26 switch (method_call.method) {
27 case MAXWELLDMA_REG_INDEX(exec): { 27 case MAXWELLDMA_REG_INDEX(exec): {
28 HandleCopy(); 28 HandleCopy();
29 break; 29 break;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 5f3704f05..1f8cd65d2 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -24,7 +24,7 @@ public:
24 ~MaxwellDMA() = default; 24 ~MaxwellDMA() = default;
25 25
26 /// Write the value to the register identified by method. 26 /// Write the value to the register identified by method.
27 void WriteReg(u32 method, u32 value); 27 void CallMethod(const GPU::MethodCall& method_call);
28 28
29 struct Regs { 29 struct Regs {
30 static constexpr std::size_t NUM_REGS = 0x1D6; 30 static constexpr std::size_t NUM_REGS = 0x1D6;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 51b3904f6..4a96530ae 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h"
6#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
7#include "video_core/engines/kepler_memory.h" 8#include "video_core/engines/kepler_memory.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
@@ -26,6 +27,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
26 27
27GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { 28GPU::GPU(VideoCore::RasterizerInterface& rasterizer) {
28 memory_manager = std::make_unique<Tegra::MemoryManager>(); 29 memory_manager = std::make_unique<Tegra::MemoryManager>();
30 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
29 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); 31 maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager);
30 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 32 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
31 maxwell_compute = std::make_unique<Engines::MaxwellCompute>(); 33 maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
@@ -51,6 +53,14 @@ const MemoryManager& GPU::MemoryManager() const {
51 return *memory_manager; 53 return *memory_manager;
52} 54}
53 55
56DmaPusher& GPU::DmaPusher() {
57 return *dma_pusher;
58}
59
60const DmaPusher& GPU::DmaPusher() const {
61 return *dma_pusher;
62}
63
54u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { 64u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
55 ASSERT(format != RenderTargetFormat::NONE); 65 ASSERT(format != RenderTargetFormat::NONE);
56 66
@@ -113,4 +123,52 @@ u32 DepthFormatBytesPerPixel(DepthFormat format) {
113 } 123 }
114} 124}
115 125
/// Method ids that GPU::CallMethod handles itself rather than forwarding to an engine.
enum class BufferMethods {
    /// Binds the calling subchannel to the engine id given as the argument.
    BindObject = 0,
    // NOTE(review): presumably the number of such buffer methods; not referenced in the code
    // visible here - confirm.
    CountBufferMethods = 0x40,
};
130
131MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
132
133void GPU::CallMethod(const MethodCall& method_call) {
134 MICROPROFILE_SCOPE(ProcessCommandLists);
135
136 LOG_TRACE(HW_GPU,
137 "Processing method {:08X} on subchannel {} value "
138 "{:08X} remaining params {}",
139 MethCall.method, MethCall.subchannel, value, remaining_params);
140
141 ASSERT(method_call.subchannel < bound_engines.size());
142
143 if (method_call.method == static_cast<u32>(BufferMethods::BindObject)) {
144 // Bind the current subchannel to the desired engine id.
145 LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
146 method_call.argument);
147 bound_engines[method_call.subchannel] = static_cast<EngineID>(method_call.argument);
148 return;
149 }
150
151 const EngineID engine = bound_engines[method_call.subchannel];
152
153 switch (engine) {
154 case EngineID::FERMI_TWOD_A:
155 fermi_2d->CallMethod(method_call);
156 break;
157 case EngineID::MAXWELL_B:
158 maxwell_3d->CallMethod(method_call);
159 break;
160 case EngineID::MAXWELL_COMPUTE_B:
161 maxwell_compute->CallMethod(method_call);
162 break;
163 case EngineID::MAXWELL_DMA_COPY_A:
164 maxwell_dma->CallMethod(method_call);
165 break;
166 case EngineID::KEPLER_INLINE_TO_MEMORY_B:
167 kepler_memory->CallMethod(method_call);
168 break;
169 default:
170 UNIMPLEMENTED_MSG("Unimplemented engine");
171 }
172}
173
116} // namespace Tegra 174} // namespace Tegra
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 5cc1e19ca..af5ccd1e9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -9,6 +9,7 @@
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/service/nvflinger/buffer_queue.h" 11#include "core/hle/service/nvflinger/buffer_queue.h"
12#include "video_core/dma_pusher.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13 14
14namespace VideoCore { 15namespace VideoCore {
@@ -119,8 +120,23 @@ public:
119 explicit GPU(VideoCore::RasterizerInterface& rasterizer); 120 explicit GPU(VideoCore::RasterizerInterface& rasterizer);
120 ~GPU(); 121 ~GPU();
121 122
122 /// Processes a command list stored at the specified address in GPU memory. 123 struct MethodCall {
123 void ProcessCommandLists(const std::vector<CommandListHeader>& commands); 124 u32 method{};
125 u32 argument{};
126 u32 subchannel{};
127 u32 method_count{};
128
129 bool IsLastCall() const {
130 return method_count <= 1;
131 }
132
133 MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
134 : method(method), argument(argument), subchannel(subchannel),
135 method_count(method_count) {}
136 };
137
138 /// Calls a GPU method.
139 void CallMethod(const MethodCall& method_call);
124 140
125 /// Returns a reference to the Maxwell3D GPU engine. 141 /// Returns a reference to the Maxwell3D GPU engine.
126 Engines::Maxwell3D& Maxwell3D(); 142 Engines::Maxwell3D& Maxwell3D();
@@ -134,7 +150,14 @@ public:
134 /// Returns a const reference to the GPU memory manager. 150 /// Returns a const reference to the GPU memory manager.
135 const Tegra::MemoryManager& MemoryManager() const; 151 const Tegra::MemoryManager& MemoryManager() const;
136 152
153 /// Returns a reference to the GPU DMA pusher.
154 Tegra::DmaPusher& DmaPusher();
155
156 /// Returns a const reference to the GPU DMA pusher.
157 const Tegra::DmaPusher& DmaPusher() const;
158
137private: 159private:
160 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
138 std::unique_ptr<Tegra::MemoryManager> memory_manager; 161 std::unique_ptr<Tegra::MemoryManager> memory_manager;
139 162
140 /// Mapping of command subchannels to their bound engine ids. 163 /// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 2b0dea5cd..9c55e9f1e 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -250,7 +250,7 @@ void MacroInterpreter::SetMethodAddress(u32 address) {
250} 250}
251 251
252void MacroInterpreter::Send(u32 value) { 252void MacroInterpreter::Send(u32 value) {
253 maxwell3d.WriteReg(method_address.address, value, 0); 253 maxwell3d.CallMethod({method_address.address, value});
254 // Increment the method address by the method increment. 254 // Increment the method address by the method increment.
255 method_address.address.Assign(method_address.address.Value() + 255 method_address.address.Assign(method_address.address.Value() +
256 method_address.increment.Value()); 256 method_address.increment.Value());