summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Subv2018-03-28 15:14:47 -0500
committerGravatar Subv2018-04-01 12:07:26 -0500
commit1ec8d2123d3cca013178827e059641f8989b5af4 (patch)
tree697717ce5175807e6f56611aee8eef9fd2a11ec3 /src
parentMerge pull request #286 from N00byKing/citratoyuzuagain (diff)
downloadyuzu-1ec8d2123d3cca013178827e059641f8989b5af4.tar.gz
yuzu-1ec8d2123d3cca013178827e059641f8989b5af4.tar.xz
yuzu-1ec8d2123d3cca013178827e059641f8989b5af4.zip
GPU: Implemented a gpu macro interpreter.
The Ryujinx macro interpreter and envydis were used as reference. Macros are programs that are uploaded by the games during boot and can later be called by writing to their method id in a GPU command buffer.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp5
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/macro_interpreter.cpp257
-rw-r--r--src/video_core/macro_interpreter.h164
5 files changed, 431 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 841f27d7f..a710c4bc5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -11,6 +11,8 @@ add_library(video_core STATIC
11 engines/maxwell_compute.h 11 engines/maxwell_compute.h
12 gpu.cpp 12 gpu.cpp
13 gpu.h 13 gpu.h
14 macro_interpreter.cpp
15 macro_interpreter.h
14 memory_manager.cpp 16 memory_manager.cpp
15 memory_manager.h 17 memory_manager.h
16 rasterizer_interface.h 18 rasterizer_interface.h
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 5359d21a2..85255d0a5 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -386,5 +386,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
386 return textures; 386 return textures;
387} 387}
388 388
389u32 Maxwell3D::GetRegisterValue(u32 method) const {
390 ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
391 return regs.reg_array[method];
392}
393
389} // namespace Engines 394} // namespace Engines
390} // namespace Tegra 395} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 3066bc606..086ffeb6a 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -514,6 +514,9 @@ public:
514 514
515 State state{}; 515 State state{};
516 516
517 /// Reads a register value located at the input method address
518 u32 GetRegisterValue(u32 method) const;
519
517 /// Write the value to the register identified by method. 520 /// Write the value to the register identified by method.
518 void WriteReg(u32 method, u32 value, u32 remaining_params); 521 void WriteReg(u32 method, u32 value, u32 remaining_params);
519 522
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
new file mode 100644
index 000000000..993a67746
--- /dev/null
+++ b/src/video_core/macro_interpreter.cpp
@@ -0,0 +1,257 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h"
9
10namespace Tegra {
11
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13
14void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
15 Reset();
16 registers[1] = parameters[0];
17 this->parameters = std::move(parameters);
18
19 // Execute the code until we hit an exit condition.
20 bool keep_executing = true;
21 while (keep_executing) {
22 keep_executing = Step(code, false);
23 }
24
25 // Assert the the macro used all the input parameters
26 ASSERT(next_parameter_index == this->parameters.size());
27}
28
29void MacroInterpreter::Reset() {
30 registers = {};
31 pc = 0;
32 delayed_pc = boost::none;
33 method_address.raw = 0;
34 parameters.clear();
35 // The next parameter index starts at 1, because $r1 already has the value of the first
36 // parameter.
37 next_parameter_index = 1;
38}
39
40bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
41 u32 base_address = pc;
42
43 Opcode opcode = GetOpcode(code);
44 pc += 4;
45
46 // Update the program counter if we were delayed
47 if (delayed_pc != boost::none) {
48 ASSERT(is_delay_slot);
49 pc = *delayed_pc;
50 delayed_pc = boost::none;
51 }
52
53 switch (opcode.operation) {
54 case Operation::ALU: {
55 u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
56 GetRegister(opcode.src_b));
57 ProcessResult(opcode.result_operation, opcode.dst, result);
58 break;
59 }
60 case Operation::AddImmediate: {
61 ProcessResult(opcode.result_operation, opcode.dst,
62 GetRegister(opcode.src_a) + opcode.immediate);
63 break;
64 }
65 case Operation::ExtractInsert: {
66 u32 dst = GetRegister(opcode.src_a);
67 u32 src = GetRegister(opcode.src_b);
68
69 src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask();
70 dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
71 dst |= src << opcode.bf_dst_bit;
72 ProcessResult(opcode.result_operation, opcode.dst, dst);
73 break;
74 }
75 case Operation::ExtractShiftLeftImmediate: {
76 u32 dst = GetRegister(opcode.src_a);
77 u32 src = GetRegister(opcode.src_b);
78
79 u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit;
80
81 ProcessResult(opcode.result_operation, opcode.dst, result);
82 break;
83 }
84 case Operation::ExtractShiftLeftRegister: {
85 u32 dst = GetRegister(opcode.src_a);
86 u32 src = GetRegister(opcode.src_b);
87
88 u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst;
89
90 ProcessResult(opcode.result_operation, opcode.dst, result);
91 break;
92 }
93 case Operation::Read: {
94 u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
95 ProcessResult(opcode.result_operation, opcode.dst, result);
96 break;
97 }
98 case Operation::Branch: {
99 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
100 u32 value = GetRegister(opcode.src_a);
101 bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
102 if (taken) {
103 // Ignore the delay slot if the branch has the annul bit.
104 if (opcode.branch_annul) {
105 pc = base_address + (opcode.immediate << 2);
106 return true;
107 }
108
109 delayed_pc = base_address + (opcode.immediate << 2);
110 // Execute one more instruction due to the delay slot.
111 return Step(code, true);
112 }
113 break;
114 }
115 default:
116 UNIMPLEMENTED_MSG("Unimplemented macro operation %u",
117 static_cast<u32>(opcode.operation.Value()));
118 }
119
120 if (opcode.is_exit) {
121 // Exit has a delay slot, execute the next instruction
122 // Note: Executing an exit during a branch delay slot will cause the instruction at the
123 // branch target to be executed before exiting.
124 Step(code, true);
125 return false;
126 }
127
128 return true;
129}
130
131MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
132 ASSERT((pc % sizeof(u32)) == 0);
133 ASSERT(pc < code.size() * sizeof(u32));
134 return {code[pc / sizeof(u32)]};
135}
136
137u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
138 switch (operation) {
139 case ALUOperation::Add:
140 return src_a + src_b;
141 // TODO(Subv): Implement AddWithCarry
142 case ALUOperation::Subtract:
143 return src_a - src_b;
144 // TODO(Subv): Implement SubtractWithBorrow
145 case ALUOperation::Xor:
146 return src_a ^ src_b;
147 case ALUOperation::Or:
148 return src_a | src_b;
149 case ALUOperation::And:
150 return src_a & src_b;
151 case ALUOperation::AndNot:
152 return src_a & ~src_b;
153 case ALUOperation::Nand:
154 return ~(src_a & src_b);
155
156 default:
157 UNIMPLEMENTED_MSG("Unimplemented ALU operation %u", static_cast<u32>(operation));
158 }
159}
160
161void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) {
162 switch (operation) {
163 case ResultOperation::IgnoreAndFetch:
164 // Fetch parameter and ignore result.
165 SetRegister(reg, FetchParameter());
166 break;
167 case ResultOperation::Move:
168 // Move result.
169 SetRegister(reg, result);
170 break;
171 case ResultOperation::MoveAndSetMethod:
172 // Move result and use as Method Address.
173 SetRegister(reg, result);
174 SetMethodAddress(result);
175 break;
176 case ResultOperation::FetchAndSend:
177 // Fetch parameter and send result.
178 SetRegister(reg, FetchParameter());
179 Send(result);
180 break;
181 case ResultOperation::MoveAndSend:
182 // Move and send result.
183 SetRegister(reg, result);
184 Send(result);
185 break;
186 case ResultOperation::FetchAndSetMethod:
187 // Fetch parameter and use result as Method Address.
188 SetRegister(reg, FetchParameter());
189 SetMethodAddress(result);
190 break;
191 case ResultOperation::MoveAndSetMethodFetchAndSend:
192 // Move result and use as Method Address, then fetch and send parameter.
193 SetRegister(reg, result);
194 SetMethodAddress(result);
195 Send(FetchParameter());
196 break;
197 case ResultOperation::MoveAndSetMethodSend:
198 // Move result and use as Method Address, then send bits 12:17 of result.
199 SetRegister(reg, result);
200 SetMethodAddress(result);
201 Send((result >> 12) & 0b111111);
202 break;
203 default:
204 UNIMPLEMENTED_MSG("Unimplemented result operation %u", static_cast<u32>(operation));
205 }
206}
207
208u32 MacroInterpreter::FetchParameter() {
209 ASSERT(next_parameter_index < parameters.size());
210 return parameters[next_parameter_index++];
211}
212
213u32 MacroInterpreter::GetRegister(u32 register_id) const {
214 // Register 0 is supposed to always return 0.
215 if (register_id == 0)
216 return 0;
217
218 ASSERT(register_id < registers.size());
219 return registers[register_id];
220}
221
222void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
223 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
224 // register.
225 if (register_id == 0)
226 return;
227
228 ASSERT(register_id < registers.size());
229 registers[register_id] = value;
230}
231
232void MacroInterpreter::SetMethodAddress(u32 address) {
233 method_address.raw = address;
234}
235
236void MacroInterpreter::Send(u32 value) {
237 maxwell3d.WriteReg(method_address.address, value, 0);
238 // Increment the method address by the method increment.
239 method_address.address.Assign(method_address.address.Value() +
240 method_address.increment.Value());
241}
242
243u32 MacroInterpreter::Read(u32 method) const {
244 return maxwell3d.GetRegisterValue(method);
245}
246
247bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const {
248 switch (cond) {
249 case BranchCondition::Zero:
250 return value == 0;
251 case BranchCondition::NotZero:
252 return value != 0;
253 }
254 UNREACHABLE();
255}
256
257} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
new file mode 100644
index 000000000..a71e359d8
--- /dev/null
+++ b/src/video_core/macro_interpreter.h
@@ -0,0 +1,164 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <vector>
9#include <boost/optional.hpp>
10#include "common/bit_field.h"
11#include "common/common_types.h"
12
13namespace Tegra {
14namespace Engines {
15class Maxwell3D;
16}
17
18class MacroInterpreter final {
19public:
20 explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
21
22 /**
23 * Executes the macro code with the specified input parameters.
24 * @param code The macro byte code to execute
25 * @param parameters The parameters of the macro
26 */
27 void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
28
29private:
30 enum class Operation : u32 {
31 ALU = 0,
32 AddImmediate = 1,
33 ExtractInsert = 2,
34 ExtractShiftLeftImmediate = 3,
35 ExtractShiftLeftRegister = 4,
36 Read = 5,
37 Unused = 6, // This operation doesn't seem to be a valid encoding.
38 Branch = 7,
39 };
40
41 enum class ALUOperation : u32 {
42 Add = 0,
43 AddWithCarry = 1,
44 Subtract = 2,
45 SubtractWithBorrow = 3,
46 // Operations 4-7 don't seem to be valid encodings.
47 Xor = 8,
48 Or = 9,
49 And = 10,
50 AndNot = 11,
51 Nand = 12
52 };
53
54 enum class ResultOperation : u32 {
55 IgnoreAndFetch = 0,
56 Move = 1,
57 MoveAndSetMethod = 2,
58 FetchAndSend = 3,
59 MoveAndSend = 4,
60 FetchAndSetMethod = 5,
61 MoveAndSetMethodFetchAndSend = 6,
62 MoveAndSetMethodSend = 7
63 };
64
65 enum class BranchCondition : u32 {
66 Zero = 0,
67 NotZero = 1,
68 };
69
70 union Opcode {
71 u32 raw;
72 BitField<0, 3, Operation> operation;
73 BitField<4, 3, ResultOperation> result_operation;
74 BitField<4, 1, BranchCondition> branch_condition;
75 BitField<5, 1, u32>
76 branch_annul; // If set on a branch, then the branch doesn't have a delay slot.
77 BitField<7, 1, u32> is_exit;
78 BitField<8, 3, u32> dst;
79 BitField<11, 3, u32> src_a;
80 BitField<14, 3, u32> src_b;
81 // The signed immediate overlaps the second source operand and the alu operation.
82 BitField<14, 18, s32> immediate;
83
84 BitField<17, 5, ALUOperation> alu_operation;
85
86 // Bitfield instructions data
87 BitField<17, 5, u32> bf_src_bit;
88 BitField<22, 5, u32> bf_size;
89 BitField<27, 5, u32> bf_dst_bit;
90
91 u32 GetBitfieldMask() const {
92 return (1 << bf_size) - 1;
93 }
94 };
95
96 union MethodAddress {
97 u32 raw;
98 BitField<0, 12, u32> address;
99 BitField<12, 6, u32> increment;
100 };
101
102 /// Resets the execution engine state, zeroing registers, etc.
103 void Reset();
104
105 /**
106 * Executes a single macro instruction located at the current program counter. Returns whether
107 * the interpreter should keep running.
108 * @param code The macro code to execute.
109 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
110 * previous instruction.
111 */
112 bool Step(const std::vector<u32>& code, bool is_delay_slot);
113
114 /// Calculates the result of an ALU operation. src_a OP src_b;
115 u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
116
117 /// Performs the result operation on the input result and stores it in the specified register
118 /// (if necessary).
119 void ProcessResult(ResultOperation operation, u32 reg, u32 result);
120
121 /// Evaluates the branch condition and returns whether the branch should be taken or not.
122 bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
123
124 /// Reads an opcode at the current program counter location.
125 Opcode GetOpcode(const std::vector<u32>& code) const;
126
127 /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
128 u32 GetRegister(u32 register_id) const;
129
130 /// Sets the register to the input value.
131 void SetRegister(u32 register_id, u32 value);
132
133 /// Sets the method address to use for the next Send instruction.
134 void SetMethodAddress(u32 address);
135
136 /// Calls a GPU Engine method with the input parameter.
137 void Send(u32 value);
138
139 /// Reads a GPU register located at the method address.
140 u32 Read(u32 method) const;
141
142 /// Returns the next parameter in the parameter queue.
143 u32 FetchParameter();
144
145 Engines::Maxwell3D& maxwell3d;
146
147 u32 pc; ///< Current program counter
148 boost::optional<u32>
149 delayed_pc; ///< Program counter to execute at after the delay slot is executed.
150
151 static constexpr size_t NumMacroRegisters = 8;
152
153 /// General purpose macro registers.
154 std::array<u32, NumMacroRegisters> registers = {};
155
156 /// Method address to use for the next Send instruction.
157 MethodAddress method_address = {};
158
159 /// Input parameters of the current macro.
160 std::vector<u32> parameters;
161 /// Index of the next parameter that will be fetched by the 'parm' instruction.
162 u32 next_parameter_index = 0;
163};
164} // namespace Tegra