summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Lioncash, 2022-01-25 13:24:53 -0500
committer: Lioncash, 2022-01-25 13:31:46 -0500
commit: 6b873b72ae5dcaad9a234d3f5375f309578e28fa (patch)
tree: 0ac70be3583c31d4f9ae09ae30d5d16c51eab525 /src
parent: video_core/macro_hle: Move impl class into cpp file (diff)
download: yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.tar.gz
yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.tar.xz
yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.zip
video_core/macro_jit_x64: Move impl class into cpp file
Keeps the implementation internalized and also reduces API-facing header dependencies. Notably, this fully internalizes all of the xbyak externals.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/macro/macro_jit_x64.cpp 102
-rw-r--r-- src/video_core/macro/macro_jit_x64.h 71
2 files changed, 86 insertions, 87 deletions
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index c6b2b2109..1934039c0 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -2,9 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <bitset>
7#include <optional>
8
9#include <xbyak/xbyak.h>
10
5#include "common/assert.h" 11#include "common/assert.h"
12#include "common/bit_field.h"
6#include "common/logging/log.h" 13#include "common/logging/log.h"
7#include "common/microprofile.h" 14#include "common/microprofile.h"
15#include "common/x64/xbyak_abi.h"
8#include "common/x64/xbyak_util.h" 16#include "common/x64/xbyak_util.h"
9#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
10#include "video_core/macro/macro_interpreter.h" 18#include "video_core/macro/macro_interpreter.h"
@@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
14MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); 22MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
15 23
16namespace Tegra { 24namespace Tegra {
25namespace {
17constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; 26constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
18constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; 27constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
19constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; 28constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
20constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; 29constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
21constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; 30constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
22 31
23static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ 32const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
24 STATE, 33 STATE,
25 RESULT, 34 RESULT,
26 PARAMETERS, 35 PARAMETERS,
@@ -28,19 +37,73 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
28 BRANCH_HOLDER, 37 BRANCH_HOLDER,
29}); 38});
30 39
31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) 40// Arbitrarily chosen based on current booting games.
32 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} 41constexpr size_t MAX_CODE_SIZE = 0x10000;
33 42
34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { 43class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code); 44public:
36} 45 explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
46 : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
47 Compile();
48 }
37 49
38MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) 50 void Execute(const std::vector<u32>& parameters, u32 method) override;
39 : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { 51
40 Compile(); 52 void Compile_ALU(Macro::Opcode opcode);
41} 53 void Compile_AddImmediate(Macro::Opcode opcode);
54 void Compile_ExtractInsert(Macro::Opcode opcode);
55 void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
56 void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
57 void Compile_Read(Macro::Opcode opcode);
58 void Compile_Branch(Macro::Opcode opcode);
59
60private:
61 void Optimizer_ScanFlags();
62
63 void Compile();
64 bool Compile_NextInstruction();
65
66 Xbyak::Reg32 Compile_FetchParameter();
67 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
68
69 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
70 void Compile_Send(Xbyak::Reg32 value);
42 71
43MacroJITx64Impl::~MacroJITx64Impl() = default; 72 Macro::Opcode GetOpCode() const;
73 std::bitset<32> PersistentCallerSavedRegs() const;
74
75 struct JITState {
76 Engines::Maxwell3D* maxwell3d{};
77 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
78 u32 carry_flag{};
79 };
80 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
81 using ProgramType = void (*)(JITState*, const u32*);
82
83 struct OptimizerState {
84 bool can_skip_carry{};
85 bool has_delayed_pc{};
86 bool zero_reg_skip{};
87 bool skip_dummy_addimmediate{};
88 bool optimize_for_method_move{};
89 bool enable_asserts{};
90 };
91 OptimizerState optimizer{};
92
93 std::optional<Macro::Opcode> next_opcode{};
94 ProgramType program{nullptr};
95
96 std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
97 std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
98 Xbyak::Label end_of_code{};
99
100 bool is_delay_slot{};
101 u32 pc{};
102 std::optional<u32> delayed_pc;
103
104 const std::vector<u32>& code;
105 Engines::Maxwell3D& maxwell3d;
106};
44 107
45void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { 108void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
46 MICROPROFILE_SCOPE(MacroJitExecute); 109 MICROPROFILE_SCOPE(MacroJitExecute);
@@ -307,11 +370,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
307 Compile_ProcessResult(opcode.result_operation, opcode.dst); 370 Compile_ProcessResult(opcode.result_operation, opcode.dst);
308} 371}
309 372
310static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { 373void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
311 maxwell3d->CallMethodFromMME(method_address.address, value); 374 maxwell3d->CallMethodFromMME(method_address.address, value);
312} 375}
313 376
314void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { 377void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
315 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); 378 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
316 mov(Common::X64::ABI_PARAM1, qword[STATE]); 379 mov(Common::X64::ABI_PARAM1, qword[STATE]);
317 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); 380 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
@@ -338,7 +401,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
338 L(dont_process); 401 L(dont_process);
339} 402}
340 403
341void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { 404void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
342 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); 405 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
343 const s32 jump_address = 406 const s32 jump_address =
344 static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); 407 static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
@@ -392,7 +455,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
392 L(end); 455 L(end);
393} 456}
394 457
395void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { 458void MacroJITx64Impl::Optimizer_ScanFlags() {
396 optimizer.can_skip_carry = true; 459 optimizer.can_skip_carry = true;
397 optimizer.has_delayed_pc = false; 460 optimizer.has_delayed_pc = false;
398 for (auto raw_op : code) { 461 for (auto raw_op : code) {
@@ -534,7 +597,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
534 return true; 597 return true;
535} 598}
536 599
537Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { 600Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
538 mov(eax, dword[PARAMETERS]); 601 mov(eax, dword[PARAMETERS]);
539 add(PARAMETERS, sizeof(u32)); 602 add(PARAMETERS, sizeof(u32));
540 return eax; 603 return eax;
@@ -615,5 +678,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const {
615std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { 678std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
616 return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; 679 return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
617} 680}
681} // Anonymous namespace
618 682
683MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
684 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
685
686std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
687 return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
688}
619} // namespace Tegra 689} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index d03d480b4..773b037ae 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -4,12 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <bitset>
9#include <xbyak/xbyak.h>
10#include "common/bit_field.h"
11#include "common/common_types.h" 7#include "common/common_types.h"
12#include "common/x64/xbyak_abi.h"
13#include "video_core/macro/macro.h" 8#include "video_core/macro/macro.h"
14 9
15namespace Tegra { 10namespace Tegra {
@@ -18,9 +13,6 @@ namespace Engines {
18class Maxwell3D; 13class Maxwell3D;
19} 14}
20 15
21/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
22constexpr size_t MAX_CODE_SIZE = 0x10000;
23
24class MacroJITx64 final : public MacroEngine { 16class MacroJITx64 final : public MacroEngine {
25public: 17public:
26 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); 18 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
@@ -32,67 +24,4 @@ private:
32 Engines::Maxwell3D& maxwell3d; 24 Engines::Maxwell3D& maxwell3d;
33}; 25};
34 26
35class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
36public:
37 explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
38 ~MacroJITx64Impl();
39
40 void Execute(const std::vector<u32>& parameters, u32 method) override;
41
42 void Compile_ALU(Macro::Opcode opcode);
43 void Compile_AddImmediate(Macro::Opcode opcode);
44 void Compile_ExtractInsert(Macro::Opcode opcode);
45 void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
46 void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
47 void Compile_Read(Macro::Opcode opcode);
48 void Compile_Branch(Macro::Opcode opcode);
49
50private:
51 void Optimizer_ScanFlags();
52
53 void Compile();
54 bool Compile_NextInstruction();
55
56 Xbyak::Reg32 Compile_FetchParameter();
57 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
58
59 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
60 void Compile_Send(Xbyak::Reg32 value);
61
62 Macro::Opcode GetOpCode() const;
63 std::bitset<32> PersistentCallerSavedRegs() const;
64
65 struct JITState {
66 Engines::Maxwell3D* maxwell3d{};
67 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
68 u32 carry_flag{};
69 };
70 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
71 using ProgramType = void (*)(JITState*, const u32*);
72
73 struct OptimizerState {
74 bool can_skip_carry{};
75 bool has_delayed_pc{};
76 bool zero_reg_skip{};
77 bool skip_dummy_addimmediate{};
78 bool optimize_for_method_move{};
79 bool enable_asserts{};
80 };
81 OptimizerState optimizer{};
82
83 std::optional<Macro::Opcode> next_opcode{};
84 ProgramType program{nullptr};
85
86 std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
87 std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
88 Xbyak::Label end_of_code{};
89
90 bool is_delay_slot{};
91 u32 pc{};
92 std::optional<u32> delayed_pc;
93
94 const std::vector<u32>& code;
95 Engines::Maxwell3D& maxwell3d;
96};
97
98} // namespace Tegra 27} // namespace Tegra