diff options
| author | 2022-01-25 13:24:53 -0500 | |
|---|---|---|
| committer | 2022-01-25 13:31:46 -0500 | |
| commit | 6b873b72ae5dcaad9a234d3f5375f309578e28fa (patch) | |
| tree | 0ac70be3583c31d4f9ae09ae30d5d16c51eab525 /src | |
| parent | video_core/macro_hle: Move impl class into cpp file (diff) | |
| download | yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.tar.gz yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.tar.xz yuzu-6b873b72ae5dcaad9a234d3f5375f309578e28fa.zip | |
video_core/macro_jit_x64: Move impl class into cpp file

Keeps the implementation internalized and also reduces API-facing header
dependencies.

Notably, this fully internalizes all of the xbyak externals.

Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 102 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 71 |
2 files changed, 86 insertions, 87 deletions
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..1934039c0 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -2,9 +2,17 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include <bitset> | ||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include <xbyak/xbyak.h> | ||
| 10 | |||
| 5 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/bit_field.h" | ||
| 6 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | 14 | #include "common/microprofile.h" |
| 15 | #include "common/x64/xbyak_abi.h" | ||
| 8 | #include "common/x64/xbyak_util.h" | 16 | #include "common/x64/xbyak_util.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/macro/macro_interpreter.h" | 18 | #include "video_core/macro/macro_interpreter.h" |
| @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | 22 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
| 15 | 23 | ||
| 16 | namespace Tegra { | 24 | namespace Tegra { |
| 25 | namespace { | ||
| 17 | constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; | 26 | constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; |
| 18 | constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; | 27 | constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; |
| 19 | constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | 28 | constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
| 20 | constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | 29 | constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
| 21 | constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | 30 | constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
| 22 | 31 | ||
| 23 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | 32 | const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
| 24 | STATE, | 33 | STATE, |
| 25 | RESULT, | 34 | RESULT, |
| 26 | PARAMETERS, | 35 | PARAMETERS, |
| @@ -28,19 +37,73 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | |||
| 28 | BRANCH_HOLDER, | 37 | BRANCH_HOLDER, |
| 29 | }); | 38 | }); |
| 30 | 39 | ||
| 31 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | 40 | // Arbitrarily chosen based on current booting games. |
| 32 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | 41 | constexpr size_t MAX_CODE_SIZE = 0x10000; |
| 33 | 42 | ||
| 34 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | 43 | class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { |
| 35 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | 44 | public: |
| 36 | } | 45 | explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) |
| 46 | : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | ||
| 47 | Compile(); | ||
| 48 | } | ||
| 37 | 49 | ||
| 38 | MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | 50 | void Execute(const std::vector<u32>& parameters, u32 method) override; |
| 39 | : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | 51 | |
| 40 | Compile(); | 52 | void Compile_ALU(Macro::Opcode opcode); |
| 41 | } | 53 | void Compile_AddImmediate(Macro::Opcode opcode); |
| 54 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 55 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 56 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 57 | void Compile_Read(Macro::Opcode opcode); | ||
| 58 | void Compile_Branch(Macro::Opcode opcode); | ||
| 59 | |||
| 60 | private: | ||
| 61 | void Optimizer_ScanFlags(); | ||
| 62 | |||
| 63 | void Compile(); | ||
| 64 | bool Compile_NextInstruction(); | ||
| 65 | |||
| 66 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 67 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 68 | |||
| 69 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 70 | void Compile_Send(Xbyak::Reg32 value); | ||
| 42 | 71 | ||
| 43 | MacroJITx64Impl::~MacroJITx64Impl() = default; | 72 | Macro::Opcode GetOpCode() const; |
| 73 | std::bitset<32> PersistentCallerSavedRegs() const; | ||
| 74 | |||
| 75 | struct JITState { | ||
| 76 | Engines::Maxwell3D* maxwell3d{}; | ||
| 77 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 78 | u32 carry_flag{}; | ||
| 79 | }; | ||
| 80 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 81 | using ProgramType = void (*)(JITState*, const u32*); | ||
| 82 | |||
| 83 | struct OptimizerState { | ||
| 84 | bool can_skip_carry{}; | ||
| 85 | bool has_delayed_pc{}; | ||
| 86 | bool zero_reg_skip{}; | ||
| 87 | bool skip_dummy_addimmediate{}; | ||
| 88 | bool optimize_for_method_move{}; | ||
| 89 | bool enable_asserts{}; | ||
| 90 | }; | ||
| 91 | OptimizerState optimizer{}; | ||
| 92 | |||
| 93 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 94 | ProgramType program{nullptr}; | ||
| 95 | |||
| 96 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 97 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 98 | Xbyak::Label end_of_code{}; | ||
| 99 | |||
| 100 | bool is_delay_slot{}; | ||
| 101 | u32 pc{}; | ||
| 102 | std::optional<u32> delayed_pc; | ||
| 103 | |||
| 104 | const std::vector<u32>& code; | ||
| 105 | Engines::Maxwell3D& maxwell3d; | ||
| 106 | }; | ||
| 44 | 107 | ||
| 45 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | 108 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { |
| 46 | MICROPROFILE_SCOPE(MacroJitExecute); | 109 | MICROPROFILE_SCOPE(MacroJitExecute); |
| @@ -307,11 +370,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||
| 307 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | 370 | Compile_ProcessResult(opcode.result_operation, opcode.dst); |
| 308 | } | 371 | } |
| 309 | 372 | ||
| 310 | static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | 373 | void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { |
| 311 | maxwell3d->CallMethodFromMME(method_address.address, value); | 374 | maxwell3d->CallMethodFromMME(method_address.address, value); |
| 312 | } | 375 | } |
| 313 | 376 | ||
| 314 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | 377 | void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { |
| 315 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | 378 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 316 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 379 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 317 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | 380 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); |
| @@ -338,7 +401,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||
| 338 | L(dont_process); | 401 | L(dont_process); |
| 339 | } | 402 | } |
| 340 | 403 | ||
| 341 | void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | 404 | void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { |
| 342 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | 405 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |
| 343 | const s32 jump_address = | 406 | const s32 jump_address = |
| 344 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | 407 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); |
| @@ -392,7 +455,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | |||
| 392 | L(end); | 455 | L(end); |
| 393 | } | 456 | } |
| 394 | 457 | ||
| 395 | void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | 458 | void MacroJITx64Impl::Optimizer_ScanFlags() { |
| 396 | optimizer.can_skip_carry = true; | 459 | optimizer.can_skip_carry = true; |
| 397 | optimizer.has_delayed_pc = false; | 460 | optimizer.has_delayed_pc = false; |
| 398 | for (auto raw_op : code) { | 461 | for (auto raw_op : code) { |
| @@ -534,7 +597,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||
| 534 | return true; | 597 | return true; |
| 535 | } | 598 | } |
| 536 | 599 | ||
| 537 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | 600 | Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { |
| 538 | mov(eax, dword[PARAMETERS]); | 601 | mov(eax, dword[PARAMETERS]); |
| 539 | add(PARAMETERS, sizeof(u32)); | 602 | add(PARAMETERS, sizeof(u32)); |
| 540 | return eax; | 603 | return eax; |
| @@ -615,5 +678,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { | |||
| 615 | std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | 678 | std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { |
| 616 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | 679 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; |
| 617 | } | 680 | } |
| 681 | } // Anonymous namespace | ||
| 618 | 682 | ||
| 683 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | ||
| 684 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||
| 685 | |||
| 686 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||
| 687 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||
| 688 | } | ||
| 619 | } // namespace Tegra | 689 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -4,12 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <xbyak/xbyak.h> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 12 | #include "common/x64/xbyak_abi.h" | ||
| 13 | #include "video_core/macro/macro.h" | 8 | #include "video_core/macro/macro.h" |
| 14 | 9 | ||
| 15 | namespace Tegra { | 10 | namespace Tegra { |
| @@ -18,9 +13,6 @@ namespace Engines { | |||
| 18 | class Maxwell3D; | 13 | class Maxwell3D; |
| 19 | } | 14 | } |
| 20 | 15 | ||
| 21 | /// MAX_CODE_SIZE is arbitrarily chosen based on current booting games | ||
| 22 | constexpr size_t MAX_CODE_SIZE = 0x10000; | ||
| 23 | |||
| 24 | class MacroJITx64 final : public MacroEngine { | 16 | class MacroJITx64 final : public MacroEngine { |
| 25 | public: | 17 | public: |
| 26 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); | 18 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); |
| @@ -32,67 +24,4 @@ private: | |||
| 32 | Engines::Maxwell3D& maxwell3d; | 24 | Engines::Maxwell3D& maxwell3d; |
| 33 | }; | 25 | }; |
| 34 | 26 | ||
| 35 | class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { | ||
| 36 | public: | ||
| 37 | explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); | ||
| 38 | ~MacroJITx64Impl(); | ||
| 39 | |||
| 40 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 41 | |||
| 42 | void Compile_ALU(Macro::Opcode opcode); | ||
| 43 | void Compile_AddImmediate(Macro::Opcode opcode); | ||
| 44 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 45 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 46 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 47 | void Compile_Read(Macro::Opcode opcode); | ||
| 48 | void Compile_Branch(Macro::Opcode opcode); | ||
| 49 | |||
| 50 | private: | ||
| 51 | void Optimizer_ScanFlags(); | ||
| 52 | |||
| 53 | void Compile(); | ||
| 54 | bool Compile_NextInstruction(); | ||
| 55 | |||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 58 | |||
| 59 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 60 | void Compile_Send(Xbyak::Reg32 value); | ||
| 61 | |||
| 62 | Macro::Opcode GetOpCode() const; | ||
| 63 | std::bitset<32> PersistentCallerSavedRegs() const; | ||
| 64 | |||
| 65 | struct JITState { | ||
| 66 | Engines::Maxwell3D* maxwell3d{}; | ||
| 67 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 68 | u32 carry_flag{}; | ||
| 69 | }; | ||
| 70 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 71 | using ProgramType = void (*)(JITState*, const u32*); | ||
| 72 | |||
| 73 | struct OptimizerState { | ||
| 74 | bool can_skip_carry{}; | ||
| 75 | bool has_delayed_pc{}; | ||
| 76 | bool zero_reg_skip{}; | ||
| 77 | bool skip_dummy_addimmediate{}; | ||
| 78 | bool optimize_for_method_move{}; | ||
| 79 | bool enable_asserts{}; | ||
| 80 | }; | ||
| 81 | OptimizerState optimizer{}; | ||
| 82 | |||
| 83 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 84 | ProgramType program{nullptr}; | ||
| 85 | |||
| 86 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 87 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 88 | Xbyak::Label end_of_code{}; | ||
| 89 | |||
| 90 | bool is_delay_slot{}; | ||
| 91 | u32 pc{}; | ||
| 92 | std::optional<u32> delayed_pc; | ||
| 93 | |||
| 94 | const std::vector<u32>& code; | ||
| 95 | Engines::Maxwell3D& maxwell3d; | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace Tegra | 27 | } // namespace Tegra |