diff options
| author | 2020-06-25 23:34:37 +0200 | |
|---|---|---|
| committer | 2020-06-25 23:34:37 +0200 | |
| commit | 0f4512291a0504b32fac248f73a68fec34f657fe (patch) | |
| tree | 3c69736a2ac82a9a0076ec3b79673c814e5f1abd /src/video_core/macro | |
| parent | Fix typo 2: electric boogaloo (diff) | |
| parent | Merge pull request #4136 from VolcaEM/mods (diff) | |
| download | yuzu-0f4512291a0504b32fac248f73a68fec34f657fe.tar.gz yuzu-0f4512291a0504b32fac248f73a68fec34f657fe.tar.xz yuzu-0f4512291a0504b32fac248f73a68fec34f657fe.zip | |
Merge branch 'master' into quickstart-faq
Diffstat (limited to 'src/video_core/macro')
| -rw-r--r-- | src/video_core/macro/macro.cpp | 72 | ||||
| -rw-r--r-- | src/video_core/macro/macro.h | 141 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 113 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.h | 44 | ||||
| -rw-r--r-- | src/video_core/macro/macro_interpreter.cpp | 289 | ||||
| -rw-r--r-- | src/video_core/macro/macro_interpreter.h | 102 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 621 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 98 |
8 files changed, 1480 insertions, 0 deletions
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp new file mode 100644 index 000000000..ef7dad349 --- /dev/null +++ b/src/video_core/macro/macro.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <boost/container_hash/hash.hpp> | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/macro/macro.h" | ||
| 11 | #include "video_core/macro/macro_hle.h" | ||
| 12 | #include "video_core/macro/macro_interpreter.h" | ||
| 13 | #include "video_core/macro/macro_jit_x64.h" | ||
| 14 | |||
| 15 | namespace Tegra { | ||
| 16 | |||
| 17 | MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d) | ||
| 18 | : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {} | ||
| 19 | |||
| 20 | MacroEngine::~MacroEngine() = default; | ||
| 21 | |||
| 22 | void MacroEngine::AddCode(u32 method, u32 data) { | ||
| 23 | uploaded_macro_code[method].push_back(data); | ||
| 24 | } | ||
| 25 | |||
| 26 | void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, | ||
| 27 | const std::vector<u32>& parameters) { | ||
| 28 | auto compiled_macro = macro_cache.find(method); | ||
| 29 | if (compiled_macro != macro_cache.end()) { | ||
| 30 | const auto& cache_info = compiled_macro->second; | ||
| 31 | if (cache_info.has_hle_program) { | ||
| 32 | cache_info.hle_program->Execute(parameters, method); | ||
| 33 | } else { | ||
| 34 | cache_info.lle_program->Execute(parameters, method); | ||
| 35 | } | ||
| 36 | } else { | ||
| 37 | // Macro not compiled, check if it's uploaded and if so, compile it | ||
| 38 | auto macro_code = uploaded_macro_code.find(method); | ||
| 39 | if (macro_code == uploaded_macro_code.end()) { | ||
| 40 | UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | auto& cache_info = macro_cache[method]; | ||
| 44 | cache_info.hash = boost::hash_value(macro_code->second); | ||
| 45 | cache_info.lle_program = Compile(macro_code->second); | ||
| 46 | |||
| 47 | auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); | ||
| 48 | if (hle_program.has_value()) { | ||
| 49 | cache_info.has_hle_program = true; | ||
| 50 | cache_info.hle_program = std::move(hle_program.value()); | ||
| 51 | } | ||
| 52 | |||
| 53 | if (cache_info.has_hle_program) { | ||
| 54 | cache_info.hle_program->Execute(parameters, method); | ||
| 55 | } else { | ||
| 56 | cache_info.lle_program->Execute(parameters, method); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) { | ||
| 62 | if (Settings::values.disable_macro_jit) { | ||
| 63 | return std::make_unique<MacroInterpreter>(maxwell3d); | ||
| 64 | } | ||
| 65 | #ifdef ARCHITECTURE_x86_64 | ||
| 66 | return std::make_unique<MacroJITx64>(maxwell3d); | ||
| 67 | #else | ||
| 68 | return std::make_unique<MacroInterpreter>(maxwell3d); | ||
| 69 | #endif | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h new file mode 100644 index 000000000..4d00b84b0 --- /dev/null +++ b/src/video_core/macro/macro.h | |||
| @@ -0,0 +1,141 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | |||
| 15 | namespace Engines { | ||
| 16 | class Maxwell3D; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace Macro { | ||
| 20 | constexpr std::size_t NUM_MACRO_REGISTERS = 8; | ||
| 21 | enum class Operation : u32 { | ||
| 22 | ALU = 0, | ||
| 23 | AddImmediate = 1, | ||
| 24 | ExtractInsert = 2, | ||
| 25 | ExtractShiftLeftImmediate = 3, | ||
| 26 | ExtractShiftLeftRegister = 4, | ||
| 27 | Read = 5, | ||
| 28 | Unused = 6, // This operation doesn't seem to be a valid encoding. | ||
| 29 | Branch = 7, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class ALUOperation : u32 { | ||
| 33 | Add = 0, | ||
| 34 | AddWithCarry = 1, | ||
| 35 | Subtract = 2, | ||
| 36 | SubtractWithBorrow = 3, | ||
| 37 | // Operations 4-7 don't seem to be valid encodings. | ||
| 38 | Xor = 8, | ||
| 39 | Or = 9, | ||
| 40 | And = 10, | ||
| 41 | AndNot = 11, | ||
| 42 | Nand = 12 | ||
| 43 | }; | ||
| 44 | |||
| 45 | enum class ResultOperation : u32 { | ||
| 46 | IgnoreAndFetch = 0, | ||
| 47 | Move = 1, | ||
| 48 | MoveAndSetMethod = 2, | ||
| 49 | FetchAndSend = 3, | ||
| 50 | MoveAndSend = 4, | ||
| 51 | FetchAndSetMethod = 5, | ||
| 52 | MoveAndSetMethodFetchAndSend = 6, | ||
| 53 | MoveAndSetMethodSend = 7 | ||
| 54 | }; | ||
| 55 | |||
| 56 | enum class BranchCondition : u32 { | ||
| 57 | Zero = 0, | ||
| 58 | NotZero = 1, | ||
| 59 | }; | ||
| 60 | |||
| 61 | union Opcode { | ||
| 62 | u32 raw; | ||
| 63 | BitField<0, 3, Operation> operation; | ||
| 64 | BitField<4, 3, ResultOperation> result_operation; | ||
| 65 | BitField<4, 1, BranchCondition> branch_condition; | ||
| 66 | // If set on a branch, then the branch doesn't have a delay slot. | ||
| 67 | BitField<5, 1, u32> branch_annul; | ||
| 68 | BitField<7, 1, u32> is_exit; | ||
| 69 | BitField<8, 3, u32> dst; | ||
| 70 | BitField<11, 3, u32> src_a; | ||
| 71 | BitField<14, 3, u32> src_b; | ||
| 72 | // The signed immediate overlaps the second source operand and the alu operation. | ||
| 73 | BitField<14, 18, s32> immediate; | ||
| 74 | |||
| 75 | BitField<17, 5, ALUOperation> alu_operation; | ||
| 76 | |||
| 77 | // Bitfield instructions data | ||
| 78 | BitField<17, 5, u32> bf_src_bit; | ||
| 79 | BitField<22, 5, u32> bf_size; | ||
| 80 | BitField<27, 5, u32> bf_dst_bit; | ||
| 81 | |||
| 82 | u32 GetBitfieldMask() const { | ||
| 83 | return (1 << bf_size) - 1; | ||
| 84 | } | ||
| 85 | |||
| 86 | s32 GetBranchTarget() const { | ||
| 87 | return static_cast<s32>(immediate * sizeof(u32)); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | union MethodAddress { | ||
| 92 | u32 raw; | ||
| 93 | BitField<0, 12, u32> address; | ||
| 94 | BitField<12, 6, u32> increment; | ||
| 95 | }; | ||
| 96 | |||
| 97 | } // namespace Macro | ||
| 98 | |||
| 99 | class HLEMacro; | ||
| 100 | |||
| 101 | class CachedMacro { | ||
| 102 | public: | ||
| 103 | virtual ~CachedMacro() = default; | ||
| 104 | /** | ||
| 105 | * Executes the macro code with the specified input parameters. | ||
| 106 | * @param code The macro byte code to execute | ||
| 107 | * @param parameters The parameters of the macro | ||
| 108 | */ | ||
| 109 | virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; | ||
| 110 | }; | ||
| 111 | |||
| 112 | class MacroEngine { | ||
| 113 | public: | ||
| 114 | explicit MacroEngine(Engines::Maxwell3D& maxwell3d); | ||
| 115 | virtual ~MacroEngine(); | ||
| 116 | |||
| 117 | // Store the uploaded macro code to compile them when they're called. | ||
| 118 | void AddCode(u32 method, u32 data); | ||
| 119 | |||
| 120 | // Compiles the macro if its not in the cache, and executes the compiled macro | ||
| 121 | void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); | ||
| 122 | |||
| 123 | protected: | ||
| 124 | virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; | ||
| 125 | |||
| 126 | private: | ||
| 127 | struct CacheInfo { | ||
| 128 | std::unique_ptr<CachedMacro> lle_program{}; | ||
| 129 | std::unique_ptr<CachedMacro> hle_program{}; | ||
| 130 | u64 hash{}; | ||
| 131 | bool has_hle_program{}; | ||
| 132 | }; | ||
| 133 | |||
| 134 | std::unordered_map<u32, CacheInfo> macro_cache; | ||
| 135 | std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; | ||
| 136 | std::unique_ptr<HLEMacro> hle_macros; | ||
| 137 | }; | ||
| 138 | |||
| 139 | std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); | ||
| 140 | |||
| 141 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp new file mode 100644 index 000000000..410f99018 --- /dev/null +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -0,0 +1,113 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <vector> | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/macro/macro_hle.h" | ||
| 9 | #include "video_core/rasterizer_interface.h" | ||
| 10 | |||
| 11 | namespace Tegra { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | // HLE'd functions | ||
| 15 | static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, | ||
| 16 | const std::vector<u32>& parameters) { | ||
| 17 | const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); | ||
| 18 | |||
| 19 | maxwell3d.regs.draw.topology.Assign( | ||
| 20 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] & | ||
| 21 | ~(0x3ffffff << 26))); | ||
| 22 | maxwell3d.regs.vb_base_instance = parameters[5]; | ||
| 23 | maxwell3d.mme_draw.instance_count = instance_count; | ||
| 24 | maxwell3d.regs.vb_element_base = parameters[3]; | ||
| 25 | maxwell3d.regs.index_array.count = parameters[1]; | ||
| 26 | maxwell3d.regs.index_array.first = parameters[4]; | ||
| 27 | |||
| 28 | if (maxwell3d.ShouldExecute()) { | ||
| 29 | maxwell3d.GetRasterizer().Draw(true, true); | ||
| 30 | } | ||
| 31 | maxwell3d.regs.index_array.count = 0; | ||
| 32 | maxwell3d.mme_draw.instance_count = 0; | ||
| 33 | maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; | ||
| 34 | } | ||
| 35 | |||
| 36 | static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, | ||
| 37 | const std::vector<u32>& parameters) { | ||
| 38 | const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); | ||
| 39 | |||
| 40 | maxwell3d.regs.vertex_buffer.first = parameters[3]; | ||
| 41 | maxwell3d.regs.vertex_buffer.count = parameters[1]; | ||
| 42 | maxwell3d.regs.vb_base_instance = parameters[4]; | ||
| 43 | maxwell3d.regs.draw.topology.Assign( | ||
| 44 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); | ||
| 45 | maxwell3d.mme_draw.instance_count = count; | ||
| 46 | |||
| 47 | if (maxwell3d.ShouldExecute()) { | ||
| 48 | maxwell3d.GetRasterizer().Draw(false, true); | ||
| 49 | } | ||
| 50 | maxwell3d.regs.vertex_buffer.count = 0; | ||
| 51 | maxwell3d.mme_draw.instance_count = 0; | ||
| 52 | maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; | ||
| 53 | } | ||
| 54 | |||
| 55 | static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, | ||
| 56 | const std::vector<u32>& parameters) { | ||
| 57 | const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); | ||
| 58 | const u32 element_base = parameters[4]; | ||
| 59 | const u32 base_instance = parameters[5]; | ||
| 60 | maxwell3d.regs.index_array.first = parameters[3]; | ||
| 61 | maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base? | ||
| 62 | maxwell3d.regs.index_array.count = parameters[1]; | ||
| 63 | maxwell3d.regs.vb_element_base = element_base; | ||
| 64 | maxwell3d.regs.vb_base_instance = base_instance; | ||
| 65 | maxwell3d.mme_draw.instance_count = instance_count; | ||
| 66 | maxwell3d.CallMethodFromMME(0x8e3, 0x640); | ||
| 67 | maxwell3d.CallMethodFromMME(0x8e4, element_base); | ||
| 68 | maxwell3d.CallMethodFromMME(0x8e5, base_instance); | ||
| 69 | maxwell3d.regs.draw.topology.Assign( | ||
| 70 | static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); | ||
| 71 | if (maxwell3d.ShouldExecute()) { | ||
| 72 | maxwell3d.GetRasterizer().Draw(true, true); | ||
| 73 | } | ||
| 74 | maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? | ||
| 75 | maxwell3d.regs.index_array.count = 0; | ||
| 76 | maxwell3d.regs.vb_element_base = 0x0; | ||
| 77 | maxwell3d.regs.vb_base_instance = 0x0; | ||
| 78 | maxwell3d.mme_draw.instance_count = 0; | ||
| 79 | maxwell3d.CallMethodFromMME(0x8e3, 0x640); | ||
| 80 | maxwell3d.CallMethodFromMME(0x8e4, 0x0); | ||
| 81 | maxwell3d.CallMethodFromMME(0x8e5, 0x0); | ||
| 82 | maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; | ||
| 83 | } | ||
| 84 | } // namespace | ||
| 85 | |||
| 86 | constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | ||
| 87 | std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0), | ||
| 88 | std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD), | ||
| 89 | std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7), | ||
| 90 | }}; | ||
| 91 | |||
| 92 | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | ||
| 93 | HLEMacro::~HLEMacro() = default; | ||
| 94 | |||
| 95 | std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { | ||
| 96 | const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), | ||
| 97 | [hash](const auto& pair) { return pair.first == hash; }); | ||
| 98 | if (it == hle_funcs.end()) { | ||
| 99 | return std::nullopt; | ||
| 100 | } | ||
| 101 | return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); | ||
| 102 | } | ||
| 103 | |||
| 104 | HLEMacroImpl::~HLEMacroImpl() = default; | ||
| 105 | |||
| 106 | HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func) | ||
| 107 | : maxwell3d(maxwell3d), func(func) {} | ||
| 108 | |||
| 109 | void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { | ||
| 110 | func(maxwell3d, parameters); | ||
| 111 | } | ||
| 112 | |||
| 113 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h new file mode 100644 index 000000000..37af875a0 --- /dev/null +++ b/src/video_core/macro/macro_hle.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/macro/macro.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | |||
| 15 | namespace Engines { | ||
| 16 | class Maxwell3D; | ||
| 17 | } | ||
| 18 | |||
| 19 | using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); | ||
| 20 | |||
| 21 | class HLEMacro { | ||
| 22 | public: | ||
| 23 | explicit HLEMacro(Engines::Maxwell3D& maxwell3d); | ||
| 24 | ~HLEMacro(); | ||
| 25 | |||
| 26 | std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; | ||
| 27 | |||
| 28 | private: | ||
| 29 | Engines::Maxwell3D& maxwell3d; | ||
| 30 | }; | ||
| 31 | |||
| 32 | class HLEMacroImpl : public CachedMacro { | ||
| 33 | public: | ||
| 34 | explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); | ||
| 35 | ~HLEMacroImpl(); | ||
| 36 | |||
| 37 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 38 | |||
| 39 | private: | ||
| 40 | Engines::Maxwell3D& maxwell3d; | ||
| 41 | HLEFunction func; | ||
| 42 | }; | ||
| 43 | |||
| 44 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp new file mode 100644 index 000000000..aa5256419 --- /dev/null +++ b/src/video_core/macro/macro_interpreter.cpp | |||
| @@ -0,0 +1,289 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "common/microprofile.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/macro/macro_interpreter.h" | ||
| 10 | |||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) | ||
| 15 | : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} | ||
| 16 | |||
| 17 | std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { | ||
| 18 | return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); | ||
| 19 | } | ||
| 20 | |||
| 21 | MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, | ||
| 22 | const std::vector<u32>& code) | ||
| 23 | : maxwell3d(maxwell3d), code(code) {} | ||
| 24 | |||
| 25 | void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) { | ||
| 26 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 27 | Reset(); | ||
| 28 | |||
| 29 | registers[1] = parameters[0]; | ||
| 30 | num_parameters = parameters.size(); | ||
| 31 | |||
| 32 | if (num_parameters > parameters_capacity) { | ||
| 33 | parameters_capacity = num_parameters; | ||
| 34 | this->parameters = std::make_unique<u32[]>(num_parameters); | ||
| 35 | } | ||
| 36 | std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); | ||
| 37 | this->num_parameters = num_parameters; | ||
| 38 | |||
| 39 | // Execute the code until we hit an exit condition. | ||
| 40 | bool keep_executing = true; | ||
| 41 | while (keep_executing) { | ||
| 42 | keep_executing = Step(false); | ||
| 43 | } | ||
| 44 | |||
| 45 | // Assert the the macro used all the input parameters | ||
| 46 | ASSERT(next_parameter_index == num_parameters); | ||
| 47 | } | ||
| 48 | |||
| 49 | void MacroInterpreterImpl::Reset() { | ||
| 50 | registers = {}; | ||
| 51 | pc = 0; | ||
| 52 | delayed_pc = {}; | ||
| 53 | method_address.raw = 0; | ||
| 54 | num_parameters = 0; | ||
| 55 | // The next parameter index starts at 1, because $r1 already has the value of the first | ||
| 56 | // parameter. | ||
| 57 | next_parameter_index = 1; | ||
| 58 | carry_flag = false; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool MacroInterpreterImpl::Step(bool is_delay_slot) { | ||
| 62 | u32 base_address = pc; | ||
| 63 | |||
| 64 | Macro::Opcode opcode = GetOpcode(); | ||
| 65 | pc += 4; | ||
| 66 | |||
| 67 | // Update the program counter if we were delayed | ||
| 68 | if (delayed_pc) { | ||
| 69 | ASSERT(is_delay_slot); | ||
| 70 | pc = *delayed_pc; | ||
| 71 | delayed_pc = {}; | ||
| 72 | } | ||
| 73 | |||
| 74 | switch (opcode.operation) { | ||
| 75 | case Macro::Operation::ALU: { | ||
| 76 | u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), | ||
| 77 | GetRegister(opcode.src_b)); | ||
| 78 | ProcessResult(opcode.result_operation, opcode.dst, result); | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | case Macro::Operation::AddImmediate: { | ||
| 82 | ProcessResult(opcode.result_operation, opcode.dst, | ||
| 83 | GetRegister(opcode.src_a) + opcode.immediate); | ||
| 84 | break; | ||
| 85 | } | ||
| 86 | case Macro::Operation::ExtractInsert: { | ||
| 87 | u32 dst = GetRegister(opcode.src_a); | ||
| 88 | u32 src = GetRegister(opcode.src_b); | ||
| 89 | |||
| 90 | src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); | ||
| 91 | dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); | ||
| 92 | dst |= src << opcode.bf_dst_bit; | ||
| 93 | ProcessResult(opcode.result_operation, opcode.dst, dst); | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | case Macro::Operation::ExtractShiftLeftImmediate: { | ||
| 97 | u32 dst = GetRegister(opcode.src_a); | ||
| 98 | u32 src = GetRegister(opcode.src_b); | ||
| 99 | |||
| 100 | u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; | ||
| 101 | |||
| 102 | ProcessResult(opcode.result_operation, opcode.dst, result); | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | case Macro::Operation::ExtractShiftLeftRegister: { | ||
| 106 | u32 dst = GetRegister(opcode.src_a); | ||
| 107 | u32 src = GetRegister(opcode.src_b); | ||
| 108 | |||
| 109 | u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; | ||
| 110 | |||
| 111 | ProcessResult(opcode.result_operation, opcode.dst, result); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | case Macro::Operation::Read: { | ||
| 115 | u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); | ||
| 116 | ProcessResult(opcode.result_operation, opcode.dst, result); | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | case Macro::Operation::Branch: { | ||
| 120 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | ||
| 121 | u32 value = GetRegister(opcode.src_a); | ||
| 122 | bool taken = EvaluateBranchCondition(opcode.branch_condition, value); | ||
| 123 | if (taken) { | ||
| 124 | // Ignore the delay slot if the branch has the annul bit. | ||
| 125 | if (opcode.branch_annul) { | ||
| 126 | pc = base_address + opcode.GetBranchTarget(); | ||
| 127 | return true; | ||
| 128 | } | ||
| 129 | |||
| 130 | delayed_pc = base_address + opcode.GetBranchTarget(); | ||
| 131 | // Execute one more instruction due to the delay slot. | ||
| 132 | return Step(true); | ||
| 133 | } | ||
| 134 | break; | ||
| 135 | } | ||
| 136 | default: | ||
| 137 | UNIMPLEMENTED_MSG("Unimplemented macro operation {}", | ||
| 138 | static_cast<u32>(opcode.operation.Value())); | ||
| 139 | } | ||
| 140 | |||
| 141 | // An instruction with the Exit flag will not actually | ||
| 142 | // cause an exit if it's executed inside a delay slot. | ||
| 143 | if (opcode.is_exit && !is_delay_slot) { | ||
| 144 | // Exit has a delay slot, execute the next instruction | ||
| 145 | Step(true); | ||
| 146 | return false; | ||
| 147 | } | ||
| 148 | |||
| 149 | return true; | ||
| 150 | } | ||
| 151 | |||
| 152 | u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { | ||
| 153 | switch (operation) { | ||
| 154 | case Macro::ALUOperation::Add: { | ||
| 155 | const u64 result{static_cast<u64>(src_a) + src_b}; | ||
| 156 | carry_flag = result > 0xffffffff; | ||
| 157 | return static_cast<u32>(result); | ||
| 158 | } | ||
| 159 | case Macro::ALUOperation::AddWithCarry: { | ||
| 160 | const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; | ||
| 161 | carry_flag = result > 0xffffffff; | ||
| 162 | return static_cast<u32>(result); | ||
| 163 | } | ||
| 164 | case Macro::ALUOperation::Subtract: { | ||
| 165 | const u64 result{static_cast<u64>(src_a) - src_b}; | ||
| 166 | carry_flag = result < 0x100000000; | ||
| 167 | return static_cast<u32>(result); | ||
| 168 | } | ||
| 169 | case Macro::ALUOperation::SubtractWithBorrow: { | ||
| 170 | const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; | ||
| 171 | carry_flag = result < 0x100000000; | ||
| 172 | return static_cast<u32>(result); | ||
| 173 | } | ||
| 174 | case Macro::ALUOperation::Xor: | ||
| 175 | return src_a ^ src_b; | ||
| 176 | case Macro::ALUOperation::Or: | ||
| 177 | return src_a | src_b; | ||
| 178 | case Macro::ALUOperation::And: | ||
| 179 | return src_a & src_b; | ||
| 180 | case Macro::ALUOperation::AndNot: | ||
| 181 | return src_a & ~src_b; | ||
| 182 | case Macro::ALUOperation::Nand: | ||
| 183 | return ~(src_a & src_b); | ||
| 184 | |||
| 185 | default: | ||
| 186 | UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", static_cast<u32>(operation)); | ||
| 187 | return 0; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { | ||
| 192 | switch (operation) { | ||
| 193 | case Macro::ResultOperation::IgnoreAndFetch: | ||
| 194 | // Fetch parameter and ignore result. | ||
| 195 | SetRegister(reg, FetchParameter()); | ||
| 196 | break; | ||
| 197 | case Macro::ResultOperation::Move: | ||
| 198 | // Move result. | ||
| 199 | SetRegister(reg, result); | ||
| 200 | break; | ||
| 201 | case Macro::ResultOperation::MoveAndSetMethod: | ||
| 202 | // Move result and use as Method Address. | ||
| 203 | SetRegister(reg, result); | ||
| 204 | SetMethodAddress(result); | ||
| 205 | break; | ||
| 206 | case Macro::ResultOperation::FetchAndSend: | ||
| 207 | // Fetch parameter and send result. | ||
| 208 | SetRegister(reg, FetchParameter()); | ||
| 209 | Send(result); | ||
| 210 | break; | ||
| 211 | case Macro::ResultOperation::MoveAndSend: | ||
| 212 | // Move and send result. | ||
| 213 | SetRegister(reg, result); | ||
| 214 | Send(result); | ||
| 215 | break; | ||
| 216 | case Macro::ResultOperation::FetchAndSetMethod: | ||
| 217 | // Fetch parameter and use result as Method Address. | ||
| 218 | SetRegister(reg, FetchParameter()); | ||
| 219 | SetMethodAddress(result); | ||
| 220 | break; | ||
| 221 | case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: | ||
| 222 | // Move result and use as Method Address, then fetch and send parameter. | ||
| 223 | SetRegister(reg, result); | ||
| 224 | SetMethodAddress(result); | ||
| 225 | Send(FetchParameter()); | ||
| 226 | break; | ||
| 227 | case Macro::ResultOperation::MoveAndSetMethodSend: | ||
| 228 | // Move result and use as Method Address, then send bits 12:17 of result. | ||
| 229 | SetRegister(reg, result); | ||
| 230 | SetMethodAddress(result); | ||
| 231 | Send((result >> 12) & 0b111111); | ||
| 232 | break; | ||
| 233 | default: | ||
| 234 | UNIMPLEMENTED_MSG("Unimplemented result operation {}", static_cast<u32>(operation)); | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { | ||
| 239 | switch (cond) { | ||
| 240 | case Macro::BranchCondition::Zero: | ||
| 241 | return value == 0; | ||
| 242 | case Macro::BranchCondition::NotZero: | ||
| 243 | return value != 0; | ||
| 244 | } | ||
| 245 | UNREACHABLE(); | ||
| 246 | return true; | ||
| 247 | } | ||
| 248 | |||
| 249 | Macro::Opcode MacroInterpreterImpl::GetOpcode() const { | ||
| 250 | ASSERT((pc % sizeof(u32)) == 0); | ||
| 251 | ASSERT(pc < code.size() * sizeof(u32)); | ||
| 252 | return {code[pc / sizeof(u32)]}; | ||
| 253 | } | ||
| 254 | |||
| 255 | u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { | ||
| 256 | return registers.at(register_id); | ||
| 257 | } | ||
| 258 | |||
| 259 | void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { | ||
| 260 | // Register 0 is hardwired as the zero register. | ||
| 261 | // Ensure no writes to it actually occur. | ||
| 262 | if (register_id == 0) { | ||
| 263 | return; | ||
| 264 | } | ||
| 265 | |||
| 266 | registers.at(register_id) = value; | ||
| 267 | } | ||
| 268 | |||
| 269 | void MacroInterpreterImpl::SetMethodAddress(u32 address) { | ||
| 270 | method_address.raw = address; | ||
| 271 | } | ||
| 272 | |||
| 273 | void MacroInterpreterImpl::Send(u32 value) { | ||
| 274 | maxwell3d.CallMethodFromMME(method_address.address, value); | ||
| 275 | // Increment the method address by the method increment. | ||
| 276 | method_address.address.Assign(method_address.address.Value() + | ||
| 277 | method_address.increment.Value()); | ||
| 278 | } | ||
| 279 | |||
| 280 | u32 MacroInterpreterImpl::Read(u32 method) const { | ||
| 281 | return maxwell3d.GetRegisterValue(method); | ||
| 282 | } | ||
| 283 | |||
| 284 | u32 MacroInterpreterImpl::FetchParameter() { | ||
| 285 | ASSERT(next_parameter_index < num_parameters); | ||
| 286 | return parameters[next_parameter_index++]; | ||
| 287 | } | ||
| 288 | |||
| 289 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h new file mode 100644 index 000000000..90217fc89 --- /dev/null +++ b/src/video_core/macro/macro_interpreter.h | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | #include <array> | ||
| 7 | #include <optional> | ||
| 8 | #include <vector> | ||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/macro/macro.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | namespace Engines { | ||
| 15 | class Maxwell3D; | ||
| 16 | } | ||
| 17 | |||
| 18 | class MacroInterpreter final : public MacroEngine { | ||
| 19 | public: | ||
| 20 | explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); | ||
| 21 | |||
| 22 | protected: | ||
| 23 | std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; | ||
| 24 | |||
| 25 | private: | ||
| 26 | Engines::Maxwell3D& maxwell3d; | ||
| 27 | }; | ||
| 28 | |||
| 29 | class MacroInterpreterImpl : public CachedMacro { | ||
| 30 | public: | ||
| 31 | MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); | ||
| 32 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 33 | |||
| 34 | private: | ||
| 35 | /// Resets the execution engine state, zeroing registers, etc. | ||
| 36 | void Reset(); | ||
| 37 | |||
| 38 | /** | ||
| 39 | * Executes a single macro instruction located at the current program counter. Returns whether | ||
| 40 | * the interpreter should keep running. | ||
| 41 | * @param offset Offset to start execution at. | ||
| 42 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||
| 43 | * previous instruction. | ||
| 44 | */ | ||
| 45 | bool Step(bool is_delay_slot); | ||
| 46 | |||
| 47 | /// Calculates the result of an ALU operation. src_a OP src_b; | ||
| 48 | u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); | ||
| 49 | |||
| 50 | /// Performs the result operation on the input result and stores it in the specified register | ||
| 51 | /// (if necessary). | ||
| 52 | void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); | ||
| 53 | |||
| 54 | /// Evaluates the branch condition and returns whether the branch should be taken or not. | ||
| 55 | bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; | ||
| 56 | |||
| 57 | /// Reads an opcode at the current program counter location. | ||
| 58 | Macro::Opcode GetOpcode() const; | ||
| 59 | |||
| 60 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | ||
| 61 | u32 GetRegister(u32 register_id) const; | ||
| 62 | |||
| 63 | /// Sets the register to the input value. | ||
| 64 | void SetRegister(u32 register_id, u32 value); | ||
| 65 | |||
| 66 | /// Sets the method address to use for the next Send instruction. | ||
| 67 | void SetMethodAddress(u32 address); | ||
| 68 | |||
| 69 | /// Calls a GPU Engine method with the input parameter. | ||
| 70 | void Send(u32 value); | ||
| 71 | |||
| 72 | /// Reads a GPU register located at the method address. | ||
| 73 | u32 Read(u32 method) const; | ||
| 74 | |||
| 75 | /// Returns the next parameter in the parameter queue. | ||
| 76 | u32 FetchParameter(); | ||
| 77 | |||
| 78 | Engines::Maxwell3D& maxwell3d; | ||
| 79 | |||
| 80 | /// Current program counter | ||
| 81 | u32 pc; | ||
| 82 | /// Program counter to execute at after the delay slot is executed. | ||
| 83 | std::optional<u32> delayed_pc; | ||
| 84 | |||
| 85 | /// General purpose macro registers. | ||
| 86 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; | ||
| 87 | |||
| 88 | /// Method address to use for the next Send instruction. | ||
| 89 | Macro::MethodAddress method_address = {}; | ||
| 90 | |||
| 91 | /// Input parameters of the current macro. | ||
| 92 | std::unique_ptr<u32[]> parameters; | ||
| 93 | std::size_t num_parameters = 0; | ||
| 94 | std::size_t parameters_capacity = 0; | ||
| 95 | /// Index of the next parameter that will be fetched by the 'parm' instruction. | ||
| 96 | u32 next_parameter_index = 0; | ||
| 97 | |||
| 98 | bool carry_flag = false; | ||
| 99 | const std::vector<u32>& code; | ||
| 100 | }; | ||
| 101 | |||
| 102 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp new file mode 100644 index 000000000..07292702f --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -0,0 +1,621 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "common/microprofile.h" | ||
| 8 | #include "common/x64/xbyak_util.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/macro/macro_interpreter.h" | ||
| 11 | #include "video_core/macro/macro_jit_x64.h" | ||
| 12 | |||
| 13 | MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47)); | ||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | static const Xbyak::Reg64 STATE = Xbyak::util::rbx; | ||
| 18 | static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; | ||
| 19 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | ||
| 20 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | ||
| 21 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | ||
| 22 | |||
| 23 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | ||
| 24 | STATE, | ||
| 25 | RESULT, | ||
| 26 | PARAMETERS, | ||
| 27 | METHOD_ADDRESS, | ||
| 28 | BRANCH_HOLDER, | ||
| 29 | }); | ||
| 30 | |||
| 31 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) | ||
| 32 | : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {} | ||
| 33 | |||
| 34 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||
| 35 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||
| 36 | } | ||
| 37 | |||
| 38 | MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code) | ||
| 39 | : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { | ||
| 40 | Compile(); | ||
| 41 | } | ||
| 42 | |||
| 43 | MacroJITx64Impl::~MacroJITx64Impl() = default; | ||
| 44 | |||
| 45 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | ||
| 46 | MICROPROFILE_SCOPE(MacroJitExecute); | ||
| 47 | ASSERT_OR_EXECUTE(program != nullptr, { return; }); | ||
| 48 | JITState state{}; | ||
| 49 | state.maxwell3d = &maxwell3d; | ||
| 50 | state.registers = {}; | ||
| 51 | program(&state, parameters.data()); | ||
| 52 | } | ||
| 53 | |||
| 54 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | ||
| 55 | const bool is_a_zero = opcode.src_a == 0; | ||
| 56 | const bool is_b_zero = opcode.src_b == 0; | ||
| 57 | const bool valid_operation = !is_a_zero && !is_b_zero; | ||
| 58 | [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; | ||
| 59 | const bool has_zero_register = is_a_zero || is_b_zero; | ||
| 60 | const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || | ||
| 61 | opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; | ||
| 62 | |||
| 63 | Xbyak::Reg32 src_a; | ||
| 64 | Xbyak::Reg32 src_b; | ||
| 65 | |||
| 66 | if (!optimizer.zero_reg_skip || no_zero_reg_skip) { | ||
| 67 | src_a = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 68 | src_b = Compile_GetRegister(opcode.src_b, eax); | ||
| 69 | } else { | ||
| 70 | if (!is_a_zero) { | ||
| 71 | src_a = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 72 | } | ||
| 73 | if (!is_b_zero) { | ||
| 74 | src_b = Compile_GetRegister(opcode.src_b, eax); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | bool has_emitted = false; | ||
| 79 | |||
| 80 | switch (opcode.alu_operation) { | ||
| 81 | case Macro::ALUOperation::Add: | ||
| 82 | if (optimizer.zero_reg_skip) { | ||
| 83 | if (valid_operation) { | ||
| 84 | add(src_a, src_b); | ||
| 85 | } | ||
| 86 | } else { | ||
| 87 | add(src_a, src_b); | ||
| 88 | } | ||
| 89 | |||
| 90 | if (!optimizer.can_skip_carry) { | ||
| 91 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 92 | } | ||
| 93 | break; | ||
| 94 | case Macro::ALUOperation::AddWithCarry: | ||
| 95 | bt(dword[STATE + offsetof(JITState, carry_flag)], 0); | ||
| 96 | adc(src_a, src_b); | ||
| 97 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 98 | break; | ||
| 99 | case Macro::ALUOperation::Subtract: | ||
| 100 | if (optimizer.zero_reg_skip) { | ||
| 101 | if (valid_operation) { | ||
| 102 | sub(src_a, src_b); | ||
| 103 | has_emitted = true; | ||
| 104 | } | ||
| 105 | } else { | ||
| 106 | sub(src_a, src_b); | ||
| 107 | has_emitted = true; | ||
| 108 | } | ||
| 109 | if (!optimizer.can_skip_carry && has_emitted) { | ||
| 110 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 111 | } | ||
| 112 | break; | ||
| 113 | case Macro::ALUOperation::SubtractWithBorrow: | ||
| 114 | bt(dword[STATE + offsetof(JITState, carry_flag)], 0); | ||
| 115 | sbb(src_a, src_b); | ||
| 116 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 117 | break; | ||
| 118 | case Macro::ALUOperation::Xor: | ||
| 119 | if (optimizer.zero_reg_skip) { | ||
| 120 | if (valid_operation) { | ||
| 121 | xor_(src_a, src_b); | ||
| 122 | } | ||
| 123 | } else { | ||
| 124 | xor_(src_a, src_b); | ||
| 125 | } | ||
| 126 | break; | ||
| 127 | case Macro::ALUOperation::Or: | ||
| 128 | if (optimizer.zero_reg_skip) { | ||
| 129 | if (valid_operation) { | ||
| 130 | or_(src_a, src_b); | ||
| 131 | } | ||
| 132 | } else { | ||
| 133 | or_(src_a, src_b); | ||
| 134 | } | ||
| 135 | break; | ||
| 136 | case Macro::ALUOperation::And: | ||
| 137 | if (optimizer.zero_reg_skip) { | ||
| 138 | if (!has_zero_register) { | ||
| 139 | and_(src_a, src_b); | ||
| 140 | } | ||
| 141 | } else { | ||
| 142 | and_(src_a, src_b); | ||
| 143 | } | ||
| 144 | break; | ||
| 145 | case Macro::ALUOperation::AndNot: | ||
| 146 | if (optimizer.zero_reg_skip) { | ||
| 147 | if (!is_a_zero) { | ||
| 148 | not_(src_b); | ||
| 149 | and_(src_a, src_b); | ||
| 150 | } | ||
| 151 | } else { | ||
| 152 | not_(src_b); | ||
| 153 | and_(src_a, src_b); | ||
| 154 | } | ||
| 155 | break; | ||
| 156 | case Macro::ALUOperation::Nand: | ||
| 157 | if (optimizer.zero_reg_skip) { | ||
| 158 | if (!is_a_zero) { | ||
| 159 | and_(src_a, src_b); | ||
| 160 | not_(src_a); | ||
| 161 | } | ||
| 162 | } else { | ||
| 163 | and_(src_a, src_b); | ||
| 164 | not_(src_a); | ||
| 165 | } | ||
| 166 | break; | ||
| 167 | default: | ||
| 168 | UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", | ||
| 169 | static_cast<std::size_t>(opcode.alu_operation.Value())); | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 173 | } | ||
| 174 | |||
| 175 | void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { | ||
| 176 | if (optimizer.skip_dummy_addimmediate) { | ||
| 177 | // Games tend to use this as an exit instruction placeholder. It's to encode an instruction | ||
| 178 | // without doing anything. In our case we can just not emit anything. | ||
| 179 | if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { | ||
| 180 | return; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | // Check for redundant moves | ||
| 184 | if (optimizer.optimize_for_method_move && | ||
| 185 | opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { | ||
| 186 | if (next_opcode.has_value()) { | ||
| 187 | const auto next = *next_opcode; | ||
| 188 | if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && | ||
| 189 | opcode.dst == next.dst) { | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | } | ||
| 194 | if (optimizer.zero_reg_skip && opcode.src_a == 0) { | ||
| 195 | if (opcode.immediate == 0) { | ||
| 196 | xor_(RESULT, RESULT); | ||
| 197 | } else { | ||
| 198 | mov(RESULT, opcode.immediate); | ||
| 199 | } | ||
| 200 | } else { | ||
| 201 | auto result = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 202 | if (opcode.immediate > 2) { | ||
| 203 | add(result, opcode.immediate); | ||
| 204 | } else if (opcode.immediate == 1) { | ||
| 205 | inc(result); | ||
| 206 | } else if (opcode.immediate < 0) { | ||
| 207 | sub(result, opcode.immediate * -1); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 211 | } | ||
| 212 | |||
| 213 | void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { | ||
| 214 | auto dst = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 215 | auto src = Compile_GetRegister(opcode.src_b, eax); | ||
| 216 | |||
| 217 | if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { | ||
| 218 | shr(src, opcode.bf_src_bit); | ||
| 219 | } else if (opcode.bf_src_bit == 31) { | ||
| 220 | xor_(src, src); | ||
| 221 | } | ||
| 222 | // Don't bother masking the whole register since we're using a 32 bit register | ||
| 223 | if (opcode.bf_size != 31 && opcode.bf_size != 0) { | ||
| 224 | and_(src, opcode.GetBitfieldMask()); | ||
| 225 | } else if (opcode.bf_size == 0) { | ||
| 226 | xor_(src, src); | ||
| 227 | } | ||
| 228 | if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { | ||
| 229 | shl(src, opcode.bf_dst_bit); | ||
| 230 | } else if (opcode.bf_dst_bit == 31) { | ||
| 231 | xor_(src, src); | ||
| 232 | } | ||
| 233 | |||
| 234 | const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); | ||
| 235 | if (mask != 0xffffffff) { | ||
| 236 | and_(dst, mask); | ||
| 237 | } | ||
| 238 | or_(dst, src); | ||
| 239 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 240 | } | ||
| 241 | |||
| 242 | void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { | ||
| 243 | const auto dst = Compile_GetRegister(opcode.src_a, ecx); | ||
| 244 | const auto src = Compile_GetRegister(opcode.src_b, RESULT); | ||
| 245 | |||
| 246 | shr(src, dst.cvt8()); | ||
| 247 | if (opcode.bf_size != 0 && opcode.bf_size != 31) { | ||
| 248 | and_(src, opcode.GetBitfieldMask()); | ||
| 249 | } else if (opcode.bf_size == 0) { | ||
| 250 | xor_(src, src); | ||
| 251 | } | ||
| 252 | |||
| 253 | if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { | ||
| 254 | shl(src, opcode.bf_dst_bit); | ||
| 255 | } else if (opcode.bf_dst_bit == 31) { | ||
| 256 | xor_(src, src); | ||
| 257 | } | ||
| 258 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 259 | } | ||
| 260 | |||
| 261 | void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { | ||
| 262 | const auto dst = Compile_GetRegister(opcode.src_a, ecx); | ||
| 263 | const auto src = Compile_GetRegister(opcode.src_b, RESULT); | ||
| 264 | |||
| 265 | if (opcode.bf_src_bit != 0) { | ||
| 266 | shr(src, opcode.bf_src_bit); | ||
| 267 | } | ||
| 268 | |||
| 269 | if (opcode.bf_size != 31) { | ||
| 270 | and_(src, opcode.GetBitfieldMask()); | ||
| 271 | } | ||
| 272 | shl(src, dst.cvt8()); | ||
| 273 | |||
| 274 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 275 | } | ||
| 276 | |||
| 277 | void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | ||
| 278 | if (optimizer.zero_reg_skip && opcode.src_a == 0) { | ||
| 279 | if (opcode.immediate == 0) { | ||
| 280 | xor_(RESULT, RESULT); | ||
| 281 | } else { | ||
| 282 | mov(RESULT, opcode.immediate); | ||
| 283 | } | ||
| 284 | } else { | ||
| 285 | auto result = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 286 | if (opcode.immediate > 2) { | ||
| 287 | add(result, opcode.immediate); | ||
| 288 | } else if (opcode.immediate == 1) { | ||
| 289 | inc(result); | ||
| 290 | } else if (opcode.immediate < 0) { | ||
| 291 | sub(result, opcode.immediate * -1); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | // Equivalent to Engines::Maxwell3D::GetRegisterValue: | ||
| 296 | if (optimizer.enable_asserts) { | ||
| 297 | Xbyak::Label pass_range_check; | ||
| 298 | cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS)); | ||
| 299 | jb(pass_range_check); | ||
| 300 | int3(); | ||
| 301 | L(pass_range_check); | ||
| 302 | } | ||
| 303 | mov(rax, qword[STATE]); | ||
| 304 | mov(RESULT, | ||
| 305 | dword[rax + offsetof(Engines::Maxwell3D, regs) + | ||
| 306 | offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); | ||
| 307 | |||
| 308 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 309 | } | ||
| 310 | |||
| 311 | static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | ||
| 312 | maxwell3d->CallMethodFromMME(method_address.address, value); | ||
| 313 | } | ||
| 314 | |||
| 315 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | ||
| 316 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 317 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | ||
| 318 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | ||
| 319 | mov(Common::X64::ABI_PARAM3, value); | ||
| 320 | Common::X64::CallFarFunction(*this, &Send); | ||
| 321 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||
| 322 | |||
| 323 | Xbyak::Label dont_process{}; | ||
| 324 | // Get increment | ||
| 325 | test(METHOD_ADDRESS, 0x3f000); | ||
| 326 | // If zero, method address doesn't update | ||
| 327 | je(dont_process); | ||
| 328 | |||
| 329 | mov(ecx, METHOD_ADDRESS); | ||
| 330 | and_(METHOD_ADDRESS, 0xfff); | ||
| 331 | shr(ecx, 12); | ||
| 332 | and_(ecx, 0x3f); | ||
| 333 | lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); | ||
| 334 | sal(ecx, 12); | ||
| 335 | or_(eax, ecx); | ||
| 336 | |||
| 337 | mov(METHOD_ADDRESS, eax); | ||
| 338 | |||
| 339 | L(dont_process); | ||
| 340 | } | ||
| 341 | |||
| 342 | void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | ||
| 343 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | ||
| 344 | const s32 jump_address = | ||
| 345 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | ||
| 346 | |||
| 347 | Xbyak::Label end; | ||
| 348 | auto value = Compile_GetRegister(opcode.src_a, eax); | ||
| 349 | test(value, value); | ||
| 350 | if (optimizer.has_delayed_pc) { | ||
| 351 | switch (opcode.branch_condition) { | ||
| 352 | case Macro::BranchCondition::Zero: | ||
| 353 | jne(end, T_NEAR); | ||
| 354 | break; | ||
| 355 | case Macro::BranchCondition::NotZero: | ||
| 356 | je(end, T_NEAR); | ||
| 357 | break; | ||
| 358 | } | ||
| 359 | |||
| 360 | if (opcode.branch_annul) { | ||
| 361 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 362 | jmp(labels[jump_address], T_NEAR); | ||
| 363 | } else { | ||
| 364 | Xbyak::Label handle_post_exit{}; | ||
| 365 | Xbyak::Label skip{}; | ||
| 366 | jmp(skip, T_NEAR); | ||
| 367 | if (opcode.is_exit) { | ||
| 368 | L(handle_post_exit); | ||
| 369 | // Execute 1 instruction | ||
| 370 | mov(BRANCH_HOLDER, end_of_code); | ||
| 371 | // Jump to next instruction to skip delay slot check | ||
| 372 | jmp(labels[jump_address], T_NEAR); | ||
| 373 | } else { | ||
| 374 | L(handle_post_exit); | ||
| 375 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 376 | jmp(labels[jump_address], T_NEAR); | ||
| 377 | } | ||
| 378 | L(skip); | ||
| 379 | mov(BRANCH_HOLDER, handle_post_exit); | ||
| 380 | jmp(delay_skip[pc], T_NEAR); | ||
| 381 | } | ||
| 382 | } else { | ||
| 383 | switch (opcode.branch_condition) { | ||
| 384 | case Macro::BranchCondition::Zero: | ||
| 385 | je(labels[jump_address], T_NEAR); | ||
| 386 | break; | ||
| 387 | case Macro::BranchCondition::NotZero: | ||
| 388 | jne(labels[jump_address], T_NEAR); | ||
| 389 | break; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | L(end); | ||
| 394 | } | ||
| 395 | |||
| 396 | void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | ||
| 397 | optimizer.can_skip_carry = true; | ||
| 398 | optimizer.has_delayed_pc = false; | ||
| 399 | for (auto raw_op : code) { | ||
| 400 | Macro::Opcode op{}; | ||
| 401 | op.raw = raw_op; | ||
| 402 | |||
| 403 | if (op.operation == Macro::Operation::ALU) { | ||
| 404 | // Scan for any ALU operations which actually use the carry flag, if they don't exist in | ||
| 405 | // our current code we can skip emitting the carry flag handling operations | ||
| 406 | if (op.alu_operation == Macro::ALUOperation::AddWithCarry || | ||
| 407 | op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { | ||
| 408 | optimizer.can_skip_carry = false; | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 412 | if (op.operation == Macro::Operation::Branch) { | ||
| 413 | if (!op.branch_annul) { | ||
| 414 | optimizer.has_delayed_pc = true; | ||
| 415 | } | ||
| 416 | } | ||
| 417 | } | ||
| 418 | } | ||
| 419 | |||
| 420 | void MacroJITx64Impl::Compile() { | ||
| 421 | MICROPROFILE_SCOPE(MacroJitCompile); | ||
| 422 | bool keep_executing = true; | ||
| 423 | labels.fill(Xbyak::Label()); | ||
| 424 | |||
| 425 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | ||
| 426 | // JIT state | ||
| 427 | mov(STATE, Common::X64::ABI_PARAM1); | ||
| 428 | mov(PARAMETERS, Common::X64::ABI_PARAM2); | ||
| 429 | xor_(RESULT, RESULT); | ||
| 430 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); | ||
| 431 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 432 | |||
| 433 | mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); | ||
| 434 | |||
| 435 | // Track get register for zero registers and mark it as no-op | ||
| 436 | optimizer.zero_reg_skip = true; | ||
| 437 | |||
| 438 | // AddImmediate tends to be used as a NOP instruction, if we detect this we can | ||
| 439 | // completely skip the entire code path and no emit anything | ||
| 440 | optimizer.skip_dummy_addimmediate = true; | ||
| 441 | |||
| 442 | // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting | ||
| 443 | // one if our register isn't "dirty" | ||
| 444 | optimizer.optimize_for_method_move = true; | ||
| 445 | |||
| 446 | // Enable run-time assertions in JITted code | ||
| 447 | optimizer.enable_asserts = false; | ||
| 448 | |||
| 449 | // Check to see if we can skip emitting certain instructions | ||
| 450 | Optimizer_ScanFlags(); | ||
| 451 | |||
| 452 | const u32 op_count = static_cast<u32>(code.size()); | ||
| 453 | for (u32 i = 0; i < op_count; i++) { | ||
| 454 | if (i < op_count - 1) { | ||
| 455 | pc = i + 1; | ||
| 456 | next_opcode = GetOpCode(); | ||
| 457 | } else { | ||
| 458 | next_opcode = {}; | ||
| 459 | } | ||
| 460 | pc = i; | ||
| 461 | Compile_NextInstruction(); | ||
| 462 | } | ||
| 463 | |||
| 464 | L(end_of_code); | ||
| 465 | |||
| 466 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | ||
| 467 | ret(); | ||
| 468 | ready(); | ||
| 469 | program = getCode<ProgramType>(); | ||
| 470 | } | ||
| 471 | |||
| 472 | bool MacroJITx64Impl::Compile_NextInstruction() { | ||
| 473 | const auto opcode = GetOpCode(); | ||
| 474 | if (labels[pc].getAddress()) { | ||
| 475 | return false; | ||
| 476 | } | ||
| 477 | |||
| 478 | L(labels[pc]); | ||
| 479 | |||
| 480 | switch (opcode.operation) { | ||
| 481 | case Macro::Operation::ALU: | ||
| 482 | Compile_ALU(opcode); | ||
| 483 | break; | ||
| 484 | case Macro::Operation::AddImmediate: | ||
| 485 | Compile_AddImmediate(opcode); | ||
| 486 | break; | ||
| 487 | case Macro::Operation::ExtractInsert: | ||
| 488 | Compile_ExtractInsert(opcode); | ||
| 489 | break; | ||
| 490 | case Macro::Operation::ExtractShiftLeftImmediate: | ||
| 491 | Compile_ExtractShiftLeftImmediate(opcode); | ||
| 492 | break; | ||
| 493 | case Macro::Operation::ExtractShiftLeftRegister: | ||
| 494 | Compile_ExtractShiftLeftRegister(opcode); | ||
| 495 | break; | ||
| 496 | case Macro::Operation::Read: | ||
| 497 | Compile_Read(opcode); | ||
| 498 | break; | ||
| 499 | case Macro::Operation::Branch: | ||
| 500 | Compile_Branch(opcode); | ||
| 501 | break; | ||
| 502 | default: | ||
| 503 | UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); | ||
| 504 | break; | ||
| 505 | } | ||
| 506 | |||
| 507 | if (optimizer.has_delayed_pc) { | ||
| 508 | if (opcode.is_exit) { | ||
| 509 | mov(rax, end_of_code); | ||
| 510 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 511 | cmove(BRANCH_HOLDER, rax); | ||
| 512 | // Jump to next instruction to skip delay slot check | ||
| 513 | je(labels[pc + 1], T_NEAR); | ||
| 514 | } else { | ||
| 515 | // TODO(ogniK): Optimize delay slot branching | ||
| 516 | Xbyak::Label no_delay_slot{}; | ||
| 517 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 518 | je(no_delay_slot, T_NEAR); | ||
| 519 | mov(rax, BRANCH_HOLDER); | ||
| 520 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 521 | jmp(rax); | ||
| 522 | L(no_delay_slot); | ||
| 523 | } | ||
| 524 | L(delay_skip[pc]); | ||
| 525 | if (opcode.is_exit) { | ||
| 526 | return false; | ||
| 527 | } | ||
| 528 | } else { | ||
| 529 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 530 | jne(end_of_code, T_NEAR); | ||
| 531 | if (opcode.is_exit) { | ||
| 532 | inc(BRANCH_HOLDER); | ||
| 533 | return false; | ||
| 534 | } | ||
| 535 | } | ||
| 536 | return true; | ||
| 537 | } | ||
| 538 | |||
| 539 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | ||
| 540 | mov(eax, dword[PARAMETERS]); | ||
| 541 | add(PARAMETERS, sizeof(u32)); | ||
| 542 | return eax; | ||
| 543 | } | ||
| 544 | |||
| 545 | Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { | ||
| 546 | if (index == 0) { | ||
| 547 | // Register 0 is always zero | ||
| 548 | xor_(dst, dst); | ||
| 549 | } else { | ||
| 550 | mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); | ||
| 551 | } | ||
| 552 | |||
| 553 | return dst; | ||
| 554 | } | ||
| 555 | |||
| 556 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { | ||
| 557 | const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) { | ||
| 558 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero | ||
| 559 | // register. | ||
| 560 | if (reg == 0) { | ||
| 561 | return; | ||
| 562 | } | ||
| 563 | mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); | ||
| 564 | }; | ||
| 565 | const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); }; | ||
| 566 | |||
| 567 | switch (operation) { | ||
| 568 | case Macro::ResultOperation::IgnoreAndFetch: | ||
| 569 | SetRegister(reg, Compile_FetchParameter()); | ||
| 570 | break; | ||
| 571 | case Macro::ResultOperation::Move: | ||
| 572 | SetRegister(reg, RESULT); | ||
| 573 | break; | ||
| 574 | case Macro::ResultOperation::MoveAndSetMethod: | ||
| 575 | SetRegister(reg, RESULT); | ||
| 576 | SetMethodAddress(RESULT); | ||
| 577 | break; | ||
| 578 | case Macro::ResultOperation::FetchAndSend: | ||
| 579 | // Fetch parameter and send result. | ||
| 580 | SetRegister(reg, Compile_FetchParameter()); | ||
| 581 | Compile_Send(RESULT); | ||
| 582 | break; | ||
| 583 | case Macro::ResultOperation::MoveAndSend: | ||
| 584 | // Move and send result. | ||
| 585 | SetRegister(reg, RESULT); | ||
| 586 | Compile_Send(RESULT); | ||
| 587 | break; | ||
| 588 | case Macro::ResultOperation::FetchAndSetMethod: | ||
| 589 | // Fetch parameter and use result as Method Address. | ||
| 590 | SetRegister(reg, Compile_FetchParameter()); | ||
| 591 | SetMethodAddress(RESULT); | ||
| 592 | break; | ||
| 593 | case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: | ||
| 594 | // Move result and use as Method Address, then fetch and send parameter. | ||
| 595 | SetRegister(reg, RESULT); | ||
| 596 | SetMethodAddress(RESULT); | ||
| 597 | Compile_Send(Compile_FetchParameter()); | ||
| 598 | break; | ||
| 599 | case Macro::ResultOperation::MoveAndSetMethodSend: | ||
| 600 | // Move result and use as Method Address, then send bits 12:17 of result. | ||
| 601 | SetRegister(reg, RESULT); | ||
| 602 | SetMethodAddress(RESULT); | ||
| 603 | shr(RESULT, 12); | ||
| 604 | and_(RESULT, 0b111111); | ||
| 605 | Compile_Send(RESULT); | ||
| 606 | break; | ||
| 607 | default: | ||
| 608 | UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation)); | ||
| 609 | } | ||
| 610 | } | ||
| 611 | |||
| 612 | Macro::Opcode MacroJITx64Impl::GetOpCode() const { | ||
| 613 | ASSERT(pc < code.size()); | ||
| 614 | return {code[pc]}; | ||
| 615 | } | ||
| 616 | |||
| 617 | std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | ||
| 618 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | ||
| 619 | } | ||
| 620 | |||
| 621 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h new file mode 100644 index 000000000..a180e7428 --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <xbyak.h> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/x64/xbyak_abi.h" | ||
| 13 | #include "video_core/macro/macro.h" | ||
| 14 | |||
| 15 | namespace Tegra { | ||
| 16 | |||
| 17 | namespace Engines { | ||
| 18 | class Maxwell3D; | ||
| 19 | } | ||
| 20 | |||
/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games. It is used both as the
/// size passed to the Xbyak code generator and as the number of per-instruction label slots.
constexpr std::size_t MAX_CODE_SIZE = 0x10000;
| 23 | |||
| 24 | class MacroJITx64 final : public MacroEngine { | ||
| 25 | public: | ||
| 26 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); | ||
| 27 | |||
| 28 | protected: | ||
| 29 | std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; | ||
| 30 | |||
| 31 | private: | ||
| 32 | Engines::Maxwell3D& maxwell3d; | ||
| 33 | }; | ||
| 34 | |||
| 35 | class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { | ||
| 36 | public: | ||
| 37 | MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); | ||
| 38 | ~MacroJITx64Impl(); | ||
| 39 | |||
| 40 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 41 | |||
| 42 | void Compile_ALU(Macro::Opcode opcode); | ||
| 43 | void Compile_AddImmediate(Macro::Opcode opcode); | ||
| 44 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 45 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 46 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 47 | void Compile_Read(Macro::Opcode opcode); | ||
| 48 | void Compile_Branch(Macro::Opcode opcode); | ||
| 49 | |||
| 50 | private: | ||
| 51 | void Optimizer_ScanFlags(); | ||
| 52 | |||
| 53 | void Compile(); | ||
| 54 | bool Compile_NextInstruction(); | ||
| 55 | |||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 58 | |||
| 59 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 60 | void Compile_Send(Xbyak::Reg32 value); | ||
| 61 | |||
| 62 | Macro::Opcode GetOpCode() const; | ||
| 63 | std::bitset<32> PersistentCallerSavedRegs() const; | ||
| 64 | |||
| 65 | struct JITState { | ||
| 66 | Engines::Maxwell3D* maxwell3d{}; | ||
| 67 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 68 | u32 carry_flag{}; | ||
| 69 | }; | ||
| 70 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 71 | using ProgramType = void (*)(JITState*, const u32*); | ||
| 72 | |||
| 73 | struct OptimizerState { | ||
| 74 | bool can_skip_carry{}; | ||
| 75 | bool has_delayed_pc{}; | ||
| 76 | bool zero_reg_skip{}; | ||
| 77 | bool skip_dummy_addimmediate{}; | ||
| 78 | bool optimize_for_method_move{}; | ||
| 79 | bool enable_asserts{}; | ||
| 80 | }; | ||
| 81 | OptimizerState optimizer{}; | ||
| 82 | |||
| 83 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 84 | ProgramType program{nullptr}; | ||
| 85 | |||
| 86 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 87 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 88 | Xbyak::Label end_of_code{}; | ||
| 89 | |||
| 90 | bool is_delay_slot{}; | ||
| 91 | u32 pc{}; | ||
| 92 | std::optional<u32> delayed_pc; | ||
| 93 | |||
| 94 | const std::vector<u32>& code; | ||
| 95 | Engines::Maxwell3D& maxwell3d; | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace Tegra | ||