diff options
| author | 2022-01-26 12:39:59 -0800 | |
|---|---|---|
| committer | 2022-01-26 12:39:59 -0800 | |
| commit | 40050c1188ecae2688488e8109b708e04295b480 (patch) | |
| tree | 115f40e763afef8b376874150d536c23f12bd02e | |
| parent | Merge pull request #7769 from german77/no-control (diff) | |
| parent | video_core/macro: Add missing <cstring> header (diff) | |
| download | yuzu-40050c1188ecae2688488e8109b708e04295b480.tar.gz yuzu-40050c1188ecae2688488e8109b708e04295b480.tar.xz yuzu-40050c1188ecae2688488e8109b708e04295b480.zip | |
Merge pull request #7780 from lioncash/macro
video_core/macro: Move impl classes into their cpp files
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/macro/macro.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/macro/macro.h | 2 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.cpp | 34 | ||||
| -rw-r--r-- | src/video_core/macro/macro_hle.h | 21 | ||||
| -rw-r--r-- | src/video_core/macro/macro_interpreter.cpp | 92 | ||||
| -rw-r--r-- | src/video_core/macro/macro_interpreter.h | 78 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 71 |
9 files changed, 204 insertions, 213 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b18b8a02a..c38ebd670 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) | |||
| 240 | ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); | 240 | ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); |
| 241 | 241 | ||
| 242 | // Execute the current macro. | 242 | // Execute the current macro. |
| 243 | macro_engine->Execute(*this, macro_positions[entry], parameters); | 243 | macro_engine->Execute(macro_positions[entry], parameters); |
| 244 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { | 244 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { |
| 245 | FlushMMEInlineDraw(); | 245 | FlushMMEInlineDraw(); |
| 246 | } | 246 | } |
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index d7fabe605..0aeda4ce8 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp | |||
| @@ -2,12 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 5 | #include <optional> | 6 | #include <optional> |
| 7 | |||
| 6 | #include <boost/container_hash/hash.hpp> | 8 | #include <boost/container_hash/hash.hpp> |
| 9 | |||
| 7 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | ||
| 9 | #include "common/settings.h" | 11 | #include "common/settings.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/macro/macro.h" | 12 | #include "video_core/macro/macro.h" |
| 12 | #include "video_core/macro/macro_hle.h" | 13 | #include "video_core/macro/macro_hle.h" |
| 13 | #include "video_core/macro/macro_interpreter.h" | 14 | #include "video_core/macro/macro_interpreter.h" |
| @@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { | |||
| 24 | uploaded_macro_code[method].push_back(data); | 25 | uploaded_macro_code[method].push_back(data); |
| 25 | } | 26 | } |
| 26 | 27 | ||
| 27 | void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, | 28 | void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { |
| 28 | const std::vector<u32>& parameters) { | ||
| 29 | auto compiled_macro = macro_cache.find(method); | 29 | auto compiled_macro = macro_cache.find(method); |
| 30 | if (compiled_macro != macro_cache.end()) { | 30 | if (compiled_macro != macro_cache.end()) { |
| 31 | const auto& cache_info = compiled_macro->second; | 31 | const auto& cache_info = compiled_macro->second; |
| @@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, | |||
| 66 | cache_info.lle_program = Compile(code); | 66 | cache_info.lle_program = Compile(code); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); | 69 | if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { |
| 70 | if (hle_program.has_value()) { | ||
| 71 | cache_info.has_hle_program = true; | 70 | cache_info.has_hle_program = true; |
| 72 | cache_info.hle_program = std::move(hle_program.value()); | 71 | cache_info.hle_program = std::move(hle_program); |
| 73 | cache_info.hle_program->Execute(parameters, method); | 72 | cache_info.hle_program->Execute(parameters, method); |
| 74 | } else { | 73 | } else { |
| 75 | cache_info.lle_program->Execute(parameters, method); | 74 | cache_info.lle_program->Execute(parameters, method); |
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h index 31ee3440a..7aaa49286 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro/macro.h | |||
| @@ -119,7 +119,7 @@ public: | |||
| 119 | void AddCode(u32 method, u32 data); | 119 | void AddCode(u32 method, u32 data); |
| 120 | 120 | ||
| 121 | // Compiles the macro if its not in the cache, and executes the compiled macro | 121 | // Compiles the macro if its not in the cache, and executes the compiled macro |
| 122 | void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); | 122 | void Execute(u32 method, const std::vector<u32>& parameters); |
| 123 | 123 | ||
| 124 | protected: | 124 | protected: |
| 125 | virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; | 125 | virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; |
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 70ac7c620..900ad23c9 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -5,12 +5,15 @@ | |||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include "video_core/engines/maxwell_3d.h" | 7 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro/macro.h" | ||
| 8 | #include "video_core/macro/macro_hle.h" | 9 | #include "video_core/macro/macro_hle.h" |
| 9 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 10 | 11 | ||
| 11 | namespace Tegra { | 12 | namespace Tegra { |
| 12 | |||
| 13 | namespace { | 13 | namespace { |
| 14 | |||
| 15 | using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); | ||
| 16 | |||
| 14 | // HLE'd functions | 17 | // HLE'd functions |
| 15 | void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | 18 | void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { |
| 16 | const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); | 19 | const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); |
| @@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||
| 77 | maxwell3d.CallMethodFromMME(0x8e5, 0x0); | 80 | maxwell3d.CallMethodFromMME(0x8e5, 0x0); |
| 78 | maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; | 81 | maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; |
| 79 | } | 82 | } |
| 80 | } // Anonymous namespace | ||
| 81 | 83 | ||
| 82 | constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | 84 | constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ |
| 83 | {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, | 85 | {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, |
| @@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | |||
| 85 | {0x0217920100488FF7, &HLE_0217920100488FF7}, | 87 | {0x0217920100488FF7, &HLE_0217920100488FF7}, |
| 86 | }}; | 88 | }}; |
| 87 | 89 | ||
| 90 | class HLEMacroImpl final : public CachedMacro { | ||
| 91 | public: | ||
| 92 | explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) | ||
| 93 | : maxwell3d{maxwell3d_}, func{func_} {} | ||
| 94 | |||
| 95 | void Execute(const std::vector<u32>& parameters, u32 method) override { | ||
| 96 | func(maxwell3d, parameters); | ||
| 97 | } | ||
| 98 | |||
| 99 | private: | ||
| 100 | Engines::Maxwell3D& maxwell3d; | ||
| 101 | HLEFunction func; | ||
| 102 | }; | ||
| 103 | } // Anonymous namespace | ||
| 104 | |||
| 88 | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} | 105 | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} |
| 89 | HLEMacro::~HLEMacro() = default; | 106 | HLEMacro::~HLEMacro() = default; |
| 90 | 107 | ||
| 91 | std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { | 108 | std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { |
| 92 | const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), | 109 | const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), |
| 93 | [hash](const auto& pair) { return pair.first == hash; }); | 110 | [hash](const auto& pair) { return pair.first == hash; }); |
| 94 | if (it == hle_funcs.end()) { | 111 | if (it == hle_funcs.end()) { |
| 95 | return std::nullopt; | 112 | return nullptr; |
| 96 | } | 113 | } |
| 97 | return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); | 114 | return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); |
| 98 | } | 115 | } |
| 99 | 116 | ||
| 100 | HLEMacroImpl::~HLEMacroImpl() = default; | ||
| 101 | |||
| 102 | HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) | ||
| 103 | : maxwell3d{maxwell3d_}, func{func_} {} | ||
| 104 | |||
| 105 | void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { | ||
| 106 | func(maxwell3d, parameters); | ||
| 107 | } | ||
| 108 | |||
| 109 | } // namespace Tegra | 117 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h index cb3bd1600..b86ba84a1 100644 --- a/src/video_core/macro/macro_hle.h +++ b/src/video_core/macro/macro_hle.h | |||
| @@ -5,10 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <optional> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 11 | #include "video_core/macro/macro.h" | ||
| 12 | 9 | ||
| 13 | namespace Tegra { | 10 | namespace Tegra { |
| 14 | 11 | ||
| @@ -16,29 +13,17 @@ namespace Engines { | |||
| 16 | class Maxwell3D; | 13 | class Maxwell3D; |
| 17 | } | 14 | } |
| 18 | 15 | ||
| 19 | using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); | ||
| 20 | |||
| 21 | class HLEMacro { | 16 | class HLEMacro { |
| 22 | public: | 17 | public: |
| 23 | explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); | 18 | explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); |
| 24 | ~HLEMacro(); | 19 | ~HLEMacro(); |
| 25 | 20 | ||
| 26 | std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; | 21 | // Allocates and returns a cached macro if the hash matches a known function. |
| 27 | 22 | // Returns nullptr otherwise. | |
| 28 | private: | 23 | [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; |
| 29 | Engines::Maxwell3D& maxwell3d; | ||
| 30 | }; | ||
| 31 | |||
| 32 | class HLEMacroImpl : public CachedMacro { | ||
| 33 | public: | ||
| 34 | explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); | ||
| 35 | ~HLEMacroImpl(); | ||
| 36 | |||
| 37 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 38 | 24 | ||
| 39 | private: | 25 | private: |
| 40 | Engines::Maxwell3D& maxwell3d; | 26 | Engines::Maxwell3D& maxwell3d; |
| 41 | HLEFunction func; | ||
| 42 | }; | 27 | }; |
| 43 | 28 | ||
| 44 | } // namespace Tegra | 29 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 8da26fd59..fba755448 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp | |||
| @@ -2,6 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include <optional> | ||
| 7 | |||
| 5 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| @@ -11,16 +14,81 @@ | |||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | 14 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); |
| 12 | 15 | ||
| 13 | namespace Tegra { | 16 | namespace Tegra { |
| 14 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) | 17 | namespace { |
| 15 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | 18 | class MacroInterpreterImpl final : public CachedMacro { |
| 19 | public: | ||
| 20 | explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | ||
| 21 | : maxwell3d{maxwell3d_}, code{code_} {} | ||
| 16 | 22 | ||
| 17 | std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { | 23 | void Execute(const std::vector<u32>& params, u32 method) override; |
| 18 | return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); | 24 | |
| 19 | } | 25 | private: |
| 26 | /// Resets the execution engine state, zeroing registers, etc. | ||
| 27 | void Reset(); | ||
| 28 | |||
| 29 | /** | ||
| 30 | * Executes a single macro instruction located at the current program counter. Returns whether | ||
| 31 | * the interpreter should keep running. | ||
| 32 | * | ||
| 33 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||
| 34 | * previous instruction. | ||
| 35 | */ | ||
| 36 | bool Step(bool is_delay_slot); | ||
| 37 | |||
| 38 | /// Calculates the result of an ALU operation. src_a OP src_b; | ||
| 39 | u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); | ||
| 40 | |||
| 41 | /// Performs the result operation on the input result and stores it in the specified register | ||
| 42 | /// (if necessary). | ||
| 43 | void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); | ||
| 44 | |||
| 45 | /// Evaluates the branch condition and returns whether the branch should be taken or not. | ||
| 46 | bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; | ||
| 47 | |||
| 48 | /// Reads an opcode at the current program counter location. | ||
| 49 | Macro::Opcode GetOpcode() const; | ||
| 50 | |||
| 51 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | ||
| 52 | u32 GetRegister(u32 register_id) const; | ||
| 53 | |||
| 54 | /// Sets the register to the input value. | ||
| 55 | void SetRegister(u32 register_id, u32 value); | ||
| 56 | |||
| 57 | /// Sets the method address to use for the next Send instruction. | ||
| 58 | void SetMethodAddress(u32 address); | ||
| 20 | 59 | ||
| 21 | MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, | 60 | /// Calls a GPU Engine method with the input parameter. |
| 22 | const std::vector<u32>& code_) | 61 | void Send(u32 value); |
| 23 | : maxwell3d{maxwell3d_}, code{code_} {} | 62 | |
| 63 | /// Reads a GPU register located at the method address. | ||
| 64 | u32 Read(u32 method) const; | ||
| 65 | |||
| 66 | /// Returns the next parameter in the parameter queue. | ||
| 67 | u32 FetchParameter(); | ||
| 68 | |||
| 69 | Engines::Maxwell3D& maxwell3d; | ||
| 70 | |||
| 71 | /// Current program counter | ||
| 72 | u32 pc{}; | ||
| 73 | /// Program counter to execute at after the delay slot is executed. | ||
| 74 | std::optional<u32> delayed_pc; | ||
| 75 | |||
| 76 | /// General purpose macro registers. | ||
| 77 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; | ||
| 78 | |||
| 79 | /// Method address to use for the next Send instruction. | ||
| 80 | Macro::MethodAddress method_address = {}; | ||
| 81 | |||
| 82 | /// Input parameters of the current macro. | ||
| 83 | std::unique_ptr<u32[]> parameters; | ||
| 84 | std::size_t num_parameters = 0; | ||
| 85 | std::size_t parameters_capacity = 0; | ||
| 86 | /// Index of the next parameter that will be fetched by the 'parm' instruction. | ||
| 87 | u32 next_parameter_index = 0; | ||
| 88 | |||
| 89 | bool carry_flag = false; | ||
| 90 | const std::vector<u32>& code; | ||
| 91 | }; | ||
| 24 | 92 | ||
| 25 | void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { | 93 | void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { |
| 26 | MICROPROFILE_SCOPE(MacroInterp); | 94 | MICROPROFILE_SCOPE(MacroInterp); |
| @@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() { | |||
| 283 | ASSERT(next_parameter_index < num_parameters); | 351 | ASSERT(next_parameter_index < num_parameters); |
| 284 | return parameters[next_parameter_index++]; | 352 | return parameters[next_parameter_index++]; |
| 285 | } | 353 | } |
| 354 | } // Anonymous namespace | ||
| 355 | |||
| 356 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) | ||
| 357 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||
| 358 | |||
| 359 | std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { | ||
| 360 | return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); | ||
| 361 | } | ||
| 286 | 362 | ||
| 287 | } // namespace Tegra | 363 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index d50c619ce..8a9648e46 100644 --- a/src/video_core/macro/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h | |||
| @@ -3,10 +3,9 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | #include <array> | 6 | |
| 7 | #include <optional> | ||
| 8 | #include <vector> | 7 | #include <vector> |
| 9 | #include "common/bit_field.h" | 8 | |
| 10 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 11 | #include "video_core/macro/macro.h" | 10 | #include "video_core/macro/macro.h" |
| 12 | 11 | ||
| @@ -26,77 +25,4 @@ private: | |||
| 26 | Engines::Maxwell3D& maxwell3d; | 25 | Engines::Maxwell3D& maxwell3d; |
| 27 | }; | 26 | }; |
| 28 | 27 | ||
| 29 | class MacroInterpreterImpl : public CachedMacro { | ||
| 30 | public: | ||
| 31 | explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); | ||
| 32 | void Execute(const std::vector<u32>& params, u32 method) override; | ||
| 33 | |||
| 34 | private: | ||
| 35 | /// Resets the execution engine state, zeroing registers, etc. | ||
| 36 | void Reset(); | ||
| 37 | |||
| 38 | /** | ||
| 39 | * Executes a single macro instruction located at the current program counter. Returns whether | ||
| 40 | * the interpreter should keep running. | ||
| 41 | * | ||
| 42 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||
| 43 | * previous instruction. | ||
| 44 | */ | ||
| 45 | bool Step(bool is_delay_slot); | ||
| 46 | |||
| 47 | /// Calculates the result of an ALU operation. src_a OP src_b; | ||
| 48 | u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); | ||
| 49 | |||
| 50 | /// Performs the result operation on the input result and stores it in the specified register | ||
| 51 | /// (if necessary). | ||
| 52 | void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); | ||
| 53 | |||
| 54 | /// Evaluates the branch condition and returns whether the branch should be taken or not. | ||
| 55 | bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; | ||
| 56 | |||
| 57 | /// Reads an opcode at the current program counter location. | ||
| 58 | Macro::Opcode GetOpcode() const; | ||
| 59 | |||
| 60 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | ||
| 61 | u32 GetRegister(u32 register_id) const; | ||
| 62 | |||
| 63 | /// Sets the register to the input value. | ||
| 64 | void SetRegister(u32 register_id, u32 value); | ||
| 65 | |||
| 66 | /// Sets the method address to use for the next Send instruction. | ||
| 67 | void SetMethodAddress(u32 address); | ||
| 68 | |||
| 69 | /// Calls a GPU Engine method with the input parameter. | ||
| 70 | void Send(u32 value); | ||
| 71 | |||
| 72 | /// Reads a GPU register located at the method address. | ||
| 73 | u32 Read(u32 method) const; | ||
| 74 | |||
| 75 | /// Returns the next parameter in the parameter queue. | ||
| 76 | u32 FetchParameter(); | ||
| 77 | |||
| 78 | Engines::Maxwell3D& maxwell3d; | ||
| 79 | |||
| 80 | /// Current program counter | ||
| 81 | u32 pc; | ||
| 82 | /// Program counter to execute at after the delay slot is executed. | ||
| 83 | std::optional<u32> delayed_pc; | ||
| 84 | |||
| 85 | /// General purpose macro registers. | ||
| 86 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; | ||
| 87 | |||
| 88 | /// Method address to use for the next Send instruction. | ||
| 89 | Macro::MethodAddress method_address = {}; | ||
| 90 | |||
| 91 | /// Input parameters of the current macro. | ||
| 92 | std::unique_ptr<u32[]> parameters; | ||
| 93 | std::size_t num_parameters = 0; | ||
| 94 | std::size_t parameters_capacity = 0; | ||
| 95 | /// Index of the next parameter that will be fetched by the 'parm' instruction. | ||
| 96 | u32 next_parameter_index = 0; | ||
| 97 | |||
| 98 | bool carry_flag = false; | ||
| 99 | const std::vector<u32>& code; | ||
| 100 | }; | ||
| 101 | |||
| 102 | } // namespace Tegra | 28 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c6b2b2109..924c9fe5c 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -2,9 +2,17 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include <bitset> | ||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include <xbyak/xbyak.h> | ||
| 10 | |||
| 5 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/bit_field.h" | ||
| 6 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | 14 | #include "common/microprofile.h" |
| 15 | #include "common/x64/xbyak_abi.h" | ||
| 8 | #include "common/x64/xbyak_util.h" | 16 | #include "common/x64/xbyak_util.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/macro/macro_interpreter.h" | 18 | #include "video_core/macro/macro_interpreter.h" |
| @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | 22 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); |
| 15 | 23 | ||
| 16 | namespace Tegra { | 24 | namespace Tegra { |
| 25 | namespace { | ||
| 17 | constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; | 26 | constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; |
| 18 | constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; | 27 | constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; |
| 19 | constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | 28 | constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; |
| 20 | constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | 29 | constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; |
| 21 | constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | 30 | constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; |
| 22 | 31 | ||
| 23 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | 32 | const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ |
| 24 | STATE, | 33 | STATE, |
| 25 | RESULT, | 34 | RESULT, |
| 26 | PARAMETERS, | 35 | PARAMETERS, |
| @@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | |||
| 28 | BRANCH_HOLDER, | 37 | BRANCH_HOLDER, |
| 29 | }); | 38 | }); |
| 30 | 39 | ||
| 31 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | 40 | // Arbitrarily chosen based on current booting games. |
| 32 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | 41 | constexpr size_t MAX_CODE_SIZE = 0x10000; |
| 33 | 42 | ||
| 34 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | 43 | std::bitset<32> PersistentCallerSavedRegs() { |
| 35 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | 44 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; |
| 36 | } | 45 | } |
| 37 | 46 | ||
| 38 | MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | 47 | class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { |
| 39 | : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | 48 | public: |
| 40 | Compile(); | 49 | explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) |
| 41 | } | 50 | : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { |
| 51 | Compile(); | ||
| 52 | } | ||
| 53 | |||
| 54 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 55 | |||
| 56 | void Compile_ALU(Macro::Opcode opcode); | ||
| 57 | void Compile_AddImmediate(Macro::Opcode opcode); | ||
| 58 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 59 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 60 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 61 | void Compile_Read(Macro::Opcode opcode); | ||
| 62 | void Compile_Branch(Macro::Opcode opcode); | ||
| 63 | |||
| 64 | private: | ||
| 65 | void Optimizer_ScanFlags(); | ||
| 66 | |||
| 67 | void Compile(); | ||
| 68 | bool Compile_NextInstruction(); | ||
| 69 | |||
| 70 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 71 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 72 | |||
| 73 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 74 | void Compile_Send(Xbyak::Reg32 value); | ||
| 42 | 75 | ||
| 43 | MacroJITx64Impl::~MacroJITx64Impl() = default; | 76 | Macro::Opcode GetOpCode() const; |
| 77 | |||
| 78 | struct JITState { | ||
| 79 | Engines::Maxwell3D* maxwell3d{}; | ||
| 80 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 81 | u32 carry_flag{}; | ||
| 82 | }; | ||
| 83 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 84 | using ProgramType = void (*)(JITState*, const u32*); | ||
| 85 | |||
| 86 | struct OptimizerState { | ||
| 87 | bool can_skip_carry{}; | ||
| 88 | bool has_delayed_pc{}; | ||
| 89 | bool zero_reg_skip{}; | ||
| 90 | bool skip_dummy_addimmediate{}; | ||
| 91 | bool optimize_for_method_move{}; | ||
| 92 | bool enable_asserts{}; | ||
| 93 | }; | ||
| 94 | OptimizerState optimizer{}; | ||
| 95 | |||
| 96 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 97 | ProgramType program{nullptr}; | ||
| 98 | |||
| 99 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 100 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 101 | Xbyak::Label end_of_code{}; | ||
| 102 | |||
| 103 | bool is_delay_slot{}; | ||
| 104 | u32 pc{}; | ||
| 105 | |||
| 106 | const std::vector<u32>& code; | ||
| 107 | Engines::Maxwell3D& maxwell3d; | ||
| 108 | }; | ||
| 44 | 109 | ||
| 45 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | 110 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { |
| 46 | MICROPROFILE_SCOPE(MacroJitExecute); | 111 | MICROPROFILE_SCOPE(MacroJitExecute); |
| @@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||
| 307 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | 372 | Compile_ProcessResult(opcode.result_operation, opcode.dst); |
| 308 | } | 373 | } |
| 309 | 374 | ||
| 310 | static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | 375 | void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { |
| 311 | maxwell3d->CallMethodFromMME(method_address.address, value); | 376 | maxwell3d->CallMethodFromMME(method_address.address, value); |
| 312 | } | 377 | } |
| 313 | 378 | ||
| 314 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | 379 | void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { |
| 315 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | 380 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 316 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 381 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 317 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | 382 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); |
| @@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||
| 338 | L(dont_process); | 403 | L(dont_process); |
| 339 | } | 404 | } |
| 340 | 405 | ||
| 341 | void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | 406 | void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { |
| 342 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | 407 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |
| 343 | const s32 jump_address = | 408 | const s32 jump_address = |
| 344 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | 409 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); |
| @@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | |||
| 392 | L(end); | 457 | L(end); |
| 393 | } | 458 | } |
| 394 | 459 | ||
| 395 | void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | 460 | void MacroJITx64Impl::Optimizer_ScanFlags() { |
| 396 | optimizer.can_skip_carry = true; | 461 | optimizer.can_skip_carry = true; |
| 397 | optimizer.has_delayed_pc = false; | 462 | optimizer.has_delayed_pc = false; |
| 398 | for (auto raw_op : code) { | 463 | for (auto raw_op : code) { |
| @@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||
| 534 | return true; | 599 | return true; |
| 535 | } | 600 | } |
| 536 | 601 | ||
| 537 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | 602 | Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { |
| 538 | mov(eax, dword[PARAMETERS]); | 603 | mov(eax, dword[PARAMETERS]); |
| 539 | add(PARAMETERS, sizeof(u32)); | 604 | add(PARAMETERS, sizeof(u32)); |
| 540 | return eax; | 605 | return eax; |
| @@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { | |||
| 611 | ASSERT(pc < code.size()); | 676 | ASSERT(pc < code.size()); |
| 612 | return {code[pc]}; | 677 | return {code[pc]}; |
| 613 | } | 678 | } |
| 679 | } // Anonymous namespace | ||
| 614 | 680 | ||
| 615 | std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | 681 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) |
| 616 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | 682 | : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} |
| 617 | } | ||
| 618 | 683 | ||
| 684 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||
| 685 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||
| 686 | } | ||
| 619 | } // namespace Tegra | 687 | } // namespace Tegra |
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index d03d480b4..773b037ae 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -4,12 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <xbyak/xbyak.h> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 12 | #include "common/x64/xbyak_abi.h" | ||
| 13 | #include "video_core/macro/macro.h" | 8 | #include "video_core/macro/macro.h" |
| 14 | 9 | ||
| 15 | namespace Tegra { | 10 | namespace Tegra { |
| @@ -18,9 +13,6 @@ namespace Engines { | |||
| 18 | class Maxwell3D; | 13 | class Maxwell3D; |
| 19 | } | 14 | } |
| 20 | 15 | ||
| 21 | /// MAX_CODE_SIZE is arbitrarily chosen based on current booting games | ||
| 22 | constexpr size_t MAX_CODE_SIZE = 0x10000; | ||
| 23 | |||
| 24 | class MacroJITx64 final : public MacroEngine { | 16 | class MacroJITx64 final : public MacroEngine { |
| 25 | public: | 17 | public: |
| 26 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); | 18 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); |
| @@ -32,67 +24,4 @@ private: | |||
| 32 | Engines::Maxwell3D& maxwell3d; | 24 | Engines::Maxwell3D& maxwell3d; |
| 33 | }; | 25 | }; |
| 34 | 26 | ||
| 35 | class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { | ||
| 36 | public: | ||
| 37 | explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); | ||
| 38 | ~MacroJITx64Impl(); | ||
| 39 | |||
| 40 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 41 | |||
| 42 | void Compile_ALU(Macro::Opcode opcode); | ||
| 43 | void Compile_AddImmediate(Macro::Opcode opcode); | ||
| 44 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 45 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 46 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 47 | void Compile_Read(Macro::Opcode opcode); | ||
| 48 | void Compile_Branch(Macro::Opcode opcode); | ||
| 49 | |||
| 50 | private: | ||
| 51 | void Optimizer_ScanFlags(); | ||
| 52 | |||
| 53 | void Compile(); | ||
| 54 | bool Compile_NextInstruction(); | ||
| 55 | |||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 58 | |||
| 59 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 60 | void Compile_Send(Xbyak::Reg32 value); | ||
| 61 | |||
| 62 | Macro::Opcode GetOpCode() const; | ||
| 63 | std::bitset<32> PersistentCallerSavedRegs() const; | ||
| 64 | |||
| 65 | struct JITState { | ||
| 66 | Engines::Maxwell3D* maxwell3d{}; | ||
| 67 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 68 | u32 carry_flag{}; | ||
| 69 | }; | ||
| 70 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 71 | using ProgramType = void (*)(JITState*, const u32*); | ||
| 72 | |||
| 73 | struct OptimizerState { | ||
| 74 | bool can_skip_carry{}; | ||
| 75 | bool has_delayed_pc{}; | ||
| 76 | bool zero_reg_skip{}; | ||
| 77 | bool skip_dummy_addimmediate{}; | ||
| 78 | bool optimize_for_method_move{}; | ||
| 79 | bool enable_asserts{}; | ||
| 80 | }; | ||
| 81 | OptimizerState optimizer{}; | ||
| 82 | |||
| 83 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 84 | ProgramType program{nullptr}; | ||
| 85 | |||
| 86 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 87 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 88 | Xbyak::Label end_of_code{}; | ||
| 89 | |||
| 90 | bool is_delay_slot{}; | ||
| 91 | u32 pc{}; | ||
| 92 | std::optional<u32> delayed_pc; | ||
| 93 | |||
| 94 | const std::vector<u32>& code; | ||
| 95 | Engines::Maxwell3D& maxwell3d; | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace Tegra | 27 | } // namespace Tegra |