diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 25 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/macro_interpreter.h | 12 |
4 files changed, 55 insertions, 27 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7357d20d1..d79c50919 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | |||
| 43 | // Reset the current macro. | 43 | // Reset the current macro. |
| 44 | executing_macro = 0; | 44 | executing_macro = 0; |
| 45 | 45 | ||
| 46 | // The requested macro must have been uploaded already. | 46 | // Lookup the macro offset |
| 47 | auto macro_code = uploaded_macros.find(method); | 47 | const u32 entry{(method - MacroRegistersStart) >> 1}; |
| 48 | if (macro_code == uploaded_macros.end()) { | 48 | const auto& search{macro_offsets.find(entry)}; |
| 49 | LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); | 49 | if (search == macro_offsets.end()) { |
| 50 | LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); | ||
| 51 | UNREACHABLE(); | ||
| 50 | return; | 52 | return; |
| 51 | } | 53 | } |
| 52 | 54 | ||
| 53 | // Execute the current macro. | 55 | // Execute the current macro. |
| 54 | macro_interpreter.Execute(macro_code->second, std::move(parameters)); | 56 | macro_interpreter.Execute(search->second, std::move(parameters)); |
| 55 | } | 57 | } |
| 56 | 58 | ||
| 57 | void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | 59 | void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { |
| @@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 97 | ProcessMacroUpload(value); | 99 | ProcessMacroUpload(value); |
| 98 | break; | 100 | break; |
| 99 | } | 101 | } |
| 102 | case MAXWELL3D_REG_INDEX(macros.bind): { | ||
| 103 | ProcessMacroBind(value); | ||
| 104 | break; | ||
| 105 | } | ||
| 100 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): | 106 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): |
| 101 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): | 107 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): |
| 102 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): | 108 | case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): |
| @@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { | |||
| 158 | } | 164 | } |
| 159 | 165 | ||
| 160 | void Maxwell3D::ProcessMacroUpload(u32 data) { | 166 | void Maxwell3D::ProcessMacroUpload(u32 data) { |
| 161 | // Store the uploaded macro code to interpret them when they're called. | 167 | ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), |
| 162 | auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; | 168 | "upload_address exceeded macro_memory size!"); |
| 163 | macro.push_back(data); | 169 | macro_memory[regs.macros.upload_address++] = data; |
| 170 | } | ||
| 171 | |||
| 172 | void Maxwell3D::ProcessMacroBind(u32 data) { | ||
| 173 | macro_offsets[regs.macros.entry] = data; | ||
| 164 | } | 174 | } |
| 165 | 175 | ||
| 166 | void Maxwell3D::ProcessQueryGet() { | 176 | void Maxwell3D::ProcessQueryGet() { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 443affc36..50873813e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -475,12 +475,13 @@ public: | |||
| 475 | INSERT_PADDING_WORDS(0x45); | 475 | INSERT_PADDING_WORDS(0x45); |
| 476 | 476 | ||
| 477 | struct { | 477 | struct { |
| 478 | INSERT_PADDING_WORDS(1); | 478 | u32 upload_address; |
| 479 | u32 data; | 479 | u32 data; |
| 480 | u32 entry; | 480 | u32 entry; |
| 481 | u32 bind; | ||
| 481 | } macros; | 482 | } macros; |
| 482 | 483 | ||
| 483 | INSERT_PADDING_WORDS(0x189); | 484 | INSERT_PADDING_WORDS(0x188); |
| 484 | 485 | ||
| 485 | u32 tfb_enabled; | 486 | u32 tfb_enabled; |
| 486 | 487 | ||
| @@ -994,12 +995,25 @@ public: | |||
| 994 | /// Returns the texture information for a specific texture in a specific shader stage. | 995 | /// Returns the texture information for a specific texture in a specific shader stage. |
| 995 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | 996 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |
| 996 | 997 | ||
| 998 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | ||
| 999 | /// we've seen used. | ||
| 1000 | using MacroMemory = std::array<u32, 0x40000>; | ||
| 1001 | |||
| 1002 | /// Gets a reference to macro memory. | ||
| 1003 | const MacroMemory& GetMacroMemory() const { | ||
| 1004 | return macro_memory; | ||
| 1005 | } | ||
| 1006 | |||
| 997 | private: | 1007 | private: |
| 998 | void InitializeRegisterDefaults(); | 1008 | void InitializeRegisterDefaults(); |
| 999 | 1009 | ||
| 1000 | VideoCore::RasterizerInterface& rasterizer; | 1010 | VideoCore::RasterizerInterface& rasterizer; |
| 1001 | 1011 | ||
| 1002 | std::unordered_map<u32, std::vector<u32>> uploaded_macros; | 1012 | /// Start offsets of each macro in macro_memory |
| 1013 | std::unordered_map<u32, u32> macro_offsets; | ||
| 1014 | |||
| 1015 | /// Memory for macro code | ||
| 1016 | MacroMemory macro_memory; | ||
| 1003 | 1017 | ||
| 1004 | /// Macro method that is currently being executed / being fed parameters. | 1018 | /// Macro method that is currently being executed / being fed parameters. |
| 1005 | u32 executing_macro = 0; | 1019 | u32 executing_macro = 0; |
| @@ -1022,9 +1036,12 @@ private: | |||
| 1022 | */ | 1036 | */ |
| 1023 | void CallMacroMethod(u32 method, std::vector<u32> parameters); | 1037 | void CallMacroMethod(u32 method, std::vector<u32> parameters); |
| 1024 | 1038 | ||
| 1025 | /// Handles writes to the macro uploading registers. | 1039 | /// Handles writes to the macro uploading register. |
| 1026 | void ProcessMacroUpload(u32 data); | 1040 | void ProcessMacroUpload(u32 data); |
| 1027 | 1041 | ||
| 1042 | /// Handles writes to the macro bind register. | ||
| 1043 | void ProcessMacroBind(u32 data); | ||
| 1044 | |||
| 1028 | /// Handles a write to the CLEAR_BUFFERS register. | 1045 | /// Handles a write to the CLEAR_BUFFERS register. |
| 1029 | void ProcessClearBuffers(); | 1046 | void ProcessClearBuffers(); |
| 1030 | 1047 | ||
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index f6af132fb..335a8d407 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -11,7 +11,7 @@ namespace Tegra { | |||
| 11 | 11 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 13 | ||
| 14 | void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) { | 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { |
| 15 | Reset(); | 15 | Reset(); |
| 16 | registers[1] = parameters[0]; | 16 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 17 | this->parameters = std::move(parameters); |
| @@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa | |||
| 19 | // Execute the code until we hit an exit condition. | 19 | // Execute the code until we hit an exit condition. |
| 20 | bool keep_executing = true; | 20 | bool keep_executing = true; |
| 21 | while (keep_executing) { | 21 | while (keep_executing) { |
| 22 | keep_executing = Step(code, false); | 22 | keep_executing = Step(offset, false); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | // Assert that the macro used all the input parameters | 25 | // Assert that the macro used all the input parameters |
| @@ -37,10 +37,10 @@ void MacroInterpreter::Reset() { | |||
| 37 | next_parameter_index = 1; | 37 | next_parameter_index = 1; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | 40 | bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { |
| 41 | u32 base_address = pc; | 41 | u32 base_address = pc; |
| 42 | 42 | ||
| 43 | Opcode opcode = GetOpcode(code); | 43 | Opcode opcode = GetOpcode(offset); |
| 44 | pc += 4; | 44 | pc += 4; |
| 45 | 45 | ||
| 46 | // Update the program counter if we were delayed | 46 | // Update the program counter if we were delayed |
| @@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | |||
| 108 | 108 | ||
| 109 | delayed_pc = base_address + opcode.GetBranchTarget(); | 109 | delayed_pc = base_address + opcode.GetBranchTarget(); |
| 110 | // Execute one more instruction due to the delay slot. | 110 | // Execute one more instruction due to the delay slot. |
| 111 | return Step(code, true); | 111 | return Step(offset, true); |
| 112 | } | 112 | } |
| 113 | break; | 113 | break; |
| 114 | } | 114 | } |
| @@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) { | |||
| 121 | // Exit has a delay slot, execute the next instruction | 121 | // Exit has a delay slot, execute the next instruction |
| 122 | // Note: Executing an exit during a branch delay slot will cause the instruction at the | 122 | // Note: Executing an exit during a branch delay slot will cause the instruction at the |
| 123 | // branch target to be executed before exiting. | 123 | // branch target to be executed before exiting. |
| 124 | Step(code, true); | 124 | Step(offset, true); |
| 125 | return false; | 125 | return false; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | return true; | 128 | return true; |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const { | 131 | MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { |
| 132 | const auto& macro_memory{maxwell3d.GetMacroMemory()}; | ||
| 132 | ASSERT((pc % sizeof(u32)) == 0); | 133 | ASSERT((pc % sizeof(u32)) == 0); |
| 133 | ASSERT(pc < code.size() * sizeof(u32)); | 134 | ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); |
| 134 | return {code[pc / sizeof(u32)]}; | 135 | return {macro_memory[offset + pc / sizeof(u32)]}; |
| 135 | } | 136 | } |
| 136 | 137 | ||
| 137 | u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { | 138 | u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const { |
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 773684bde..62d1ce289 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h | |||
| @@ -22,10 +22,10 @@ public: | |||
| 22 | 22 | ||
| 23 | /** | 23 | /** |
| 24 | * Executes the macro code with the specified input parameters. | 24 | * Executes the macro code with the specified input parameters. |
| 25 | * @param code The macro byte code to execute | 25 | * @param offset Offset to start execution at. |
| 26 | * @param parameters The parameters of the macro | 26 | * @param parameters The parameters of the macro. |
| 27 | */ | 27 | */ |
| 28 | void Execute(const std::vector<u32>& code, std::vector<u32> parameters); | 28 | void Execute(u32 offset, std::vector<u32> parameters); |
| 29 | 29 | ||
| 30 | private: | 30 | private: |
| 31 | enum class Operation : u32 { | 31 | enum class Operation : u32 { |
| @@ -110,11 +110,11 @@ private: | |||
| 110 | /** | 110 | /** |
| 111 | * Executes a single macro instruction located at the current program counter. Returns whether | 111 | * Executes a single macro instruction located at the current program counter. Returns whether |
| 112 | * the interpreter should keep running. | 112 | * the interpreter should keep running. |
| 113 | * @param code The macro code to execute. | 113 | * @param offset Offset to start execution at. |
| 114 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | 114 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a |
| 115 | * previous instruction. | 115 | * previous instruction. |
| 116 | */ | 116 | */ |
| 117 | bool Step(const std::vector<u32>& code, bool is_delay_slot); | 117 | bool Step(u32 offset, bool is_delay_slot); |
| 118 | 118 | ||
| 119 | /// Calculates the result of an ALU operation. src_a OP src_b; | 119 | /// Calculates the result of an ALU operation. src_a OP src_b; |
| 120 | u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; | 120 | u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const; |
| @@ -127,7 +127,7 @@ private: | |||
| 127 | bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; | 127 | bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; |
| 128 | 128 | ||
| 129 | /// Reads an opcode at the current program counter location. | 129 | /// Reads an opcode at the current program counter location. |
| 130 | Opcode GetOpcode(const std::vector<u32>& code) const; | 130 | Opcode GetOpcode(u32 offset) const; |
| 131 | 131 | ||
| 132 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | 132 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. |
| 133 | u32 GetRegister(u32 register_id) const; | 133 | u32 GetRegister(u32 register_id) const; |