diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/x64/xbyak_abi.h | 95 | ||||
| -rw-r--r-- | src/video_core/macro/macro_jit_x64.cpp | 12 |
2 files changed, 35 insertions, 72 deletions
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h index 794da8a52..a5f5d4fc1 100644 --- a/src/common/x64/xbyak_abi.h +++ b/src/common/x64/xbyak_abi.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | namespace Common::X64 { | 12 | namespace Common::X64 { |
| 13 | 13 | ||
| 14 | inline int RegToIndex(const Xbyak::Reg& reg) { | 14 | inline std::size_t RegToIndex(const Xbyak::Reg& reg) { |
| 15 | using Kind = Xbyak::Reg::Kind; | 15 | using Kind = Xbyak::Reg::Kind; |
| 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, |
| 17 | "RegSet only support GPRs and XMM registers."); | 17 | "RegSet only support GPRs and XMM registers."); |
| @@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) { | |||
| 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | inline Xbyak::Reg64 IndexToReg64(int reg_index) { | 22 | inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { |
| 23 | ASSERT(reg_index < 16); | 23 | ASSERT(reg_index < 16); |
| 24 | return Xbyak::Reg64(reg_index); | 24 | return Xbyak::Reg64(static_cast<int>(reg_index)); |
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | inline Xbyak::Xmm IndexToXmm(int reg_index) { | 27 | inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { |
| 28 | ASSERT(reg_index >= 16 && reg_index < 32); | 28 | ASSERT(reg_index >= 16 && reg_index < 32); |
| 29 | return Xbyak::Xmm(reg_index - 16); | 29 | return Xbyak::Xmm(static_cast<int>(reg_index - 16)); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | inline Xbyak::Reg IndexToReg(int reg_index) { | 32 | inline Xbyak::Reg IndexToReg(std::size_t reg_index) { |
| 33 | if (reg_index < 16) { | 33 | if (reg_index < 16) { |
| 34 | return IndexToReg64(reg_index); | 34 | return IndexToReg64(reg_index); |
| 35 | } else { | 35 | } else { |
| @@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0; | |||
| 151 | 151 | ||
| 152 | #endif | 152 | #endif |
| 153 | 153 | ||
| 154 | inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | 154 | struct ABIFrameInfo { |
| 155 | size_t needed_frame_size, s32* out_subtraction, | 155 | s32 subtraction; |
| 156 | s32* out_xmm_offset) { | 156 | s32 xmm_offset; |
| 157 | }; | ||
| 158 | |||
| 159 | inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | ||
| 160 | size_t needed_frame_size) { | ||
| 157 | const auto count = (regs & ABI_ALL_GPRS).count(); | 161 | const auto count = (regs & ABI_ALL_GPRS).count(); |
| 158 | rsp_alignment -= count * 8; | 162 | rsp_alignment -= count * 8; |
| 159 | size_t subtraction = 0; | 163 | size_t subtraction = 0; |
| @@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | |||
| 170 | rsp_alignment -= subtraction; | 174 | rsp_alignment -= subtraction; |
| 171 | subtraction += rsp_alignment & 0xF; | 175 | subtraction += rsp_alignment & 0xF; |
| 172 | 176 | ||
| 173 | *out_subtraction = (s32)subtraction; | 177 | return ABIFrameInfo{static_cast<s32>(subtraction), |
| 174 | *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); | 178 | static_cast<s32>(subtraction - xmm_base_subtraction)}; |
| 175 | } | 179 | } |
| 176 | 180 | ||
| 177 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 181 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 178 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 182 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 179 | s32 subtraction, xmm_offset; | 183 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 180 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | 184 | |
| 181 | for (std::size_t i = 0; i < regs.size(); ++i) { | 185 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 182 | if (regs[i] && ABI_ALL_GPRS[i]) { | 186 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 183 | code.push(IndexToReg64(static_cast<int>(i))); | 187 | code.push(IndexToReg64(i)); |
| 184 | } | 188 | } |
| 185 | } | 189 | } |
| 186 | if (subtraction != 0) { | ||
| 187 | code.sub(code.rsp, subtraction); | ||
| 188 | } | ||
| 189 | 190 | ||
| 190 | for (int i = 0; i < regs.count(); i++) { | 191 | if (frame_info.subtraction != 0) { |
| 191 | if (regs.test(i) & ABI_ALL_GPRS.test(i)) { | 192 | code.sub(code.rsp, frame_info.subtraction); |
| 192 | code.push(IndexToReg64(i)); | ||
| 193 | } | ||
| 194 | } | 193 | } |
| 195 | 194 | ||
| 196 | for (std::size_t i = 0; i < regs.size(); ++i) { | 195 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 197 | if (regs[i] && ABI_ALL_XMMS[i]) { | 196 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 198 | code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); | 197 | code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i)); |
| 199 | xmm_offset += 0x10; | 198 | frame_info.xmm_offset += 0x10; |
| 200 | } | 199 | } |
| 201 | } | 200 | } |
| 202 | 201 | ||
| @@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b | |||
| 205 | 204 | ||
| 206 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | 205 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, |
| 207 | size_t rsp_alignment, size_t needed_frame_size = 0) { | 206 | size_t rsp_alignment, size_t needed_frame_size = 0) { |
| 208 | s32 subtraction, xmm_offset; | 207 | auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); |
| 209 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 210 | 208 | ||
| 211 | for (std::size_t i = 0; i < regs.size(); ++i) { | 209 | for (std::size_t i = 0; i < regs.size(); ++i) { |
| 212 | if (regs[i] && ABI_ALL_XMMS[i]) { | 210 | if (regs[i] && ABI_ALL_XMMS[i]) { |
| 213 | code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); | 211 | code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]); |
| 214 | xmm_offset += 0x10; | 212 | frame_info.xmm_offset += 0x10; |
| 215 | } | 213 | } |
| 216 | } | 214 | } |
| 217 | 215 | ||
| 218 | if (subtraction != 0) { | 216 | if (frame_info.subtraction != 0) { |
| 219 | code.add(code.rsp, subtraction); | 217 | code.add(code.rsp, frame_info.subtraction); |
| 220 | } | 218 | } |
| 221 | 219 | ||
| 222 | // GPRs need to be popped in reverse order | 220 | // GPRs need to be popped in reverse order |
| 223 | for (int i = 15; i >= 0; i--) { | 221 | for (std::size_t j = 0; j < regs.size(); ++j) { |
| 224 | if (regs[i]) { | 222 | const std::size_t i = regs.size() - j - 1; |
| 225 | code.pop(IndexToReg64(i)); | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 231 | size_t rsp_alignment, | ||
| 232 | size_t needed_frame_size = 0) { | ||
| 233 | s32 subtraction, xmm_offset; | ||
| 234 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 235 | |||
| 236 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 237 | if (regs[i] && ABI_ALL_GPRS[i]) { | 223 | if (regs[i] && ABI_ALL_GPRS[i]) { |
| 238 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | if (subtraction != 0) { | ||
| 243 | code.sub(code.rsp, subtraction); | ||
| 244 | } | ||
| 245 | |||
| 246 | return ABI_SHADOW_SPACE; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 250 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 251 | s32 subtraction, xmm_offset; | ||
| 252 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 253 | |||
| 254 | if (subtraction != 0) { | ||
| 255 | code.add(code.rsp, subtraction); | ||
| 256 | } | ||
| 257 | |||
| 258 | // GPRs need to be popped in reverse order | ||
| 259 | for (int i = 15; i >= 0; i--) { | ||
| 260 | if (regs[i]) { | ||
| 261 | code.pop(IndexToReg64(i)); | 224 | code.pop(IndexToReg64(i)); |
| 262 | } | 225 | } |
| 263 | } | 226 | } |
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index 2eb98173d..d4a97ec7b 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -295,22 +295,22 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||
| 295 | sub(result, opcode.immediate * -1); | 295 | sub(result, opcode.immediate * -1); |
| 296 | } | 296 | } |
| 297 | } | 297 | } |
| 298 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 298 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 299 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 299 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 300 | mov(Common::X64::ABI_PARAM2, RESULT); | 300 | mov(Common::X64::ABI_PARAM2, RESULT); |
| 301 | Common::X64::CallFarFunction(*this, &Read); | 301 | Common::X64::CallFarFunction(*this, &Read); |
| 302 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 302 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 303 | mov(RESULT, Common::X64::ABI_RETURN.cvt32()); | 303 | mov(RESULT, Common::X64::ABI_RETURN.cvt32()); |
| 304 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | 304 | Compile_ProcessResult(opcode.result_operation, opcode.dst); |
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | 307 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { |
| 308 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 308 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 309 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | 309 | mov(Common::X64::ABI_PARAM1, qword[STATE]); |
| 310 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | 310 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); |
| 311 | mov(Common::X64::ABI_PARAM3, value); | 311 | mov(Common::X64::ABI_PARAM3, value); |
| 312 | Common::X64::CallFarFunction(*this, &Send); | 312 | Common::X64::CallFarFunction(*this, &Send); |
| 313 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | 313 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |
| 314 | 314 | ||
| 315 | Xbyak::Label dont_process{}; | 315 | Xbyak::Label dont_process{}; |
| 316 | // Get increment | 316 | // Get increment |
| @@ -414,7 +414,7 @@ void MacroJITx64Impl::Compile() { | |||
| 414 | bool keep_executing = true; | 414 | bool keep_executing = true; |
| 415 | labels.fill(Xbyak::Label()); | 415 | labels.fill(Xbyak::Label()); |
| 416 | 416 | ||
| 417 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | 417 | Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
| 418 | // JIT state | 418 | // JIT state |
| 419 | mov(STATE, Common::X64::ABI_PARAM1); | 419 | mov(STATE, Common::X64::ABI_PARAM1); |
| 420 | mov(PARAMETERS, Common::X64::ABI_PARAM2); | 420 | mov(PARAMETERS, Common::X64::ABI_PARAM2); |
| @@ -452,7 +452,7 @@ void MacroJITx64Impl::Compile() { | |||
| 452 | 452 | ||
| 453 | L(end_of_code); | 453 | L(end_of_code); |
| 454 | 454 | ||
| 455 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | 455 | Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); |
| 456 | ret(); | 456 | ret(); |
| 457 | ready(); | 457 | ready(); |
| 458 | program = getCode<ProgramType>(); | 458 | program = getCode<ProgramType>(); |