diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 3 |
2 files changed, 18 insertions, 28 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index c7b63a9b7..d6011832c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14; | |||
| 122 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR | 122 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR |
| 123 | static const X64Reg NEGBIT = XMM15; | 123 | static const X64Reg NEGBIT = XMM15; |
| 124 | 124 | ||
| 125 | // State registers that must not be modified by external functions calls | ||
| 126 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | ||
| 127 | static const BitSet32 persistent_regs = { | ||
| 128 | UNIFORMS, REGISTERS, // Pointers to register blocks | ||
| 129 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | ||
| 130 | ONE+16, NEGBIT+16, // Constants | ||
| 131 | }; | ||
| 132 | |||
| 125 | /// Raw constant for the source register selector that indicates no swizzling is performed | 133 | /// Raw constant for the source register selector that indicates no swizzling is performed |
| 126 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | 134 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; |
| 127 | /// Raw constant for the destination register enable mask that indicates all components are enabled | 135 | /// Raw constant for the destination register enable mask that indicates all components are enabled |
| @@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { | |||
| 295 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 303 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |
| 296 | } | 304 | } |
| 297 | 305 | ||
| 298 | void JitCompiler::Compile_PushCallerSavedXMM() { | 306 | BitSet32 JitCompiler::PersistentCallerSavedRegs() { |
| 299 | #ifndef _WIN32 | 307 | return persistent_regs & ABI_ALL_CALLER_SAVED; |
| 300 | SUB(64, R(RSP), Imm8(2 * 16)); | ||
| 301 | MOVUPS(MDisp(RSP, 16), ONE); | ||
| 302 | MOVUPS(MDisp(RSP, 0), NEGBIT); | ||
| 303 | #endif | ||
| 304 | } | ||
| 305 | |||
| 306 | void JitCompiler::Compile_PopCallerSavedXMM() { | ||
| 307 | #ifndef _WIN32 | ||
| 308 | MOVUPS(NEGBIT, MDisp(RSP, 0)); | ||
| 309 | MOVUPS(ONE, MDisp(RSP, 16)); | ||
| 310 | ADD(64, R(RSP), Imm8(2 * 16)); | ||
| 311 | #endif | ||
| 312 | } | 308 | } |
| 313 | 309 | ||
| 314 | void JitCompiler::Compile_ADD(Instruction instr) { | 310 | void JitCompiler::Compile_ADD(Instruction instr) { |
| @@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) { | |||
| 390 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 386 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 391 | MOVSS(XMM0, R(SRC1)); | 387 | MOVSS(XMM0, R(SRC1)); |
| 392 | 388 | ||
| 393 | // The following will actually break the stack alignment | 389 | ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 394 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 395 | Compile_PushCallerSavedXMM(); | ||
| 396 | ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); | 390 | ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); |
| 397 | Compile_PopCallerSavedXMM(); | 391 | ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 398 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 399 | 392 | ||
| 400 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | 393 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); |
| 401 | MOVAPS(SRC1, R(XMM0)); | 394 | MOVAPS(SRC1, R(XMM0)); |
| @@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) { | |||
| 406 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 399 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 407 | MOVSS(XMM0, R(SRC1)); | 400 | MOVSS(XMM0, R(SRC1)); |
| 408 | 401 | ||
| 409 | // The following will actually break the stack alignment | 402 | ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 410 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 411 | Compile_PushCallerSavedXMM(); | ||
| 412 | ABI_CallFunction(reinterpret_cast<const void*>(log2f)); | 403 | ABI_CallFunction(reinterpret_cast<const void*>(log2f)); |
| 413 | Compile_PopCallerSavedXMM(); | 404 | ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 414 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 415 | 405 | ||
| 416 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | 406 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); |
| 417 | MOVAPS(SRC1, R(XMM0)); | 407 | MOVAPS(SRC1, R(XMM0)); |
| @@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) { | |||
| 560 | } | 550 | } |
| 561 | 551 | ||
| 562 | void JitCompiler::Compile_END(Instruction instr) { | 552 | void JitCompiler::Compile_END(Instruction instr) { |
| 563 | ABI_PopAllCalleeSavedRegsAndAdjustStack(); | 553 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 564 | RET(); | 554 | RET(); |
| 565 | } | 555 | } |
| 566 | 556 | ||
| @@ -755,7 +745,8 @@ CompiledShader* JitCompiler::Compile() { | |||
| 755 | const auto& code = g_state.vs.program_code; | 745 | const auto& code = g_state.vs.program_code; |
| 756 | unsigned offset = g_state.regs.vs.main_offset; | 746 | unsigned offset = g_state.regs.vs.main_offset; |
| 757 | 747 | ||
| 758 | ABI_PushAllCalleeSavedRegsAndAdjustStack(); | 748 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 749 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | ||
| 759 | 750 | ||
| 760 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | 751 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); |
| 761 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | 752 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 58828ecc8..8668cfff4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -77,8 +77,7 @@ private: | |||
| 77 | void Compile_EvaluateCondition(Instruction instr); | 77 | void Compile_EvaluateCondition(Instruction instr); |
| 78 | void Compile_UniformCondition(Instruction instr); | 78 | void Compile_UniformCondition(Instruction instr); |
| 79 | 79 | ||
| 80 | void Compile_PushCallerSavedXMM(); | 80 | BitSet32 PersistentCallerSavedRegs(); |
| 81 | void Compile_PopCallerSavedXMM(); | ||
| 82 | 81 | ||
| 83 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 82 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. |
| 84 | unsigned* offset_ptr = nullptr; | 83 | unsigned* offset_ptr = nullptr; |