summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 43
-rw-r--r-- src/video_core/shader/shader_jit_x64.h 3
2 files changed, 18 insertions, 28 deletions
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c7b63a9b7..d6011832c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14;
122/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR 122/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
123static const X64Reg NEGBIT = XMM15; 123static const X64Reg NEGBIT = XMM15;
124 124
125// State registers that must not be modified by external function calls
126// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
127static const BitSet32 persistent_regs = {
128 UNIFORMS, REGISTERS, // Pointers to register blocks
129 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
130 ONE+16, NEGBIT+16, // Constants
131};
132
125/// Raw constant for the source register selector that indicates no swizzling is performed 133/// Raw constant for the source register selector that indicates no swizzling is performed
126static const u8 NO_SRC_REG_SWIZZLE = 0x1b; 134static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
127/// Raw constant for the destination register enable mask that indicates all components are enabled 135/// Raw constant for the destination register enable mask that indicates all components are enabled
@@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
295 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 303 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
296} 304}
297 305
298void JitCompiler::Compile_PushCallerSavedXMM() { 306BitSet32 JitCompiler::PersistentCallerSavedRegs() {
299#ifndef _WIN32 307 return persistent_regs & ABI_ALL_CALLER_SAVED;
300 SUB(64, R(RSP), Imm8(2 * 16));
301 MOVUPS(MDisp(RSP, 16), ONE);
302 MOVUPS(MDisp(RSP, 0), NEGBIT);
303#endif
304}
305
306void JitCompiler::Compile_PopCallerSavedXMM() {
307#ifndef _WIN32
308 MOVUPS(NEGBIT, MDisp(RSP, 0));
309 MOVUPS(ONE, MDisp(RSP, 16));
310 ADD(64, R(RSP), Imm8(2 * 16));
311#endif
312} 308}
313 309
314void JitCompiler::Compile_ADD(Instruction instr) { 310void JitCompiler::Compile_ADD(Instruction instr) {
@@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) {
390 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 386 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
391 MOVSS(XMM0, R(SRC1)); 387 MOVSS(XMM0, R(SRC1));
392 388
393 // The following will actually break the stack alignment 389 ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
394 ABI_PushAllCallerSavedRegsAndAdjustStack();
395 Compile_PushCallerSavedXMM();
396 ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); 390 ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
397 Compile_PopCallerSavedXMM(); 391 ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
398 ABI_PopAllCallerSavedRegsAndAdjustStack();
399 392
400 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); 393 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
401 MOVAPS(SRC1, R(XMM0)); 394 MOVAPS(SRC1, R(XMM0));
@@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) {
406 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 399 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
407 MOVSS(XMM0, R(SRC1)); 400 MOVSS(XMM0, R(SRC1));
408 401
409 // The following will actually break the stack alignment 402 ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
410 ABI_PushAllCallerSavedRegsAndAdjustStack();
411 Compile_PushCallerSavedXMM();
412 ABI_CallFunction(reinterpret_cast<const void*>(log2f)); 403 ABI_CallFunction(reinterpret_cast<const void*>(log2f));
413 Compile_PopCallerSavedXMM(); 404 ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
414 ABI_PopAllCallerSavedRegsAndAdjustStack();
415 405
416 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); 406 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
417 MOVAPS(SRC1, R(XMM0)); 407 MOVAPS(SRC1, R(XMM0));
@@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) {
560} 550}
561 551
562void JitCompiler::Compile_END(Instruction instr) { 552void JitCompiler::Compile_END(Instruction instr) {
563 ABI_PopAllCalleeSavedRegsAndAdjustStack(); 553 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
564 RET(); 554 RET();
565} 555}
566 556
@@ -755,7 +745,8 @@ CompiledShader* JitCompiler::Compile() {
755 const auto& code = g_state.vs.program_code; 745 const auto& code = g_state.vs.program_code;
756 unsigned offset = g_state.regs.vs.main_offset; 746 unsigned offset = g_state.regs.vs.main_offset;
757 747
758 ABI_PushAllCalleeSavedRegsAndAdjustStack(); 748 // The stack pointer is 8 modulo 16 at the entry of a procedure
749 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
759 750
760 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 751 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
761 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 752 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 58828ecc8..8668cfff4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -77,8 +77,7 @@ private:
77 void Compile_EvaluateCondition(Instruction instr); 77 void Compile_EvaluateCondition(Instruction instr);
78 void Compile_UniformCondition(Instruction instr); 78 void Compile_UniformCondition(Instruction instr);
79 79
80 void Compile_PushCallerSavedXMM(); 80 BitSet32 PersistentCallerSavedRegs();
81 void Compile_PopCallerSavedXMM();
82 81
83 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 82 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
84 unsigned* offset_ptr = nullptr; 83 unsigned* offset_ptr = nullptr;