summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: aroulin, 2015-08-26 09:12:14 +0200
committer: aroulin, 2015-09-01 23:39:52 +0200
commit: 179ad35c2e6dff0c367dedb63c47a78c6cd052a5 (patch)
tree: 8e60274a443cdd8e651ab768c45f0f7a1e6ecbf2 /src
parent: Common: Import BitSet from Dolphin (diff)
download: yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.tar.gz
yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.tar.xz
yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.zip
x64: Proper stack alignment in shader JIT function calls
Import Dolphin stack handling and register saving routines. Also removes the x86 parts from the abi files.
Diffstat (limited to 'src')
-rw-r--r-- src/common/x64/abi.cpp 411
-rw-r--r-- src/common/x64/abi.h 61
-rw-r--r-- src/common/x64/emitter.h 42
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 43
-rw-r--r-- src/video_core/shader/shader_jit_x64.h 3
5 files changed, 108 insertions, 452 deletions
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp
index 4c07a6ebe..955eb86ce 100644
--- a/src/common/x64/abi.cpp
+++ b/src/common/x64/abi.cpp
@@ -22,247 +22,69 @@ using namespace Gen;
22 22
23// Shared code between Win64 and Unix64 23// Shared code between Win64 and Unix64
24 24
25// Sets up a __cdecl function. 25void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
26void XEmitter::ABI_EmitPrologue(int maxCallParams) 26 size_t shadow = 0;
27{ 27#if defined(_WIN32)
28#ifdef _M_IX86 28 shadow = 0x20;
29 // Don't really need to do anything
30#elif defined(ARCHITECTURE_x86_64)
31#if _WIN32
32 int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8;
33 // Set up a stack frame so that we can call functions
34 // TODO: use maxCallParams
35 SUB(64, R(RSP), Imm8(stacksize));
36#endif
37#else
38#error Arch not supported
39#endif 29#endif
40}
41
42void XEmitter::ABI_EmitEpilogue(int maxCallParams)
43{
44#ifdef _M_IX86
45 RET();
46#elif defined(ARCHITECTURE_x86_64)
47#ifdef _WIN32
48 int stacksize = ((maxCallParams+1)&~1)*8 + 8;
49 ADD(64, R(RSP), Imm8(stacksize));
50#endif
51 RET();
52#else
53#error Arch not supported
54
55
56#endif
57}
58
59#ifdef _M_IX86 // All32
60
61// Shared code between Win32 and Unix32
62void XEmitter::ABI_CallFunction(const void *func) {
63 ABI_AlignStack(0);
64 CALL(func);
65 ABI_RestoreStack(0);
66}
67
68void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
69 ABI_AlignStack(1 * 2);
70 PUSH(16, Imm16(param1));
71 CALL(func);
72 ABI_RestoreStack(1 * 2);
73}
74
75void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
76 ABI_AlignStack(1 * 2 + 1 * 4);
77 PUSH(16, Imm16(param2));
78 PUSH(32, Imm32(param1));
79 CALL(func);
80 ABI_RestoreStack(1 * 2 + 1 * 4);
81}
82
83void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
84 ABI_AlignStack(1 * 4);
85 PUSH(32, Imm32(param1));
86 CALL(func);
87 ABI_RestoreStack(1 * 4);
88}
89
90void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
91 ABI_AlignStack(2 * 4);
92 PUSH(32, Imm32(param2));
93 PUSH(32, Imm32(param1));
94 CALL(func);
95 ABI_RestoreStack(2 * 4);
96}
97
98void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
99 ABI_AlignStack(3 * 4);
100 PUSH(32, Imm32(param3));
101 PUSH(32, Imm32(param2));
102 PUSH(32, Imm32(param1));
103 CALL(func);
104 ABI_RestoreStack(3 * 4);
105}
106
107void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
108 ABI_AlignStack(3 * 4);
109 PUSH(32, ImmPtr(param3));
110 PUSH(32, Imm32(param2));
111 PUSH(32, Imm32(param1));
112 CALL(func);
113 ABI_RestoreStack(3 * 4);
114}
115
116void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) {
117 ABI_AlignStack(4 * 4);
118 PUSH(32, ImmPtr(param4));
119 PUSH(32, Imm32(param3));
120 PUSH(32, Imm32(param2));
121 PUSH(32, Imm32(param1));
122 CALL(func);
123 ABI_RestoreStack(4 * 4);
124}
125
126void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
127 ABI_AlignStack(1 * 4);
128 PUSH(32, ImmPtr(param1));
129 CALL(func);
130 ABI_RestoreStack(1 * 4);
131}
132
133void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
134 ABI_AlignStack(2 * 4);
135 PUSH(32, arg2);
136 PUSH(32, ImmPtr(param1));
137 CALL(func);
138 ABI_RestoreStack(2 * 4);
139}
140
141void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
142 ABI_AlignStack(3 * 4);
143 PUSH(32, arg3);
144 PUSH(32, arg2);
145 PUSH(32, ImmPtr(param1));
146 CALL(func);
147 ABI_RestoreStack(3 * 4);
148}
149
150void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
151 ABI_AlignStack(3 * 4);
152 PUSH(32, Imm32(param3));
153 PUSH(32, ImmPtr(param2));
154 PUSH(32, ImmPtr(param1));
155 CALL(func);
156 ABI_RestoreStack(3 * 4);
157}
158
159// Pass a register as a parameter.
160void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
161 ABI_AlignStack(1 * 4);
162 PUSH(32, R(reg1));
163 CALL(func);
164 ABI_RestoreStack(1 * 4);
165}
166
167// Pass two registers as parameters.
168void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
169{
170 ABI_AlignStack(2 * 4);
171 PUSH(32, R(reg2));
172 PUSH(32, R(reg1));
173 CALL(func);
174 ABI_RestoreStack(2 * 4);
175}
176 30
177void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) 31 int count = (mask & ABI_ALL_GPRS).Count();
178{ 32 rsp_alignment -= count * 8;
179 ABI_AlignStack(2 * 4); 33 size_t subtraction = 0;
180 PUSH(32, Imm32(param2)); 34 int fpr_count = (mask & ABI_ALL_FPRS).Count();
181 PUSH(32, arg1); 35 if (fpr_count) {
182 CALL(func); 36 // If we have any XMMs to save, we must align the stack here.
183 ABI_RestoreStack(2 * 4); 37 subtraction = rsp_alignment & 0xf;
184} 38 }
39 subtraction += 16 * fpr_count;
40 size_t xmm_base_subtraction = subtraction;
41 subtraction += needed_frame_size;
42 subtraction += shadow;
43 // Final alignment.
44 rsp_alignment -= subtraction;
45 subtraction += rsp_alignment & 0xf;
185 46
186void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) 47 *shadowp = shadow;
187{ 48 *subtractionp = subtraction;
188 ABI_AlignStack(3 * 4); 49 *xmm_offsetp = subtraction - xmm_base_subtraction;
189 PUSH(32, Imm32(param3));
190 PUSH(32, Imm32(param2));
191 PUSH(32, arg1);
192 CALL(func);
193 ABI_RestoreStack(3 * 4);
194} 50}
195 51
196void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) 52size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
197{ 53 size_t shadow, subtraction, xmm_offset;
198 ABI_AlignStack(1 * 4); 54 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
199 PUSH(32, arg1);
200 CALL(func);
201 ABI_RestoreStack(1 * 4);
202}
203 55
204void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) 56 for (int r : mask & ABI_ALL_GPRS)
205{ 57 PUSH((X64Reg)r);
206 ABI_AlignStack(2 * 4);
207 PUSH(32, arg2);
208 PUSH(32, arg1);
209 CALL(func);
210 ABI_RestoreStack(2 * 4);
211}
212 58
213void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { 59 if (subtraction)
214 // Note: 4 * 4 = 16 bytes, so alignment is preserved. 60 SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
215 PUSH(EBP);
216 PUSH(EBX);
217 PUSH(ESI);
218 PUSH(EDI);
219}
220 61
221void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { 62 for (int x : mask & ABI_ALL_FPRS) {
222 POP(EDI); 63 MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
223 POP(ESI); 64 xmm_offset += 16;
224 POP(EBX); 65 }
225 POP(EBP);
226}
227 66
228unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { 67 return shadow;
229 frameSize += 4; // reserve space for return address
230 unsigned int alignedSize =
231#ifdef __GNUC__
232 (frameSize + 15) & -16;
233#else
234 (frameSize + 3) & -4;
235#endif
236 return alignedSize;
237} 68}
238 69
70void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
71 size_t shadow, subtraction, xmm_offset;
72 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
239 73
240void XEmitter::ABI_AlignStack(unsigned int frameSize) { 74 for (int x : mask & ABI_ALL_FPRS) {
241// Mac OS X requires the stack to be 16-byte aligned before every call. 75 MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
242// Linux requires the stack to be 16-byte aligned before calls that put SSE 76 xmm_offset += 16;
243// vectors on the stack, but since we do not keep track of which calls do that,
244// it is effectively every call as well.
245// Windows binaries compiled with MSVC do not have such a restriction*, but I
246// expect that GCC on Windows acts the same as GCC on Linux in this respect.
247// It would be nice if someone could verify this.
248// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times.
249 unsigned int fillSize =
250 ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4);
251 if (fillSize != 0) {
252 SUB(32, R(ESP), Imm8(fillSize));
253 } 77 }
254}
255 78
256void XEmitter::ABI_RestoreStack(unsigned int frameSize) { 79 if (subtraction)
257 unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); 80 ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
258 alignedSize -= 4; // return address is POPped at end of call 81
259 if (alignedSize != 0) { 82 for (int r = 15; r >= 0; r--) {
260 ADD(32, R(ESP), Imm8(alignedSize)); 83 if (mask[r])
84 POP((X64Reg)r);
261 } 85 }
262} 86}
263 87
264#else //64bit
265
266// Common functions 88// Common functions
267void XEmitter::ABI_CallFunction(const void *func) { 89void XEmitter::ABI_CallFunction(const void *func) {
268 u64 distance = u64(func) - (u64(code) + 5); 90 u64 distance = u64(func) - (u64(code) + 5);
@@ -538,143 +360,4 @@ void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, cons
538 } else { 360 } else {
539 CALL(func); 361 CALL(func);
540 } 362 }
541} 363} \ No newline at end of file
542
543unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
544 return frameSize;
545}
546
547#ifdef _WIN32
548
549// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs.
550// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs.
551// Let's just save all 16.
552const int XMM_STACK_SPACE = 16 * 16;
553
554// Win64 Specific Code
555void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
556 //we only want to do this once
557 PUSH(RBX);
558 PUSH(RSI);
559 PUSH(RDI);
560 PUSH(RBP);
561 PUSH(R12);
562 PUSH(R13);
563 PUSH(R14);
564 PUSH(R15);
565 ABI_AlignStack(0);
566
567 // Do this after aligning, because before it's offset by 8.
568 SUB(64, R(RSP), Imm32(XMM_STACK_SPACE));
569 for (int i = 0; i < 16; ++i)
570 MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i));
571}
572
573void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
574 for (int i = 0; i < 16; ++i)
575 MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16));
576 ADD(64, R(RSP), Imm32(XMM_STACK_SPACE));
577
578 ABI_RestoreStack(0);
579 POP(R15);
580 POP(R14);
581 POP(R13);
582 POP(R12);
583 POP(RBP);
584 POP(RDI);
585 POP(RSI);
586 POP(RBX);
587}
588
589// Win64 Specific Code
590void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
591 PUSH(RCX);
592 PUSH(RDX);
593 PUSH(RSI);
594 PUSH(RDI);
595 PUSH(R8);
596 PUSH(R9);
597 PUSH(R10);
598 PUSH(R11);
599 // TODO: Callers preserve XMM4-5 (XMM0-3 are args.)
600 ABI_AlignStack(0);
601}
602
603void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
604 ABI_RestoreStack(0);
605 POP(R11);
606 POP(R10);
607 POP(R9);
608 POP(R8);
609 POP(RDI);
610 POP(RSI);
611 POP(RDX);
612 POP(RCX);
613}
614
615void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
616 SUB(64, R(RSP), Imm8(0x28));
617}
618
619void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
620 ADD(64, R(RSP), Imm8(0x28));
621}
622
623#else
624// Unix64 Specific Code
625void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
626 PUSH(RBX);
627 PUSH(RBP);
628 PUSH(R12);
629 PUSH(R13);
630 PUSH(R14);
631 PUSH(R15);
632 PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
633 // TODO: XMM?
634}
635
636void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
637 POP(R15);
638 POP(R15);
639 POP(R14);
640 POP(R13);
641 POP(R12);
642 POP(RBP);
643 POP(RBX);
644}
645
646void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
647 PUSH(RCX);
648 PUSH(RDX);
649 PUSH(RSI);
650 PUSH(RDI);
651 PUSH(R8);
652 PUSH(R9);
653 PUSH(R10);
654 PUSH(R11);
655 PUSH(R11);
656}
657
658void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
659 POP(R11);
660 POP(R11);
661 POP(R10);
662 POP(R9);
663 POP(R8);
664 POP(RDI);
665 POP(RSI);
666 POP(RDX);
667 POP(RCX);
668}
669
670void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
671 SUB(64, R(RSP), Imm8(0x08));
672}
673
674void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
675 ADD(64, R(RSP), Imm8(0x08));
676}
677
678#endif // WIN32
679
680#endif // 32bit
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h
index 7e9c156ae..de6d62fdd 100644
--- a/src/common/x64/abi.h
+++ b/src/common/x64/abi.h
@@ -1,35 +1,15 @@
1// Copyright (C) 2003 Dolphin Project. 1// Copyright 2008 Dolphin Emulator Project
2 2// Licensed under GPLv2+
3// This program is free software: you can redistribute it and/or modify 3// Refer to the license.txt file included.
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17 4
18#pragma once 5#pragma once
19 6
20#include "common/common_types.h" 7#include "common/bit_set.h"
8#include "emitter.h"
21 9
22// x86/x64 ABI:s, and helpers to help follow them when JIT-ing code. 10// x64 ABI:s, and helpers to help follow them when JIT-ing code.
23// All conventions return values in EAX (+ possibly EDX). 11// All conventions return values in EAX (+ possibly EDX).
24 12
25// Linux 32-bit, Windows 32-bit (cdecl, System V):
26// * Caller pushes left to right
27// * Caller fixes stack after call
28// * function subtract from stack for local storage only.
29// Scratch: EAX ECX EDX
30// Callee-save: EBX ESI EDI EBP
31// Parameters: -
32
33// Windows 64-bit 13// Windows 64-bit
34// * 4-reg "fastcall" variant, very new-skool stack handling 14// * 4-reg "fastcall" variant, very new-skool stack handling
35// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ 15// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
@@ -44,18 +24,8 @@
44// Callee-save: RBX RBP R12 R13 R14 R15 24// Callee-save: RBX RBP R12 R13 R14 R15
45// Parameters: RDI RSI RDX RCX R8 R9 25// Parameters: RDI RSI RDX RCX R8 R9
46 26
47#ifdef _M_IX86 // 32 bit calling convention, shared by all 27#define ABI_ALL_FPRS BitSet32(0xffff0000)
48 28#define ABI_ALL_GPRS BitSet32(0x0000ffff)
49// 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to
50// choose regs to put stuff in.
51#define ABI_PARAM1 RCX
52#define ABI_PARAM2 RDX
53
54// There are no ABI_PARAM* here, since args are pushed.
55// 32-bit bog standard cdecl, shared between linux and windows
56// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about.
57
58#elif ARCHITECTURE_x86_64 // 64 bit calling convention
59 29
60#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention 30#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
61 31
@@ -64,7 +34,11 @@
64#define ABI_PARAM3 R8 34#define ABI_PARAM3 R8
65#define ABI_PARAM4 R9 35#define ABI_PARAM4 R9
66 36
67#else //64-bit Unix (hopefully MacOSX too) 37// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
38#define ABI_ALL_CALLER_SAVED \
39 (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
40 XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
41#else //64-bit Unix / OS X
68 42
69#define ABI_PARAM1 RDI 43#define ABI_PARAM1 RDI
70#define ABI_PARAM2 RSI 44#define ABI_PARAM2 RSI
@@ -73,6 +47,13 @@
73#define ABI_PARAM5 R8 47#define ABI_PARAM5 R8
74#define ABI_PARAM6 R9 48#define ABI_PARAM6 R9
75 49
50// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
51// don't actually clobber them.
52#define ABI_ALL_CALLER_SAVED \
53 (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
54 ABI_ALL_FPRS)
76#endif // WIN32 55#endif // WIN32
77 56
78#endif // X86 57#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
58
59#define ABI_RETURN RAX
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index a49cd2cf1..2dd0dc94e 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -18,6 +18,7 @@
18#pragma once 18#pragma once
19 19
20#include "common/assert.h" 20#include "common/assert.h"
21#include "common/bit_set.h"
21#include "common/common_types.h" 22#include "common/common_types.h"
22#include "common/code_block.h" 23#include "common/code_block.h"
23 24
@@ -356,7 +357,7 @@ private:
356 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); 357 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
357 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); 358 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
358 359
359 void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); 360 void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
360 361
361protected: 362protected:
362 void Write8(u8 value); 363 void Write8(u8 value);
@@ -1007,25 +1008,26 @@ public:
1007 ABI_CallFunctionC((const void*)func, param1); 1008 ABI_CallFunctionC((const void*)func, param1);
1008 } 1009 }
1009 1010
1010 // A function that doesn't have any control over what it will do to regs, 1011 /**
1011 // such as the dispatcher, should be surrounded by these. 1012 * Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI
1012 void ABI_PushAllCalleeSavedRegsAndAdjustStack(); 1013 *
1013 void ABI_PopAllCalleeSavedRegsAndAdjustStack(); 1014 * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
1014 1015 * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
1015 // A function that doesn't know anything about it's surroundings, should 1016 * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack
1016 // be surrounded by these to establish a safe environment, where it can roam free. 1017 * @return Size of the shadow space, i.e., offset of the frame
1017 // An example is a backpatch injected function. 1018 */
1018 void ABI_PushAllCallerSavedRegsAndAdjustStack(); 1019 size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
1019 void ABI_PopAllCallerSavedRegsAndAdjustStack(); 1020
1020 1021 /**
1021 unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); 1022 * Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before
1022 void ABI_AlignStack(unsigned int frameSize); 1023 * the matching PushRegistersAndAdjustStack.
1023 void ABI_RestoreStack(unsigned int frameSize); 1024 *
1024 1025 * @param mask Registers to restore from the stack (high 16 bits are XMMs, low 16 bits are GPRs)
1025 // Sets up a __cdecl function. 1026 * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8
1026 // Only x64 really needs the parameter count. 1027 * @param needed_frame_size Additional space that was needed
1027 void ABI_EmitPrologue(int maxCallParams); 1028 * @warning Stack must be currently 16-byte aligned
1028 void ABI_EmitEpilogue(int maxCallParams); 1029 */
1030 void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0);
1029 1031
1030 #ifdef _M_IX86 1032 #ifdef _M_IX86
1031 static int ABI_GetNumXMMRegs() { return 8; } 1033 static int ABI_GetNumXMMRegs() { return 8; }
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c7b63a9b7..d6011832c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14;
122/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR 122/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
123static const X64Reg NEGBIT = XMM15; 123static const X64Reg NEGBIT = XMM15;
124 124
125// State registers that must not be modified by external function calls
126// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
127static const BitSet32 persistent_regs = {
128 UNIFORMS, REGISTERS, // Pointers to register blocks
129 ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
130 ONE+16, NEGBIT+16, // Constants
131};
132
125/// Raw constant for the source register selector that indicates no swizzling is performed 133/// Raw constant for the source register selector that indicates no swizzling is performed
126static const u8 NO_SRC_REG_SWIZZLE = 0x1b; 134static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
127/// Raw constant for the destination register enable mask that indicates all components are enabled 135/// Raw constant for the destination register enable mask that indicates all components are enabled
@@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
295 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 303 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
296} 304}
297 305
298void JitCompiler::Compile_PushCallerSavedXMM() { 306BitSet32 JitCompiler::PersistentCallerSavedRegs() {
299#ifndef _WIN32 307 return persistent_regs & ABI_ALL_CALLER_SAVED;
300 SUB(64, R(RSP), Imm8(2 * 16));
301 MOVUPS(MDisp(RSP, 16), ONE);
302 MOVUPS(MDisp(RSP, 0), NEGBIT);
303#endif
304}
305
306void JitCompiler::Compile_PopCallerSavedXMM() {
307#ifndef _WIN32
308 MOVUPS(NEGBIT, MDisp(RSP, 0));
309 MOVUPS(ONE, MDisp(RSP, 16));
310 ADD(64, R(RSP), Imm8(2 * 16));
311#endif
312} 308}
313 309
314void JitCompiler::Compile_ADD(Instruction instr) { 310void JitCompiler::Compile_ADD(Instruction instr) {
@@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) {
390 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 386 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
391 MOVSS(XMM0, R(SRC1)); 387 MOVSS(XMM0, R(SRC1));
392 388
393 // The following will actually break the stack alignment 389 ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
394 ABI_PushAllCallerSavedRegsAndAdjustStack();
395 Compile_PushCallerSavedXMM();
396 ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); 390 ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
397 Compile_PopCallerSavedXMM(); 391 ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
398 ABI_PopAllCallerSavedRegsAndAdjustStack();
399 392
400 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); 393 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
401 MOVAPS(SRC1, R(XMM0)); 394 MOVAPS(SRC1, R(XMM0));
@@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) {
406 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 399 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
407 MOVSS(XMM0, R(SRC1)); 400 MOVSS(XMM0, R(SRC1));
408 401
409 // The following will actually break the stack alignment 402 ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
410 ABI_PushAllCallerSavedRegsAndAdjustStack();
411 Compile_PushCallerSavedXMM();
412 ABI_CallFunction(reinterpret_cast<const void*>(log2f)); 403 ABI_CallFunction(reinterpret_cast<const void*>(log2f));
413 Compile_PopCallerSavedXMM(); 404 ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
414 ABI_PopAllCallerSavedRegsAndAdjustStack();
415 405
416 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); 406 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
417 MOVAPS(SRC1, R(XMM0)); 407 MOVAPS(SRC1, R(XMM0));
@@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) {
560} 550}
561 551
562void JitCompiler::Compile_END(Instruction instr) { 552void JitCompiler::Compile_END(Instruction instr) {
563 ABI_PopAllCalleeSavedRegsAndAdjustStack(); 553 ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
564 RET(); 554 RET();
565} 555}
566 556
@@ -755,7 +745,8 @@ CompiledShader* JitCompiler::Compile() {
755 const auto& code = g_state.vs.program_code; 745 const auto& code = g_state.vs.program_code;
756 unsigned offset = g_state.regs.vs.main_offset; 746 unsigned offset = g_state.regs.vs.main_offset;
757 747
758 ABI_PushAllCalleeSavedRegsAndAdjustStack(); 748 // The stack pointer is 8 modulo 16 at the entry of a procedure
749 ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
759 750
760 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); 751 MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
761 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); 752 MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 58828ecc8..8668cfff4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -77,8 +77,7 @@ private:
77 void Compile_EvaluateCondition(Instruction instr); 77 void Compile_EvaluateCondition(Instruction instr);
78 void Compile_UniformCondition(Instruction instr); 78 void Compile_UniformCondition(Instruction instr);
79 79
80 void Compile_PushCallerSavedXMM(); 80 BitSet32 PersistentCallerSavedRegs();
81 void Compile_PopCallerSavedXMM();
82 81
83 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 82 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
84 unsigned* offset_ptr = nullptr; 83 unsigned* offset_ptr = nullptr;