diff options
| author | 2015-08-26 09:12:14 +0200 | |
|---|---|---|
| committer | 2015-09-01 23:39:52 +0200 | |
| commit | 179ad35c2e6dff0c367dedb63c47a78c6cd052a5 (patch) | |
| tree | 8e60274a443cdd8e651ab768c45f0f7a1e6ecbf2 /src | |
| parent | Common: Import BitSet from Dolphin (diff) | |
| download | yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.tar.gz yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.tar.xz yuzu-179ad35c2e6dff0c367dedb63c47a78c6cd052a5.zip | |
x64: Proper stack alignment in shader JIT function calls
Import Dolphin stack handling and register saving routines
Also removes the x86 parts from abi files
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/x64/abi.cpp | 411 | ||||
| -rw-r--r-- | src/common/x64/abi.h | 61 | ||||
| -rw-r--r-- | src/common/x64/emitter.h | 42 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 3 |
5 files changed, 108 insertions, 452 deletions
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp index 4c07a6ebe..955eb86ce 100644 --- a/src/common/x64/abi.cpp +++ b/src/common/x64/abi.cpp | |||
| @@ -22,247 +22,69 @@ using namespace Gen; | |||
| 22 | 22 | ||
| 23 | // Shared code between Win64 and Unix64 | 23 | // Shared code between Win64 and Unix64 |
| 24 | 24 | ||
| 25 | // Sets up a __cdecl function. | 25 | void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { |
| 26 | void XEmitter::ABI_EmitPrologue(int maxCallParams) | 26 | size_t shadow = 0; |
| 27 | { | 27 | #if defined(_WIN32) |
| 28 | #ifdef _M_IX86 | 28 | shadow = 0x20; |
| 29 | // Don't really need to do anything | ||
| 30 | #elif defined(ARCHITECTURE_x86_64) | ||
| 31 | #if _WIN32 | ||
| 32 | int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8; | ||
| 33 | // Set up a stack frame so that we can call functions | ||
| 34 | // TODO: use maxCallParams | ||
| 35 | SUB(64, R(RSP), Imm8(stacksize)); | ||
| 36 | #endif | ||
| 37 | #else | ||
| 38 | #error Arch not supported | ||
| 39 | #endif | 29 | #endif |
| 40 | } | ||
| 41 | |||
| 42 | void XEmitter::ABI_EmitEpilogue(int maxCallParams) | ||
| 43 | { | ||
| 44 | #ifdef _M_IX86 | ||
| 45 | RET(); | ||
| 46 | #elif defined(ARCHITECTURE_x86_64) | ||
| 47 | #ifdef _WIN32 | ||
| 48 | int stacksize = ((maxCallParams+1)&~1)*8 + 8; | ||
| 49 | ADD(64, R(RSP), Imm8(stacksize)); | ||
| 50 | #endif | ||
| 51 | RET(); | ||
| 52 | #else | ||
| 53 | #error Arch not supported | ||
| 54 | |||
| 55 | |||
| 56 | #endif | ||
| 57 | } | ||
| 58 | |||
| 59 | #ifdef _M_IX86 // All32 | ||
| 60 | |||
| 61 | // Shared code between Win32 and Unix32 | ||
| 62 | void XEmitter::ABI_CallFunction(const void *func) { | ||
| 63 | ABI_AlignStack(0); | ||
| 64 | CALL(func); | ||
| 65 | ABI_RestoreStack(0); | ||
| 66 | } | ||
| 67 | |||
| 68 | void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { | ||
| 69 | ABI_AlignStack(1 * 2); | ||
| 70 | PUSH(16, Imm16(param1)); | ||
| 71 | CALL(func); | ||
| 72 | ABI_RestoreStack(1 * 2); | ||
| 73 | } | ||
| 74 | |||
| 75 | void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { | ||
| 76 | ABI_AlignStack(1 * 2 + 1 * 4); | ||
| 77 | PUSH(16, Imm16(param2)); | ||
| 78 | PUSH(32, Imm32(param1)); | ||
| 79 | CALL(func); | ||
| 80 | ABI_RestoreStack(1 * 2 + 1 * 4); | ||
| 81 | } | ||
| 82 | |||
| 83 | void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { | ||
| 84 | ABI_AlignStack(1 * 4); | ||
| 85 | PUSH(32, Imm32(param1)); | ||
| 86 | CALL(func); | ||
| 87 | ABI_RestoreStack(1 * 4); | ||
| 88 | } | ||
| 89 | |||
| 90 | void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { | ||
| 91 | ABI_AlignStack(2 * 4); | ||
| 92 | PUSH(32, Imm32(param2)); | ||
| 93 | PUSH(32, Imm32(param1)); | ||
| 94 | CALL(func); | ||
| 95 | ABI_RestoreStack(2 * 4); | ||
| 96 | } | ||
| 97 | |||
| 98 | void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { | ||
| 99 | ABI_AlignStack(3 * 4); | ||
| 100 | PUSH(32, Imm32(param3)); | ||
| 101 | PUSH(32, Imm32(param2)); | ||
| 102 | PUSH(32, Imm32(param1)); | ||
| 103 | CALL(func); | ||
| 104 | ABI_RestoreStack(3 * 4); | ||
| 105 | } | ||
| 106 | |||
| 107 | void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { | ||
| 108 | ABI_AlignStack(3 * 4); | ||
| 109 | PUSH(32, ImmPtr(param3)); | ||
| 110 | PUSH(32, Imm32(param2)); | ||
| 111 | PUSH(32, Imm32(param1)); | ||
| 112 | CALL(func); | ||
| 113 | ABI_RestoreStack(3 * 4); | ||
| 114 | } | ||
| 115 | |||
| 116 | void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) { | ||
| 117 | ABI_AlignStack(4 * 4); | ||
| 118 | PUSH(32, ImmPtr(param4)); | ||
| 119 | PUSH(32, Imm32(param3)); | ||
| 120 | PUSH(32, Imm32(param2)); | ||
| 121 | PUSH(32, Imm32(param1)); | ||
| 122 | CALL(func); | ||
| 123 | ABI_RestoreStack(4 * 4); | ||
| 124 | } | ||
| 125 | |||
| 126 | void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { | ||
| 127 | ABI_AlignStack(1 * 4); | ||
| 128 | PUSH(32, ImmPtr(param1)); | ||
| 129 | CALL(func); | ||
| 130 | ABI_RestoreStack(1 * 4); | ||
| 131 | } | ||
| 132 | |||
| 133 | void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { | ||
| 134 | ABI_AlignStack(2 * 4); | ||
| 135 | PUSH(32, arg2); | ||
| 136 | PUSH(32, ImmPtr(param1)); | ||
| 137 | CALL(func); | ||
| 138 | ABI_RestoreStack(2 * 4); | ||
| 139 | } | ||
| 140 | |||
| 141 | void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { | ||
| 142 | ABI_AlignStack(3 * 4); | ||
| 143 | PUSH(32, arg3); | ||
| 144 | PUSH(32, arg2); | ||
| 145 | PUSH(32, ImmPtr(param1)); | ||
| 146 | CALL(func); | ||
| 147 | ABI_RestoreStack(3 * 4); | ||
| 148 | } | ||
| 149 | |||
| 150 | void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { | ||
| 151 | ABI_AlignStack(3 * 4); | ||
| 152 | PUSH(32, Imm32(param3)); | ||
| 153 | PUSH(32, ImmPtr(param2)); | ||
| 154 | PUSH(32, ImmPtr(param1)); | ||
| 155 | CALL(func); | ||
| 156 | ABI_RestoreStack(3 * 4); | ||
| 157 | } | ||
| 158 | |||
| 159 | // Pass a register as a parameter. | ||
| 160 | void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { | ||
| 161 | ABI_AlignStack(1 * 4); | ||
| 162 | PUSH(32, R(reg1)); | ||
| 163 | CALL(func); | ||
| 164 | ABI_RestoreStack(1 * 4); | ||
| 165 | } | ||
| 166 | |||
| 167 | // Pass two registers as parameters. | ||
| 168 | void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) | ||
| 169 | { | ||
| 170 | ABI_AlignStack(2 * 4); | ||
| 171 | PUSH(32, R(reg2)); | ||
| 172 | PUSH(32, R(reg1)); | ||
| 173 | CALL(func); | ||
| 174 | ABI_RestoreStack(2 * 4); | ||
| 175 | } | ||
| 176 | 30 | ||
| 177 | void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) | 31 | int count = (mask & ABI_ALL_GPRS).Count(); |
| 178 | { | 32 | rsp_alignment -= count * 8; |
| 179 | ABI_AlignStack(2 * 4); | 33 | size_t subtraction = 0; |
| 180 | PUSH(32, Imm32(param2)); | 34 | int fpr_count = (mask & ABI_ALL_FPRS).Count(); |
| 181 | PUSH(32, arg1); | 35 | if (fpr_count) { |
| 182 | CALL(func); | 36 | // If we have any XMMs to save, we must align the stack here. |
| 183 | ABI_RestoreStack(2 * 4); | 37 | subtraction = rsp_alignment & 0xf; |
| 184 | } | 38 | } |
| 39 | subtraction += 16 * fpr_count; | ||
| 40 | size_t xmm_base_subtraction = subtraction; | ||
| 41 | subtraction += needed_frame_size; | ||
| 42 | subtraction += shadow; | ||
| 43 | // Final alignment. | ||
| 44 | rsp_alignment -= subtraction; | ||
| 45 | subtraction += rsp_alignment & 0xf; | ||
| 185 | 46 | ||
| 186 | void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) | 47 | *shadowp = shadow; |
| 187 | { | 48 | *subtractionp = subtraction; |
| 188 | ABI_AlignStack(3 * 4); | 49 | *xmm_offsetp = subtraction - xmm_base_subtraction; |
| 189 | PUSH(32, Imm32(param3)); | ||
| 190 | PUSH(32, Imm32(param2)); | ||
| 191 | PUSH(32, arg1); | ||
| 192 | CALL(func); | ||
| 193 | ABI_RestoreStack(3 * 4); | ||
| 194 | } | 50 | } |
| 195 | 51 | ||
| 196 | void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) | 52 | size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { |
| 197 | { | 53 | size_t shadow, subtraction, xmm_offset; |
| 198 | ABI_AlignStack(1 * 4); | 54 | ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); |
| 199 | PUSH(32, arg1); | ||
| 200 | CALL(func); | ||
| 201 | ABI_RestoreStack(1 * 4); | ||
| 202 | } | ||
| 203 | 55 | ||
| 204 | void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) | 56 | for (int r : mask & ABI_ALL_GPRS) |
| 205 | { | 57 | PUSH((X64Reg)r); |
| 206 | ABI_AlignStack(2 * 4); | ||
| 207 | PUSH(32, arg2); | ||
| 208 | PUSH(32, arg1); | ||
| 209 | CALL(func); | ||
| 210 | ABI_RestoreStack(2 * 4); | ||
| 211 | } | ||
| 212 | 58 | ||
| 213 | void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { | 59 | if (subtraction) |
| 214 | // Note: 4 * 4 = 16 bytes, so alignment is preserved. | 60 | SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); |
| 215 | PUSH(EBP); | ||
| 216 | PUSH(EBX); | ||
| 217 | PUSH(ESI); | ||
| 218 | PUSH(EDI); | ||
| 219 | } | ||
| 220 | 61 | ||
| 221 | void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { | 62 | for (int x : mask & ABI_ALL_FPRS) { |
| 222 | POP(EDI); | 63 | MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16)); |
| 223 | POP(ESI); | 64 | xmm_offset += 16; |
| 224 | POP(EBX); | 65 | } |
| 225 | POP(EBP); | ||
| 226 | } | ||
| 227 | 66 | ||
| 228 | unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { | 67 | return shadow; |
| 229 | frameSize += 4; // reserve space for return address | ||
| 230 | unsigned int alignedSize = | ||
| 231 | #ifdef __GNUC__ | ||
| 232 | (frameSize + 15) & -16; | ||
| 233 | #else | ||
| 234 | (frameSize + 3) & -4; | ||
| 235 | #endif | ||
| 236 | return alignedSize; | ||
| 237 | } | 68 | } |
| 238 | 69 | ||
| 70 | void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { | ||
| 71 | size_t shadow, subtraction, xmm_offset; | ||
| 72 | ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); | ||
| 239 | 73 | ||
| 240 | void XEmitter::ABI_AlignStack(unsigned int frameSize) { | 74 | for (int x : mask & ABI_ALL_FPRS) { |
| 241 | // Mac OS X requires the stack to be 16-byte aligned before every call. | 75 | MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset)); |
| 242 | // Linux requires the stack to be 16-byte aligned before calls that put SSE | 76 | xmm_offset += 16; |
| 243 | // vectors on the stack, but since we do not keep track of which calls do that, | ||
| 244 | // it is effectively every call as well. | ||
| 245 | // Windows binaries compiled with MSVC do not have such a restriction*, but I | ||
| 246 | // expect that GCC on Windows acts the same as GCC on Linux in this respect. | ||
| 247 | // It would be nice if someone could verify this. | ||
| 248 | // *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times. | ||
| 249 | unsigned int fillSize = | ||
| 250 | ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4); | ||
| 251 | if (fillSize != 0) { | ||
| 252 | SUB(32, R(ESP), Imm8(fillSize)); | ||
| 253 | } | 77 | } |
| 254 | } | ||
| 255 | 78 | ||
| 256 | void XEmitter::ABI_RestoreStack(unsigned int frameSize) { | 79 | if (subtraction) |
| 257 | unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); | 80 | ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); |
| 258 | alignedSize -= 4; // return address is POPped at end of call | 81 | |
| 259 | if (alignedSize != 0) { | 82 | for (int r = 15; r >= 0; r--) { |
| 260 | ADD(32, R(ESP), Imm8(alignedSize)); | 83 | if (mask[r]) |
| 84 | POP((X64Reg)r); | ||
| 261 | } | 85 | } |
| 262 | } | 86 | } |
| 263 | 87 | ||
| 264 | #else //64bit | ||
| 265 | |||
| 266 | // Common functions | 88 | // Common functions |
| 267 | void XEmitter::ABI_CallFunction(const void *func) { | 89 | void XEmitter::ABI_CallFunction(const void *func) { |
| 268 | u64 distance = u64(func) - (u64(code) + 5); | 90 | u64 distance = u64(func) - (u64(code) + 5); |
| @@ -538,143 +360,4 @@ void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, cons | |||
| 538 | } else { | 360 | } else { |
| 539 | CALL(func); | 361 | CALL(func); |
| 540 | } | 362 | } |
| 541 | } | 363 | } \ No newline at end of file |
| 542 | |||
| 543 | unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { | ||
| 544 | return frameSize; | ||
| 545 | } | ||
| 546 | |||
| 547 | #ifdef _WIN32 | ||
| 548 | |||
| 549 | // The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs. | ||
| 550 | // But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs. | ||
| 551 | // Let's just save all 16. | ||
| 552 | const int XMM_STACK_SPACE = 16 * 16; | ||
| 553 | |||
| 554 | // Win64 Specific Code | ||
| 555 | void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { | ||
| 556 | //we only want to do this once | ||
| 557 | PUSH(RBX); | ||
| 558 | PUSH(RSI); | ||
| 559 | PUSH(RDI); | ||
| 560 | PUSH(RBP); | ||
| 561 | PUSH(R12); | ||
| 562 | PUSH(R13); | ||
| 563 | PUSH(R14); | ||
| 564 | PUSH(R15); | ||
| 565 | ABI_AlignStack(0); | ||
| 566 | |||
| 567 | // Do this after aligning, because before it's offset by 8. | ||
| 568 | SUB(64, R(RSP), Imm32(XMM_STACK_SPACE)); | ||
| 569 | for (int i = 0; i < 16; ++i) | ||
| 570 | MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i)); | ||
| 571 | } | ||
| 572 | |||
| 573 | void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { | ||
| 574 | for (int i = 0; i < 16; ++i) | ||
| 575 | MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16)); | ||
| 576 | ADD(64, R(RSP), Imm32(XMM_STACK_SPACE)); | ||
| 577 | |||
| 578 | ABI_RestoreStack(0); | ||
| 579 | POP(R15); | ||
| 580 | POP(R14); | ||
| 581 | POP(R13); | ||
| 582 | POP(R12); | ||
| 583 | POP(RBP); | ||
| 584 | POP(RDI); | ||
| 585 | POP(RSI); | ||
| 586 | POP(RBX); | ||
| 587 | } | ||
| 588 | |||
| 589 | // Win64 Specific Code | ||
| 590 | void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { | ||
| 591 | PUSH(RCX); | ||
| 592 | PUSH(RDX); | ||
| 593 | PUSH(RSI); | ||
| 594 | PUSH(RDI); | ||
| 595 | PUSH(R8); | ||
| 596 | PUSH(R9); | ||
| 597 | PUSH(R10); | ||
| 598 | PUSH(R11); | ||
| 599 | // TODO: Callers preserve XMM4-5 (XMM0-3 are args.) | ||
| 600 | ABI_AlignStack(0); | ||
| 601 | } | ||
| 602 | |||
| 603 | void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { | ||
| 604 | ABI_RestoreStack(0); | ||
| 605 | POP(R11); | ||
| 606 | POP(R10); | ||
| 607 | POP(R9); | ||
| 608 | POP(R8); | ||
| 609 | POP(RDI); | ||
| 610 | POP(RSI); | ||
| 611 | POP(RDX); | ||
| 612 | POP(RCX); | ||
| 613 | } | ||
| 614 | |||
| 615 | void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { | ||
| 616 | SUB(64, R(RSP), Imm8(0x28)); | ||
| 617 | } | ||
| 618 | |||
| 619 | void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { | ||
| 620 | ADD(64, R(RSP), Imm8(0x28)); | ||
| 621 | } | ||
| 622 | |||
| 623 | #else | ||
| 624 | // Unix64 Specific Code | ||
| 625 | void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { | ||
| 626 | PUSH(RBX); | ||
| 627 | PUSH(RBP); | ||
| 628 | PUSH(R12); | ||
| 629 | PUSH(R13); | ||
| 630 | PUSH(R14); | ||
| 631 | PUSH(R15); | ||
| 632 | PUSH(R15); //just to align stack. duped push/pop doesn't hurt. | ||
| 633 | // TODO: XMM? | ||
| 634 | } | ||
| 635 | |||
| 636 | void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { | ||
| 637 | POP(R15); | ||
| 638 | POP(R15); | ||
| 639 | POP(R14); | ||
| 640 | POP(R13); | ||
| 641 | POP(R12); | ||
| 642 | POP(RBP); | ||
| 643 | POP(RBX); | ||
| 644 | } | ||
| 645 | |||
| 646 | void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { | ||
| 647 | PUSH(RCX); | ||
| 648 | PUSH(RDX); | ||
| 649 | PUSH(RSI); | ||
| 650 | PUSH(RDI); | ||
| 651 | PUSH(R8); | ||
| 652 | PUSH(R9); | ||
| 653 | PUSH(R10); | ||
| 654 | PUSH(R11); | ||
| 655 | PUSH(R11); | ||
| 656 | } | ||
| 657 | |||
| 658 | void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { | ||
| 659 | POP(R11); | ||
| 660 | POP(R11); | ||
| 661 | POP(R10); | ||
| 662 | POP(R9); | ||
| 663 | POP(R8); | ||
| 664 | POP(RDI); | ||
| 665 | POP(RSI); | ||
| 666 | POP(RDX); | ||
| 667 | POP(RCX); | ||
| 668 | } | ||
| 669 | |||
| 670 | void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { | ||
| 671 | SUB(64, R(RSP), Imm8(0x08)); | ||
| 672 | } | ||
| 673 | |||
| 674 | void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { | ||
| 675 | ADD(64, R(RSP), Imm8(0x08)); | ||
| 676 | } | ||
| 677 | |||
| 678 | #endif // WIN32 | ||
| 679 | |||
| 680 | #endif // 32bit | ||
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h index 7e9c156ae..de6d62fdd 100644 --- a/src/common/x64/abi.h +++ b/src/common/x64/abi.h | |||
| @@ -1,35 +1,15 @@ | |||
| 1 | // Copyright (C) 2003 Dolphin Project. | 1 | // Copyright 2008 Dolphin Emulator Project |
| 2 | 2 | // Licensed under GPLv2+ | |
| 3 | // This program is free software: you can redistribute it and/or modify | 3 | // Refer to the license.txt file included. |
| 4 | // it under the terms of the GNU General Public License as published by | ||
| 5 | // the Free Software Foundation, version 2.0 or later versions. | ||
| 6 | |||
| 7 | // This program is distributed in the hope that it will be useful, | ||
| 8 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 9 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 10 | // GNU General Public License 2.0 for more details. | ||
| 11 | |||
| 12 | // A copy of the GPL 2.0 should have been included with the program. | ||
| 13 | // If not, see http://www.gnu.org/licenses/ | ||
| 14 | |||
| 15 | // Official SVN repository and contact information can be found at | ||
| 16 | // http://code.google.com/p/dolphin-emu/ | ||
| 17 | 4 | ||
| 18 | #pragma once | 5 | #pragma once |
| 19 | 6 | ||
| 20 | #include "common/common_types.h" | 7 | #include "common/bit_set.h" |
| 8 | #include "emitter.h" | ||
| 21 | 9 | ||
| 22 | // x86/x64 ABI:s, and helpers to help follow them when JIT-ing code. | 10 | // x64 ABI:s, and helpers to help follow them when JIT-ing code. |
| 23 | // All conventions return values in EAX (+ possibly EDX). | 11 | // All conventions return values in EAX (+ possibly EDX). |
| 24 | 12 | ||
| 25 | // Linux 32-bit, Windows 32-bit (cdecl, System V): | ||
| 26 | // * Caller pushes left to right | ||
| 27 | // * Caller fixes stack after call | ||
| 28 | // * function subtract from stack for local storage only. | ||
| 29 | // Scratch: EAX ECX EDX | ||
| 30 | // Callee-save: EBX ESI EDI EBP | ||
| 31 | // Parameters: - | ||
| 32 | |||
| 33 | // Windows 64-bit | 13 | // Windows 64-bit |
| 34 | // * 4-reg "fastcall" variant, very new-skool stack handling | 14 | // * 4-reg "fastcall" variant, very new-skool stack handling |
| 35 | // * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ | 15 | // * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ |
| @@ -44,18 +24,8 @@ | |||
| 44 | // Callee-save: RBX RBP R12 R13 R14 R15 | 24 | // Callee-save: RBX RBP R12 R13 R14 R15 |
| 45 | // Parameters: RDI RSI RDX RCX R8 R9 | 25 | // Parameters: RDI RSI RDX RCX R8 R9 |
| 46 | 26 | ||
| 47 | #ifdef _M_IX86 // 32 bit calling convention, shared by all | 27 | #define ABI_ALL_FPRS BitSet32(0xffff0000) |
| 48 | 28 | #define ABI_ALL_GPRS BitSet32(0x0000ffff) | |
| 49 | // 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to | ||
| 50 | // choose regs to put stuff in. | ||
| 51 | #define ABI_PARAM1 RCX | ||
| 52 | #define ABI_PARAM2 RDX | ||
| 53 | |||
| 54 | // There are no ABI_PARAM* here, since args are pushed. | ||
| 55 | // 32-bit bog standard cdecl, shared between linux and windows | ||
| 56 | // MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about. | ||
| 57 | |||
| 58 | #elif ARCHITECTURE_x86_64 // 64 bit calling convention | ||
| 59 | 29 | ||
| 60 | #ifdef _WIN32 // 64-bit Windows - the really exotic calling convention | 30 | #ifdef _WIN32 // 64-bit Windows - the really exotic calling convention |
| 61 | 31 | ||
| @@ -64,7 +34,11 @@ | |||
| 64 | #define ABI_PARAM3 R8 | 34 | #define ABI_PARAM3 R8 |
| 65 | #define ABI_PARAM4 R9 | 35 | #define ABI_PARAM4 R9 |
| 66 | 36 | ||
| 67 | #else //64-bit Unix (hopefully MacOSX too) | 37 | // xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. |
| 38 | #define ABI_ALL_CALLER_SAVED \ | ||
| 39 | (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \ | ||
| 40 | XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 }) | ||
| 41 | #else //64-bit Unix / OS X | ||
| 68 | 42 | ||
| 69 | #define ABI_PARAM1 RDI | 43 | #define ABI_PARAM1 RDI |
| 70 | #define ABI_PARAM2 RSI | 44 | #define ABI_PARAM2 RSI |
| @@ -73,6 +47,13 @@ | |||
| 73 | #define ABI_PARAM5 R8 | 47 | #define ABI_PARAM5 R8 |
| 74 | #define ABI_PARAM6 R9 | 48 | #define ABI_PARAM6 R9 |
| 75 | 49 | ||
| 50 | // TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably | ||
| 51 | // don't actually clobber them. | ||
| 52 | #define ABI_ALL_CALLER_SAVED \ | ||
| 53 | (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \ | ||
| 54 | ABI_ALL_FPRS) | ||
| 76 | #endif // WIN32 | 55 | #endif // WIN32 |
| 77 | 56 | ||
| 78 | #endif // X86 | 57 | #define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) |
| 58 | |||
| 59 | #define ABI_RETURN RAX | ||
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index a49cd2cf1..2dd0dc94e 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #pragma once | 18 | #pragma once |
| 19 | 19 | ||
| 20 | #include "common/assert.h" | 20 | #include "common/assert.h" |
| 21 | #include "common/bit_set.h" | ||
| 21 | #include "common/common_types.h" | 22 | #include "common/common_types.h" |
| 22 | #include "common/code_block.h" | 23 | #include "common/code_block.h" |
| 23 | 24 | ||
| @@ -356,7 +357,7 @@ private: | |||
| 356 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); | 357 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); |
| 357 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); | 358 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); |
| 358 | 359 | ||
| 359 | void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); | 360 | void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); |
| 360 | 361 | ||
| 361 | protected: | 362 | protected: |
| 362 | void Write8(u8 value); | 363 | void Write8(u8 value); |
| @@ -1007,25 +1008,26 @@ public: | |||
| 1007 | ABI_CallFunctionC((const void*)func, param1); | 1008 | ABI_CallFunctionC((const void*)func, param1); |
| 1008 | } | 1009 | } |
| 1009 | 1010 | ||
| 1010 | // A function that doesn't have any control over what it will do to regs, | 1011 | /** |
| 1011 | // such as the dispatcher, should be surrounded by these. | 1012 | * Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI |
| 1012 | void ABI_PushAllCalleeSavedRegsAndAdjustStack(); | 1013 | * |
| 1013 | void ABI_PopAllCalleeSavedRegsAndAdjustStack(); | 1014 | * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs) |
| 1014 | 1015 | * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8 | |
| 1015 | // A function that doesn't know anything about it's surroundings, should | 1016 | * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack |
| 1016 | // be surrounded by these to establish a safe environment, where it can roam free. | 1017 | * @return Size of the shadow space, i.e., offset of the frame |
| 1017 | // An example is a backpatch injected function. | 1018 | */ |
| 1018 | void ABI_PushAllCallerSavedRegsAndAdjustStack(); | 1019 | size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); |
| 1019 | void ABI_PopAllCallerSavedRegsAndAdjustStack(); | 1020 | |
| 1020 | 1021 | /** | |
| 1021 | unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); | 1022 | * Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before |
| 1022 | void ABI_AlignStack(unsigned int frameSize); | 1023 | * the matching PushRegistersAndAdjustStack. |
| 1023 | void ABI_RestoreStack(unsigned int frameSize); | 1024 | * |
| 1024 | 1025 | * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are GPRs) | |
| 1025 | // Sets up a __cdecl function. | 1026 | * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8 |
| 1026 | // Only x64 really needs the parameter count. | 1027 | * @param needed_frame_size Additional space that was needed |
| 1027 | void ABI_EmitPrologue(int maxCallParams); | 1028 | * @warning Stack must be currently 16-byte aligned |
| 1028 | void ABI_EmitEpilogue(int maxCallParams); | 1029 | */ |
| 1030 | void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); | ||
| 1029 | 1031 | ||
| 1030 | #ifdef _M_IX86 | 1032 | #ifdef _M_IX86 |
| 1031 | static int ABI_GetNumXMMRegs() { return 8; } | 1033 | static int ABI_GetNumXMMRegs() { return 8; } |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index c7b63a9b7..d6011832c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14; | |||
| 122 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR | 122 | /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR |
| 123 | static const X64Reg NEGBIT = XMM15; | 123 | static const X64Reg NEGBIT = XMM15; |
| 124 | 124 | ||
| 125 | // State registers that must not be modified by external function calls | ||
| 126 | // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed | ||
| 127 | static const BitSet32 persistent_regs = { | ||
| 128 | UNIFORMS, REGISTERS, // Pointers to register blocks | ||
| 129 | ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers | ||
| 130 | ONE+16, NEGBIT+16, // Constants | ||
| 131 | }; | ||
| 132 | |||
| 125 | /// Raw constant for the source register selector that indicates no swizzling is performed | 133 | /// Raw constant for the source register selector that indicates no swizzling is performed |
| 126 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; | 134 | static const u8 NO_SRC_REG_SWIZZLE = 0x1b; |
| 127 | /// Raw constant for the destination register enable mask that indicates all components are enabled | 135 | /// Raw constant for the destination register enable mask that indicates all components are enabled |
| @@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { | |||
| 295 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 303 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |
| 296 | } | 304 | } |
| 297 | 305 | ||
| 298 | void JitCompiler::Compile_PushCallerSavedXMM() { | 306 | BitSet32 JitCompiler::PersistentCallerSavedRegs() { |
| 299 | #ifndef _WIN32 | 307 | return persistent_regs & ABI_ALL_CALLER_SAVED; |
| 300 | SUB(64, R(RSP), Imm8(2 * 16)); | ||
| 301 | MOVUPS(MDisp(RSP, 16), ONE); | ||
| 302 | MOVUPS(MDisp(RSP, 0), NEGBIT); | ||
| 303 | #endif | ||
| 304 | } | ||
| 305 | |||
| 306 | void JitCompiler::Compile_PopCallerSavedXMM() { | ||
| 307 | #ifndef _WIN32 | ||
| 308 | MOVUPS(NEGBIT, MDisp(RSP, 0)); | ||
| 309 | MOVUPS(ONE, MDisp(RSP, 16)); | ||
| 310 | ADD(64, R(RSP), Imm8(2 * 16)); | ||
| 311 | #endif | ||
| 312 | } | 308 | } |
| 313 | 309 | ||
| 314 | void JitCompiler::Compile_ADD(Instruction instr) { | 310 | void JitCompiler::Compile_ADD(Instruction instr) { |
| @@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) { | |||
| 390 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 386 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 391 | MOVSS(XMM0, R(SRC1)); | 387 | MOVSS(XMM0, R(SRC1)); |
| 392 | 388 | ||
| 393 | // The following will actually break the stack alignment | 389 | ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 394 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 395 | Compile_PushCallerSavedXMM(); | ||
| 396 | ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); | 390 | ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); |
| 397 | Compile_PopCallerSavedXMM(); | 391 | ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 398 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 399 | 392 | ||
| 400 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | 393 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); |
| 401 | MOVAPS(SRC1, R(XMM0)); | 394 | MOVAPS(SRC1, R(XMM0)); |
| @@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) { | |||
| 406 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 399 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 407 | MOVSS(XMM0, R(SRC1)); | 400 | MOVSS(XMM0, R(SRC1)); |
| 408 | 401 | ||
| 409 | // The following will actually break the stack alignment | 402 | ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 410 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 411 | Compile_PushCallerSavedXMM(); | ||
| 412 | ABI_CallFunction(reinterpret_cast<const void*>(log2f)); | 403 | ABI_CallFunction(reinterpret_cast<const void*>(log2f)); |
| 413 | Compile_PopCallerSavedXMM(); | 404 | ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); |
| 414 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 415 | 405 | ||
| 416 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | 406 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); |
| 417 | MOVAPS(SRC1, R(XMM0)); | 407 | MOVAPS(SRC1, R(XMM0)); |
| @@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) { | |||
| 560 | } | 550 | } |
| 561 | 551 | ||
| 562 | void JitCompiler::Compile_END(Instruction instr) { | 552 | void JitCompiler::Compile_END(Instruction instr) { |
| 563 | ABI_PopAllCalleeSavedRegsAndAdjustStack(); | 553 | ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); |
| 564 | RET(); | 554 | RET(); |
| 565 | } | 555 | } |
| 566 | 556 | ||
| @@ -755,7 +745,8 @@ CompiledShader* JitCompiler::Compile() { | |||
| 755 | const auto& code = g_state.vs.program_code; | 745 | const auto& code = g_state.vs.program_code; |
| 756 | unsigned offset = g_state.regs.vs.main_offset; | 746 | unsigned offset = g_state.regs.vs.main_offset; |
| 757 | 747 | ||
| 758 | ABI_PushAllCalleeSavedRegsAndAdjustStack(); | 748 | // The stack pointer is 8 modulo 16 at the entry of a procedure |
| 749 | ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); | ||
| 759 | 750 | ||
| 760 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); | 751 | MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); |
| 761 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); | 752 | MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); |
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 58828ecc8..8668cfff4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -77,8 +77,7 @@ private: | |||
| 77 | void Compile_EvaluateCondition(Instruction instr); | 77 | void Compile_EvaluateCondition(Instruction instr); |
| 78 | void Compile_UniformCondition(Instruction instr); | 78 | void Compile_UniformCondition(Instruction instr); |
| 79 | 79 | ||
| 80 | void Compile_PushCallerSavedXMM(); | 80 | BitSet32 PersistentCallerSavedRegs(); |
| 81 | void Compile_PopCallerSavedXMM(); | ||
| 82 | 81 | ||
| 83 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 82 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. |
| 84 | unsigned* offset_ptr = nullptr; | 83 | unsigned* offset_ptr = nullptr; |