summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
authorGravatar Yuri Kunde Schlesner2016-12-12 01:23:08 -0800
committerGravatar Yuri Kunde Schlesner2016-12-14 20:06:08 -0800
commitf4e98ecf3f3c144e0db2bc866e3fe5aac39f6ec9 (patch)
tree9cb61994f928e64ca68d07150fc2cb5719fe6457 /src/common
parentExternals: Add Xbyak (diff)
downloadyuzu-f4e98ecf3f3c144e0db2bc866e3fe5aac39f6ec9.tar.gz
yuzu-f4e98ecf3f3c144e0db2bc866e3fe5aac39f6ec9.tar.xz
yuzu-f4e98ecf3f3c144e0db2bc866e3fe5aac39f6ec9.zip
VideoCore: Convert x64 shader JIT to use Xbyak for assembly
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/common/x64/xbyak_abi.h178
-rw-r--r--src/common/x64/xbyak_util.h49
3 files changed, 234 insertions, 1 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 74a271f08..e6c2ce335 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -71,9 +71,15 @@ if(ARCHITECTURE_x86_64)
71 set(HEADERS ${HEADERS} 71 set(HEADERS ${HEADERS}
72 x64/abi.h 72 x64/abi.h
73 x64/cpu_detect.h 73 x64/cpu_detect.h
74 x64/emitter.h) 74 x64/emitter.h
75 x64/xbyak_abi.h
76 x64/xbyak_util.h
77 )
75endif() 78endif()
76 79
77create_directory_groups(${SRCS} ${HEADERS}) 80create_directory_groups(${SRCS} ${HEADERS})
78 81
79add_library(common STATIC ${SRCS} ${HEADERS}) 82add_library(common STATIC ${SRCS} ${HEADERS})
83if (ARCHITECTURE_x86_64)
84 target_link_libraries(common xbyak)
85endif()
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..6090d93e1
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,178 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <initializer_list>
8#include <xbyak.h>
9#include "common/assert.h"
10#include "common/bit_set.h"
11
12namespace Common {
13namespace X64 {
14
15int RegToIndex(const Xbyak::Reg& reg) {
16 using Kind = Xbyak::Reg::Kind;
17 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
18 "RegSet only support GPRs and XMM registers.");
19 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
20 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
21}
22
23inline Xbyak::Reg64 IndexToReg64(int reg_index) {
24 ASSERT(reg_index < 16);
25 return Xbyak::Reg64(reg_index);
26}
27
28inline Xbyak::Xmm IndexToXmm(int reg_index) {
29 ASSERT(reg_index >= 16 && reg_index < 32);
30 return Xbyak::Xmm(reg_index - 16);
31}
32
33inline Xbyak::Reg IndexToReg(int reg_index) {
34 if (reg_index < 16) {
35 return IndexToReg64(reg_index);
36 } else {
37 return IndexToXmm(reg_index);
38 }
39}
40
41inline BitSet32 BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
42 BitSet32 bits;
43 for (const Xbyak::Reg& reg : regs) {
44 bits[RegToIndex(reg)] = true;
45 }
46 return bits;
47}
48
49const BitSet32 ABI_ALL_GPRS(0x0000FFFF);
50const BitSet32 ABI_ALL_XMMS(0xFFFF0000);
51
52#ifdef _WIN32
53
54// Microsoft x64 ABI
55const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
56const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
57const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
58const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
59const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
60
61const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
62 // GPRs
63 Xbyak::util::rcx, Xbyak::util::rdx, Xbyak::util::r8, Xbyak::util::r9, Xbyak::util::r10,
64 Xbyak::util::r11,
65 // XMMs
66 Xbyak::util::xmm0, Xbyak::util::xmm1, Xbyak::util::xmm2, Xbyak::util::xmm3, Xbyak::util::xmm4,
67 Xbyak::util::xmm5,
68});
69
70const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
71 // GPRs
72 Xbyak::util::rbx, Xbyak::util::rsi, Xbyak::util::rdi, Xbyak::util::rbp, Xbyak::util::r12,
73 Xbyak::util::r13, Xbyak::util::r14, Xbyak::util::r15,
74 // XMMs
75 Xbyak::util::xmm6, Xbyak::util::xmm7, Xbyak::util::xmm8, Xbyak::util::xmm9, Xbyak::util::xmm10,
76 Xbyak::util::xmm11, Xbyak::util::xmm12, Xbyak::util::xmm13, Xbyak::util::xmm14,
77 Xbyak::util::xmm15,
78});
79
// Extra bytes (32, i.e. 0x20) added to every frame computed by ABI_CalculateFrameSize on Windows.
constexpr size_t ABI_SHADOW_SPACE = 32;
81
82#else
83
84// System V x86-64 ABI
85const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
86const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
87const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
88const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
89const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
90
91const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
92 // GPRs
93 Xbyak::util::rcx, Xbyak::util::rdx, Xbyak::util::rdi, Xbyak::util::rsi, Xbyak::util::r8,
94 Xbyak::util::r9, Xbyak::util::r10, Xbyak::util::r11,
95 // XMMs
96 Xbyak::util::xmm0, Xbyak::util::xmm1, Xbyak::util::xmm2, Xbyak::util::xmm3, Xbyak::util::xmm4,
97 Xbyak::util::xmm5, Xbyak::util::xmm6, Xbyak::util::xmm7, Xbyak::util::xmm8, Xbyak::util::xmm9,
98 Xbyak::util::xmm10, Xbyak::util::xmm11, Xbyak::util::xmm12, Xbyak::util::xmm13,
99 Xbyak::util::xmm14, Xbyak::util::xmm15,
100});
101
102const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({
103 // GPRs
104 Xbyak::util::rbx, Xbyak::util::rbp, Xbyak::util::r12, Xbyak::util::r13, Xbyak::util::r14,
105 Xbyak::util::r15,
106});
107
// No extra bytes are added to computed frames on non-Windows builds.
constexpr size_t ABI_SHADOW_SPACE{0};
109
110#endif
111
112void ABI_CalculateFrameSize(BitSet32 regs, size_t rsp_alignment, size_t needed_frame_size,
113 s32* out_subtraction, s32* out_xmm_offset) {
114 int count = (regs & ABI_ALL_GPRS).Count();
115 rsp_alignment -= count * 8;
116 size_t subtraction = 0;
117 int xmm_count = (regs & ABI_ALL_XMMS).Count();
118 if (xmm_count) {
119 // If we have any XMMs to save, we must align the stack here.
120 subtraction = rsp_alignment & 0xF;
121 }
122 subtraction += 0x10 * xmm_count;
123 size_t xmm_base_subtraction = subtraction;
124 subtraction += needed_frame_size;
125 subtraction += ABI_SHADOW_SPACE;
126 // Final alignment.
127 rsp_alignment -= subtraction;
128 subtraction += rsp_alignment & 0xF;
129
130 *out_subtraction = (s32)subtraction;
131 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
132}
133
134size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs,
135 size_t rsp_alignment, size_t needed_frame_size = 0) {
136 s32 subtraction, xmm_offset;
137 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
138
139 for (int reg_index : (regs & ABI_ALL_GPRS)) {
140 code.push(IndexToReg64(reg_index));
141 }
142
143 if (subtraction != 0) {
144 code.sub(code.rsp, subtraction);
145 }
146
147 for (int reg_index : (regs & ABI_ALL_XMMS)) {
148 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(reg_index));
149 xmm_offset += 0x10;
150 }
151
152 return ABI_SHADOW_SPACE;
153}
154
155void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, size_t rsp_alignment,
156 size_t needed_frame_size = 0) {
157 s32 subtraction, xmm_offset;
158 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
159
160 for (int reg_index : (regs & ABI_ALL_XMMS)) {
161 code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + xmm_offset]);
162 xmm_offset += 0x10;
163 }
164
165 if (subtraction != 0) {
166 code.add(code.rsp, subtraction);
167 }
168
169 // GPRs need to be popped in reverse order
170 for (int reg_index = 15; reg_index >= 0; reg_index--) {
171 if (regs[reg_index]) {
172 code.pop(IndexToReg64(reg_index));
173 }
174 }
175}
176
177} // namespace X64
178} // namespace Common
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..0f52f704b
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,49 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common {
12namespace X64 {
13
// Comparison predicate values to pass as the immediate operand of cmpps/cmpss
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
25
/// Returns true when `target` can be reached from `ref` with a signed 32-bit displacement.
/// The displacement is measured from `ref + 5` (presumably the end of a 5-byte call/jmp
/// instruction emitted at `ref` - confirm against callers).
inline bool IsWithin2G(uintptr_t ref, uintptr_t target) {
    // Unsigned wrap-around makes backwards displacements show up as very large values.
    const uintptr_t displacement = target - (ref + 5);
    // In range when it is a small positive value or wraps to within 2 GiB below zero.
    return displacement < 0x8000'0000ULL || displacement > ~0x8000'0000ULL;
}
30
31inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
32 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
33}
34
35template <typename T>
36inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
37 static_assert(std::is_pointer<T>(), "Argument must be a (function) pointer.");
38 size_t addr = reinterpret_cast<size_t>(f);
39 if (IsWithin2G(code, addr)) {
40 code.call(f);
41 } else {
42 // ABI_RETURN is a safe temp register to use before a call
43 code.mov(ABI_RETURN, addr);
44 code.call(ABI_RETURN);
45 }
46}
47
48} // namespace X64
49} // namespace Common