summaryrefslogtreecommitdiff
path: root/src/common/x64
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64')
-rw-r--r--src/common/x64/abi.cpp127
-rw-r--r--src/common/x64/abi.h15
-rw-r--r--src/common/x64/cpu_detect.cpp92
-rw-r--r--src/common/x64/emitter.cpp2612
-rw-r--r--src/common/x64/emitter.h601
5 files changed, 2093 insertions, 1354 deletions
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp
index 955eb86ce..504b9c940 100644
--- a/src/common/x64/abi.cpp
+++ b/src/common/x64/abi.cpp
@@ -22,7 +22,8 @@ using namespace Gen;
22 22
23// Shared code between Win64 and Unix64 23// Shared code between Win64 and Unix64
24 24
25void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { 25void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
26 size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
26 size_t shadow = 0; 27 size_t shadow = 0;
27#if defined(_WIN32) 28#if defined(_WIN32)
28 shadow = 0x20; 29 shadow = 0x20;
@@ -49,17 +50,19 @@ void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_
49 *xmm_offsetp = subtraction - xmm_base_subtraction; 50 *xmm_offsetp = subtraction - xmm_base_subtraction;
50} 51}
51 52
52size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { 53size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
54 size_t needed_frame_size) {
53 size_t shadow, subtraction, xmm_offset; 55 size_t shadow, subtraction, xmm_offset;
54 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); 56 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
57 &xmm_offset);
55 58
56 for (int r : mask & ABI_ALL_GPRS) 59 for (int r : mask& ABI_ALL_GPRS)
57 PUSH((X64Reg)r); 60 PUSH((X64Reg)r);
58 61
59 if (subtraction) 62 if (subtraction)
60 SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); 63 SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
61 64
62 for (int x : mask & ABI_ALL_FPRS) { 65 for (int x : mask& ABI_ALL_FPRS) {
63 MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16)); 66 MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
64 xmm_offset += 16; 67 xmm_offset += 16;
65 } 68 }
@@ -67,12 +70,14 @@ size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_align
67 return shadow; 70 return shadow;
68} 71}
69 72
70void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { 73void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
74 size_t needed_frame_size) {
71 size_t shadow, subtraction, xmm_offset; 75 size_t shadow, subtraction, xmm_offset;
72 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); 76 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
77 &xmm_offset);
73 78
74 for (int x : mask & ABI_ALL_FPRS) { 79 for (int x : mask& ABI_ALL_FPRS) {
75 MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset)); 80 MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset));
76 xmm_offset += 16; 81 xmm_offset += 16;
77 } 82 }
78 83
@@ -86,10 +91,9 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignmen
86} 91}
87 92
88// Common functions 93// Common functions
89void XEmitter::ABI_CallFunction(const void *func) { 94void XEmitter::ABI_CallFunction(const void* func) {
90 u64 distance = u64(func) - (u64(code) + 5); 95 u64 distance = u64(func) - (u64(code) + 5);
91 if (distance >= 0x0000000080000000ULL 96 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
92 && distance < 0xFFFFFFFF80000000ULL) {
93 // Far call 97 // Far call
94 MOV(64, R(RAX), ImmPtr(func)); 98 MOV(64, R(RAX), ImmPtr(func));
95 CALLptr(R(RAX)); 99 CALLptr(R(RAX));
@@ -98,11 +102,10 @@ void XEmitter::ABI_CallFunction(const void *func) {
98 } 102 }
99} 103}
100 104
101void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { 105void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) {
102 MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); 106 MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
103 u64 distance = u64(func) - (u64(code) + 5); 107 u64 distance = u64(func) - (u64(code) + 5);
104 if (distance >= 0x0000000080000000ULL 108 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
105 && distance < 0xFFFFFFFF80000000ULL) {
106 // Far call 109 // Far call
107 MOV(64, R(RAX), ImmPtr(func)); 110 MOV(64, R(RAX), ImmPtr(func));
108 CALLptr(R(RAX)); 111 CALLptr(R(RAX));
@@ -111,25 +114,23 @@ void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
111 } 114 }
112} 115}
113 116
114void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { 117void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) {
115 MOV(32, R(ABI_PARAM1), Imm32(param1)); 118 MOV(32, R(ABI_PARAM1), Imm32(param1));
116 MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); 119 MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
117 u64 distance = u64(func) - (u64(code) + 5); 120 u64 distance = u64(func) - (u64(code) + 5);
118 if (distance >= 0x0000000080000000ULL 121 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
119 && distance < 0xFFFFFFFF80000000ULL) { 122 // Far call
120 // Far call 123 MOV(64, R(RAX), ImmPtr(func));
121 MOV(64, R(RAX), ImmPtr(func)); 124 CALLptr(R(RAX));
122 CALLptr(R(RAX));
123 } else { 125 } else {
124 CALL(func); 126 CALL(func);
125 } 127 }
126} 128}
127 129
128void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { 130void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) {
129 MOV(32, R(ABI_PARAM1), Imm32(param1)); 131 MOV(32, R(ABI_PARAM1), Imm32(param1));
130 u64 distance = u64(func) - (u64(code) + 5); 132 u64 distance = u64(func) - (u64(code) + 5);
131 if (distance >= 0x0000000080000000ULL 133 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
132 && distance < 0xFFFFFFFF80000000ULL) {
133 // Far call 134 // Far call
134 MOV(64, R(RAX), ImmPtr(func)); 135 MOV(64, R(RAX), ImmPtr(func));
135 CALLptr(R(RAX)); 136 CALLptr(R(RAX));
@@ -138,12 +139,11 @@ void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
138 } 139 }
139} 140}
140 141
141void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { 142void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) {
142 MOV(32, R(ABI_PARAM1), Imm32(param1)); 143 MOV(32, R(ABI_PARAM1), Imm32(param1));
143 MOV(32, R(ABI_PARAM2), Imm32(param2)); 144 MOV(32, R(ABI_PARAM2), Imm32(param2));
144 u64 distance = u64(func) - (u64(code) + 5); 145 u64 distance = u64(func) - (u64(code) + 5);
145 if (distance >= 0x0000000080000000ULL 146 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
146 && distance < 0xFFFFFFFF80000000ULL) {
147 // Far call 147 // Far call
148 MOV(64, R(RAX), ImmPtr(func)); 148 MOV(64, R(RAX), ImmPtr(func));
149 CALLptr(R(RAX)); 149 CALLptr(R(RAX));
@@ -152,13 +152,12 @@ void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
152 } 152 }
153} 153}
154 154
155void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { 155void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) {
156 MOV(32, R(ABI_PARAM1), Imm32(param1)); 156 MOV(32, R(ABI_PARAM1), Imm32(param1));
157 MOV(32, R(ABI_PARAM2), Imm32(param2)); 157 MOV(32, R(ABI_PARAM2), Imm32(param2));
158 MOV(32, R(ABI_PARAM3), Imm32(param3)); 158 MOV(32, R(ABI_PARAM3), Imm32(param3));
159 u64 distance = u64(func) - (u64(code) + 5); 159 u64 distance = u64(func) - (u64(code) + 5);
160 if (distance >= 0x0000000080000000ULL 160 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
161 && distance < 0xFFFFFFFF80000000ULL) {
162 // Far call 161 // Far call
163 MOV(64, R(RAX), ImmPtr(func)); 162 MOV(64, R(RAX), ImmPtr(func));
164 CALLptr(R(RAX)); 163 CALLptr(R(RAX));
@@ -167,13 +166,12 @@ void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32
167 } 166 }
168} 167}
169 168
170void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { 169void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) {
171 MOV(32, R(ABI_PARAM1), Imm32(param1)); 170 MOV(32, R(ABI_PARAM1), Imm32(param1));
172 MOV(32, R(ABI_PARAM2), Imm32(param2)); 171 MOV(32, R(ABI_PARAM2), Imm32(param2));
173 MOV(64, R(ABI_PARAM3), ImmPtr(param3)); 172 MOV(64, R(ABI_PARAM3), ImmPtr(param3));
174 u64 distance = u64(func) - (u64(code) + 5); 173 u64 distance = u64(func) - (u64(code) + 5);
175 if (distance >= 0x0000000080000000ULL 174 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
176 && distance < 0xFFFFFFFF80000000ULL) {
177 // Far call 175 // Far call
178 MOV(64, R(RAX), ImmPtr(func)); 176 MOV(64, R(RAX), ImmPtr(func));
179 CALLptr(R(RAX)); 177 CALLptr(R(RAX));
@@ -182,14 +180,14 @@ void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, voi
182 } 180 }
183} 181}
184 182
185void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) { 183void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3,
184 void* param4) {
186 MOV(32, R(ABI_PARAM1), Imm32(param1)); 185 MOV(32, R(ABI_PARAM1), Imm32(param1));
187 MOV(32, R(ABI_PARAM2), Imm32(param2)); 186 MOV(32, R(ABI_PARAM2), Imm32(param2));
188 MOV(32, R(ABI_PARAM3), Imm32(param3)); 187 MOV(32, R(ABI_PARAM3), Imm32(param3));
189 MOV(64, R(ABI_PARAM4), ImmPtr(param4)); 188 MOV(64, R(ABI_PARAM4), ImmPtr(param4));
190 u64 distance = u64(func) - (u64(code) + 5); 189 u64 distance = u64(func) - (u64(code) + 5);
191 if (distance >= 0x0000000080000000ULL 190 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
192 && distance < 0xFFFFFFFF80000000ULL) {
193 // Far call 191 // Far call
194 MOV(64, R(RAX), ImmPtr(func)); 192 MOV(64, R(RAX), ImmPtr(func));
195 CALLptr(R(RAX)); 193 CALLptr(R(RAX));
@@ -198,11 +196,10 @@ void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u3
198 } 196 }
199} 197}
200 198
201void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { 199void XEmitter::ABI_CallFunctionP(const void* func, void* param1) {
202 MOV(64, R(ABI_PARAM1), ImmPtr(param1)); 200 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
203 u64 distance = u64(func) - (u64(code) + 5); 201 u64 distance = u64(func) - (u64(code) + 5);
204 if (distance >= 0x0000000080000000ULL 202 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
205 && distance < 0xFFFFFFFF80000000ULL) {
206 // Far call 203 // Far call
207 MOV(64, R(RAX), ImmPtr(func)); 204 MOV(64, R(RAX), ImmPtr(func));
208 CALLptr(R(RAX)); 205 CALLptr(R(RAX));
@@ -211,13 +208,12 @@ void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
211 } 208 }
212} 209}
213 210
214void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { 211void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) {
215 MOV(64, R(ABI_PARAM1), ImmPtr(param1)); 212 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
216 if (!arg2.IsSimpleReg(ABI_PARAM2)) 213 if (!arg2.IsSimpleReg(ABI_PARAM2))
217 MOV(32, R(ABI_PARAM2), arg2); 214 MOV(32, R(ABI_PARAM2), arg2);
218 u64 distance = u64(func) - (u64(code) + 5); 215 u64 distance = u64(func) - (u64(code) + 5);
219 if (distance >= 0x0000000080000000ULL 216 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
220 && distance < 0xFFFFFFFF80000000ULL) {
221 // Far call 217 // Far call
222 MOV(64, R(RAX), ImmPtr(func)); 218 MOV(64, R(RAX), ImmPtr(func));
223 CALLptr(R(RAX)); 219 CALLptr(R(RAX));
@@ -226,15 +222,15 @@ void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpA
226 } 222 }
227} 223}
228 224
229void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { 225void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2,
226 const Gen::OpArg& arg3) {
230 MOV(64, R(ABI_PARAM1), ImmPtr(param1)); 227 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
231 if (!arg2.IsSimpleReg(ABI_PARAM2)) 228 if (!arg2.IsSimpleReg(ABI_PARAM2))
232 MOV(32, R(ABI_PARAM2), arg2); 229 MOV(32, R(ABI_PARAM2), arg2);
233 if (!arg3.IsSimpleReg(ABI_PARAM3)) 230 if (!arg3.IsSimpleReg(ABI_PARAM3))
234 MOV(32, R(ABI_PARAM3), arg3); 231 MOV(32, R(ABI_PARAM3), arg3);
235 u64 distance = u64(func) - (u64(code) + 5); 232 u64 distance = u64(func) - (u64(code) + 5);
236 if (distance >= 0x0000000080000000ULL 233 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
237 && distance < 0xFFFFFFFF80000000ULL) {
238 // Far call 234 // Far call
239 MOV(64, R(RAX), ImmPtr(func)); 235 MOV(64, R(RAX), ImmPtr(func));
240 CALLptr(R(RAX)); 236 CALLptr(R(RAX));
@@ -243,13 +239,12 @@ void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::Op
243 } 239 }
244} 240}
245 241
246void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { 242void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) {
247 MOV(64, R(ABI_PARAM1), ImmPtr(param1)); 243 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
248 MOV(64, R(ABI_PARAM2), ImmPtr(param2)); 244 MOV(64, R(ABI_PARAM2), ImmPtr(param2));
249 MOV(32, R(ABI_PARAM3), Imm32(param3)); 245 MOV(32, R(ABI_PARAM3), Imm32(param3));
250 u64 distance = u64(func) - (u64(code) + 5); 246 u64 distance = u64(func) - (u64(code) + 5);
251 if (distance >= 0x0000000080000000ULL 247 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
252 && distance < 0xFFFFFFFF80000000ULL) {
253 // Far call 248 // Far call
254 MOV(64, R(RAX), ImmPtr(func)); 249 MOV(64, R(RAX), ImmPtr(func));
255 CALLptr(R(RAX)); 250 CALLptr(R(RAX));
@@ -259,12 +254,11 @@ void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2,
259} 254}
260 255
261// Pass a register as a parameter. 256// Pass a register as a parameter.
262void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { 257void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) {
263 if (reg1 != ABI_PARAM1) 258 if (reg1 != ABI_PARAM1)
264 MOV(32, R(ABI_PARAM1), R(reg1)); 259 MOV(32, R(ABI_PARAM1), R(reg1));
265 u64 distance = u64(func) - (u64(code) + 5); 260 u64 distance = u64(func) - (u64(code) + 5);
266 if (distance >= 0x0000000080000000ULL 261 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
267 && distance < 0xFFFFFFFF80000000ULL) {
268 // Far call 262 // Far call
269 MOV(64, R(RAX), ImmPtr(func)); 263 MOV(64, R(RAX), ImmPtr(func));
270 CALLptr(R(RAX)); 264 CALLptr(R(RAX));
@@ -274,7 +268,7 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
274} 268}
275 269
276// Pass two registers as parameters. 270// Pass two registers as parameters.
277void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { 271void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) {
278 if (reg2 != ABI_PARAM1) { 272 if (reg2 != ABI_PARAM1) {
279 if (reg1 != ABI_PARAM1) 273 if (reg1 != ABI_PARAM1)
280 MOV(64, R(ABI_PARAM1), R(reg1)); 274 MOV(64, R(ABI_PARAM1), R(reg1));
@@ -287,8 +281,7 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
287 MOV(64, R(ABI_PARAM1), R(reg1)); 281 MOV(64, R(ABI_PARAM1), R(reg1));
288 } 282 }
289 u64 distance = u64(func) - (u64(code) + 5); 283 u64 distance = u64(func) - (u64(code) + 5);
290 if (distance >= 0x0000000080000000ULL 284 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
291 && distance < 0xFFFFFFFF80000000ULL) {
292 // Far call 285 // Far call
293 MOV(64, R(RAX), ImmPtr(func)); 286 MOV(64, R(RAX), ImmPtr(func));
294 CALLptr(R(RAX)); 287 CALLptr(R(RAX));
@@ -297,14 +290,12 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
297 } 290 }
298} 291}
299 292
300void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) 293void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) {
301{
302 if (!arg1.IsSimpleReg(ABI_PARAM1)) 294 if (!arg1.IsSimpleReg(ABI_PARAM1))
303 MOV(32, R(ABI_PARAM1), arg1); 295 MOV(32, R(ABI_PARAM1), arg1);
304 MOV(32, R(ABI_PARAM2), Imm32(param2)); 296 MOV(32, R(ABI_PARAM2), Imm32(param2));
305 u64 distance = u64(func) - (u64(code) + 5); 297 u64 distance = u64(func) - (u64(code) + 5);
306 if (distance >= 0x0000000080000000ULL 298 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
307 && distance < 0xFFFFFFFF80000000ULL) {
308 // Far call 299 // Far call
309 MOV(64, R(RAX), ImmPtr(func)); 300 MOV(64, R(RAX), ImmPtr(func));
310 CALLptr(R(RAX)); 301 CALLptr(R(RAX));
@@ -313,15 +304,14 @@ void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32
313 } 304 }
314} 305}
315 306
316void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) 307void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2,
317{ 308 u32 param3) {
318 if (!arg1.IsSimpleReg(ABI_PARAM1)) 309 if (!arg1.IsSimpleReg(ABI_PARAM1))
319 MOV(32, R(ABI_PARAM1), arg1); 310 MOV(32, R(ABI_PARAM1), arg1);
320 MOV(32, R(ABI_PARAM2), Imm32(param2)); 311 MOV(32, R(ABI_PARAM2), Imm32(param2));
321 MOV(64, R(ABI_PARAM3), Imm64(param3)); 312 MOV(64, R(ABI_PARAM3), Imm64(param3));
322 u64 distance = u64(func) - (u64(code) + 5); 313 u64 distance = u64(func) - (u64(code) + 5);
323 if (distance >= 0x0000000080000000ULL 314 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
324 && distance < 0xFFFFFFFF80000000ULL) {
325 // Far call 315 // Far call
326 MOV(64, R(RAX), ImmPtr(func)); 316 MOV(64, R(RAX), ImmPtr(func));
327 CALLptr(R(RAX)); 317 CALLptr(R(RAX));
@@ -330,13 +320,11 @@ void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32
330 } 320 }
331} 321}
332 322
333void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) 323void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) {
334{
335 if (!arg1.IsSimpleReg(ABI_PARAM1)) 324 if (!arg1.IsSimpleReg(ABI_PARAM1))
336 MOV(32, R(ABI_PARAM1), arg1); 325 MOV(32, R(ABI_PARAM1), arg1);
337 u64 distance = u64(func) - (u64(code) + 5); 326 u64 distance = u64(func) - (u64(code) + 5);
338 if (distance >= 0x0000000080000000ULL 327 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
339 && distance < 0xFFFFFFFF80000000ULL) {
340 // Far call 328 // Far call
341 MOV(64, R(RAX), ImmPtr(func)); 329 MOV(64, R(RAX), ImmPtr(func));
342 CALLptr(R(RAX)); 330 CALLptr(R(RAX));
@@ -345,15 +333,14 @@ void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
345 } 333 }
346} 334}
347 335
348void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) 336void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1,
349{ 337 const Gen::OpArg& arg2) {
350 if (!arg1.IsSimpleReg(ABI_PARAM1)) 338 if (!arg1.IsSimpleReg(ABI_PARAM1))
351 MOV(32, R(ABI_PARAM1), arg1); 339 MOV(32, R(ABI_PARAM1), arg1);
352 if (!arg2.IsSimpleReg(ABI_PARAM2)) 340 if (!arg2.IsSimpleReg(ABI_PARAM2))
353 MOV(32, R(ABI_PARAM2), arg2); 341 MOV(32, R(ABI_PARAM2), arg2);
354 u64 distance = u64(func) - (u64(code) + 5); 342 u64 distance = u64(func) - (u64(code) + 5);
355 if (distance >= 0x0000000080000000ULL 343 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
356 && distance < 0xFFFFFFFF80000000ULL) {
357 // Far call 344 // Far call
358 MOV(64, R(RAX), ImmPtr(func)); 345 MOV(64, R(RAX), ImmPtr(func));
359 CALLptr(R(RAX)); 346 CALLptr(R(RAX));
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h
index de6d62fdd..eaaf81d89 100644
--- a/src/common/x64/abi.h
+++ b/src/common/x64/abi.h
@@ -12,7 +12,8 @@
12 12
13// Windows 64-bit 13// Windows 64-bit
14// * 4-reg "fastcall" variant, very new-skool stack handling 14// * 4-reg "fastcall" variant, very new-skool stack handling
15// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ 15// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
16// calls_
16// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. 17// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
17// Scratch: RAX RCX RDX R8 R9 R10 R11 18// Scratch: RAX RCX RDX R8 R9 R10 R11
18// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15 19// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
@@ -35,10 +36,10 @@
35#define ABI_PARAM4 R9 36#define ABI_PARAM4 R9
36 37
37// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. 38// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
38#define ABI_ALL_CALLER_SAVED \ 39#define ABI_ALL_CALLER_SAVED \
39 (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \ 40 (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
40 XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 }) 41 XMM4 + 16, XMM5 + 16})
41#else //64-bit Unix / OS X 42#else // 64-bit Unix / OS X
42 43
43#define ABI_PARAM1 RDI 44#define ABI_PARAM1 RDI
44#define ABI_PARAM2 RSI 45#define ABI_PARAM2 RSI
@@ -49,9 +50,7 @@
49 50
50// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably 51// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
51// don't actually clobber them. 52// don't actually clobber them.
52#define ABI_ALL_CALLER_SAVED \ 53#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
53 (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
54 ABI_ALL_FPRS)
55#endif // WIN32 54#endif // WIN32
56 55
57#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) 56#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index d9c430c67..19f1a4030 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -15,8 +15,8 @@ namespace Common {
15#ifndef _MSC_VER 15#ifndef _MSC_VER
16 16
17#ifdef __FreeBSD__ 17#ifdef __FreeBSD__
18#include <sys/types.h>
19#include <machine/cpufunc.h> 18#include <machine/cpufunc.h>
19#include <sys/types.h>
20#endif 20#endif
21 21
22static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { 22static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
@@ -26,15 +26,9 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
26#else 26#else
27 info[0] = function_id; // eax 27 info[0] = function_id; // eax
28 info[2] = subfunction_id; // ecx 28 info[2] = subfunction_id; // ecx
29 __asm__( 29 __asm__("cpuid"
30 "cpuid" 30 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
31 : "=a" (info[0]), 31 : "a"(function_id), "c"(subfunction_id));
32 "=b" (info[1]),
33 "=c" (info[2]),
34 "=d" (info[3])
35 : "a" (function_id),
36 "c" (subfunction_id)
37 );
38#endif 32#endif
39} 33}
40 34
@@ -88,14 +82,22 @@ static CPUCaps Detect() {
88 if (max_std_fn >= 1) { 82 if (max_std_fn >= 1) {
89 __cpuid(cpu_id, 0x00000001); 83 __cpuid(cpu_id, 0x00000001);
90 84
91 if ((cpu_id[3] >> 25) & 1) caps.sse = true; 85 if ((cpu_id[3] >> 25) & 1)
92 if ((cpu_id[3] >> 26) & 1) caps.sse2 = true; 86 caps.sse = true;
93 if ((cpu_id[2]) & 1) caps.sse3 = true; 87 if ((cpu_id[3] >> 26) & 1)
94 if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true; 88 caps.sse2 = true;
95 if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true; 89 if ((cpu_id[2]) & 1)
96 if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true; 90 caps.sse3 = true;
97 if ((cpu_id[2] >> 22) & 1) caps.movbe = true; 91 if ((cpu_id[2] >> 9) & 1)
98 if ((cpu_id[2] >> 25) & 1) caps.aes = true; 92 caps.ssse3 = true;
93 if ((cpu_id[2] >> 19) & 1)
94 caps.sse4_1 = true;
95 if ((cpu_id[2] >> 20) & 1)
96 caps.sse4_2 = true;
97 if ((cpu_id[2] >> 22) & 1)
98 caps.movbe = true;
99 if ((cpu_id[2] >> 25) & 1)
100 caps.aes = true;
99 101
100 if ((cpu_id[3] >> 24) & 1) { 102 if ((cpu_id[3] >> 24) & 1) {
101 caps.fxsave_fxrstor = true; 103 caps.fxsave_fxrstor = true;
@@ -140,10 +142,14 @@ static CPUCaps Detect() {
140 if (max_ex_fn >= 0x80000001) { 142 if (max_ex_fn >= 0x80000001) {
141 // Check for more features 143 // Check for more features
142 __cpuid(cpu_id, 0x80000001); 144 __cpuid(cpu_id, 0x80000001);
143 if (cpu_id[2] & 1) caps.lahf_sahf_64 = true; 145 if (cpu_id[2] & 1)
144 if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true; 146 caps.lahf_sahf_64 = true;
145 if ((cpu_id[2] >> 16) & 1) caps.fma4 = true; 147 if ((cpu_id[2] >> 5) & 1)
146 if ((cpu_id[3] >> 29) & 1) caps.long_mode = true; 148 caps.lzcnt = true;
149 if ((cpu_id[2] >> 16) & 1)
150 caps.fma4 = true;
151 if ((cpu_id[3] >> 29) & 1)
152 caps.long_mode = true;
147 } 153 }
148 154
149 return caps; 155 return caps;
@@ -162,24 +168,38 @@ std::string GetCPUCapsString() {
162 sum += caps.brand_string; 168 sum += caps.brand_string;
163 sum += ")"; 169 sum += ")";
164 170
165 if (caps.sse) sum += ", SSE"; 171 if (caps.sse)
172 sum += ", SSE";
166 if (caps.sse2) { 173 if (caps.sse2) {
167 sum += ", SSE2"; 174 sum += ", SSE2";
168 if (!caps.flush_to_zero) sum += " (without DAZ)"; 175 if (!caps.flush_to_zero)
176 sum += " (without DAZ)";
169 } 177 }
170 178
171 if (caps.sse3) sum += ", SSE3"; 179 if (caps.sse3)
172 if (caps.ssse3) sum += ", SSSE3"; 180 sum += ", SSE3";
173 if (caps.sse4_1) sum += ", SSE4.1"; 181 if (caps.ssse3)
174 if (caps.sse4_2) sum += ", SSE4.2"; 182 sum += ", SSSE3";
175 if (caps.avx) sum += ", AVX"; 183 if (caps.sse4_1)
176 if (caps.avx2) sum += ", AVX2"; 184 sum += ", SSE4.1";
177 if (caps.bmi1) sum += ", BMI1"; 185 if (caps.sse4_2)
178 if (caps.bmi2) sum += ", BMI2"; 186 sum += ", SSE4.2";
179 if (caps.fma) sum += ", FMA"; 187 if (caps.avx)
180 if (caps.aes) sum += ", AES"; 188 sum += ", AVX";
181 if (caps.movbe) sum += ", MOVBE"; 189 if (caps.avx2)
182 if (caps.long_mode) sum += ", 64-bit support"; 190 sum += ", AVX2";
191 if (caps.bmi1)
192 sum += ", BMI1";
193 if (caps.bmi2)
194 sum += ", BMI2";
195 if (caps.fma)
196 sum += ", FMA";
197 if (caps.aes)
198 sum += ", AES";
199 if (caps.movbe)
200 sum += ", MOVBE";
201 if (caps.long_mode)
202 sum += ", 64-bit support";
183 203
184 return sum; 204 return sum;
185} 205}
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 5662f7f86..1a9fd6a6b 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -26,179 +26,162 @@
26#include "cpu_detect.h" 26#include "cpu_detect.h"
27#include "emitter.h" 27#include "emitter.h"
28 28
29namespace Gen 29namespace Gen {
30{
31 30
32struct NormalOpDef 31struct NormalOpDef {
33{
34 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; 32 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
35}; 33};
36 34
37// 0xCC is code for invalid combination of immediates 35// 0xCC is code for invalid combination of immediates
38static const NormalOpDef normalops[11] = 36static const NormalOpDef normalops[11] = {
39{ 37 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD
40 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD 38 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC
41 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC
42 39
43 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB 40 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB
44 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB 41 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB
45 42
46 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND 43 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND
47 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR 44 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR
48 45
49 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR 46 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR
50 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV 47 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV
51 48
52 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from) 49 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from)
53 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP 50 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP
54 51
55 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG 52 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG
56}; 53};
57 54
58enum NormalSSEOps 55enum NormalSSEOps {
59{ 56 sseCMP = 0xC2,
60 sseCMP = 0xC2, 57 sseADD = 0x58, // ADD
61 sseADD = 0x58, //ADD 58 sseSUB = 0x5C, // SUB
62 sseSUB = 0x5C, //SUB 59 sseAND = 0x54, // AND
63 sseAND = 0x54, //AND 60 sseANDN = 0x55, // ANDN
64 sseANDN = 0x55, //ANDN 61 sseOR = 0x56,
65 sseOR = 0x56, 62 sseXOR = 0x57,
66 sseXOR = 0x57, 63 sseMUL = 0x59, // MUL
67 sseMUL = 0x59, //MUL 64 sseDIV = 0x5E, // DIV
68 sseDIV = 0x5E, //DIV 65 sseMIN = 0x5D, // MIN
69 sseMIN = 0x5D, //MIN 66 sseMAX = 0x5F, // MAX
70 sseMAX = 0x5F, //MAX 67 sseCOMIS = 0x2F, // COMIS
71 sseCOMIS = 0x2F, //COMIS 68 sseUCOMIS = 0x2E, // UCOMIS
72 sseUCOMIS = 0x2E, //UCOMIS 69 sseSQRT = 0x51, // SQRT
73 sseSQRT = 0x51, //SQRT 70 sseRSQRT = 0x52, // RSQRT (NO DOUBLE PRECISION!!!)
74 sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) 71 sseRCP = 0x53, // RCP
75 sseRCP = 0x53, //RCP 72 sseMOVAPfromRM = 0x28, // MOVAP from RM
76 sseMOVAPfromRM = 0x28, //MOVAP from RM 73 sseMOVAPtoRM = 0x29, // MOVAP to RM
77 sseMOVAPtoRM = 0x29, //MOVAP to RM 74 sseMOVUPfromRM = 0x10, // MOVUP from RM
78 sseMOVUPfromRM = 0x10, //MOVUP from RM 75 sseMOVUPtoRM = 0x11, // MOVUP to RM
79 sseMOVUPtoRM = 0x11, //MOVUP to RM 76 sseMOVLPfromRM = 0x12,
80 sseMOVLPfromRM= 0x12, 77 sseMOVLPtoRM = 0x13,
81 sseMOVLPtoRM = 0x13, 78 sseMOVHPfromRM = 0x16,
82 sseMOVHPfromRM= 0x16, 79 sseMOVHPtoRM = 0x17,
83 sseMOVHPtoRM = 0x17, 80 sseMOVHLPS = 0x12,
84 sseMOVHLPS = 0x12, 81 sseMOVLHPS = 0x16,
85 sseMOVLHPS = 0x16,
86 sseMOVDQfromRM = 0x6F, 82 sseMOVDQfromRM = 0x6F,
87 sseMOVDQtoRM = 0x7F, 83 sseMOVDQtoRM = 0x7F,
88 sseMASKMOVDQU = 0xF7, 84 sseMASKMOVDQU = 0xF7,
89 sseLDDQU = 0xF0, 85 sseLDDQU = 0xF0,
90 sseSHUF = 0xC6, 86 sseSHUF = 0xC6,
91 sseMOVNTDQ = 0xE7, 87 sseMOVNTDQ = 0xE7,
92 sseMOVNTP = 0x2B, 88 sseMOVNTP = 0x2B,
93 sseHADD = 0x7C, 89 sseHADD = 0x7C,
94}; 90};
95 91
96 92void XEmitter::SetCodePtr(u8* ptr) {
97void XEmitter::SetCodePtr(u8 *ptr)
98{
99 code = ptr; 93 code = ptr;
100} 94}
101 95
102const u8 *XEmitter::GetCodePtr() const 96const u8* XEmitter::GetCodePtr() const {
103{
104 return code; 97 return code;
105} 98}
106 99
107u8 *XEmitter::GetWritableCodePtr() 100u8* XEmitter::GetWritableCodePtr() {
108{
109 return code; 101 return code;
110} 102}
111 103
112void XEmitter::Write8(u8 value) 104void XEmitter::Write8(u8 value) {
113{
114 *code++ = value; 105 *code++ = value;
115} 106}
116 107
117void XEmitter::Write16(u16 value) 108void XEmitter::Write16(u16 value) {
118{
119 std::memcpy(code, &value, sizeof(u16)); 109 std::memcpy(code, &value, sizeof(u16));
120 code += sizeof(u16); 110 code += sizeof(u16);
121} 111}
122 112
123void XEmitter::Write32(u32 value) 113void XEmitter::Write32(u32 value) {
124{
125 std::memcpy(code, &value, sizeof(u32)); 114 std::memcpy(code, &value, sizeof(u32));
126 code += sizeof(u32); 115 code += sizeof(u32);
127} 116}
128 117
129void XEmitter::Write64(u64 value) 118void XEmitter::Write64(u64 value) {
130{
131 std::memcpy(code, &value, sizeof(u64)); 119 std::memcpy(code, &value, sizeof(u64));
132 code += sizeof(u64); 120 code += sizeof(u64);
133} 121}
134 122
135void XEmitter::ReserveCodeSpace(int bytes) 123void XEmitter::ReserveCodeSpace(int bytes) {
136{
137 for (int i = 0; i < bytes; i++) 124 for (int i = 0; i < bytes; i++)
138 *code++ = 0xCC; 125 *code++ = 0xCC;
139} 126}
140 127
141const u8 *XEmitter::AlignCode4() 128const u8* XEmitter::AlignCode4() {
142{
143 int c = int((u64)code & 3); 129 int c = int((u64)code & 3);
144 if (c) 130 if (c)
145 ReserveCodeSpace(4-c); 131 ReserveCodeSpace(4 - c);
146 return code; 132 return code;
147} 133}
148 134
149const u8 *XEmitter::AlignCode16() 135const u8* XEmitter::AlignCode16() {
150{
151 int c = int((u64)code & 15); 136 int c = int((u64)code & 15);
152 if (c) 137 if (c)
153 ReserveCodeSpace(16-c); 138 ReserveCodeSpace(16 - c);
154 return code; 139 return code;
155} 140}
156 141
157const u8 *XEmitter::AlignCodePage() 142const u8* XEmitter::AlignCodePage() {
158{
159 int c = int((u64)code & 4095); 143 int c = int((u64)code & 4095);
160 if (c) 144 if (c)
161 ReserveCodeSpace(4096-c); 145 ReserveCodeSpace(4096 - c);
162 return code; 146 return code;
163} 147}
164 148
165// This operation modifies flags; check to see the flags are locked. 149// This operation modifies flags; check to see the flags are locked.
166// If the flags are locked, we should immediately and loudly fail before 150// If the flags are locked, we should immediately and loudly fail before
167// causing a subtle JIT bug. 151// causing a subtle JIT bug.
168void XEmitter::CheckFlags() 152void XEmitter::CheckFlags() {
169{
170 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); 153 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
171} 154}
172 155
173void XEmitter::WriteModRM(int mod, int reg, int rm) 156void XEmitter::WriteModRM(int mod, int reg, int rm) {
174{
175 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); 157 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
176} 158}
177 159
178void XEmitter::WriteSIB(int scale, int index, int base) 160void XEmitter::WriteSIB(int scale, int index, int base) {
179{
180 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); 161 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
181} 162}
182 163
183void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const 164void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const {
184{ 165 if (customOp == -1)
185 if (customOp == -1) customOp = operandReg; 166 customOp = operandReg;
186#ifdef ARCHITECTURE_x86_64 167#ifdef ARCHITECTURE_x86_64
187 u8 op = 0x40; 168 u8 op = 0x40;
188 // REX.W (whether operation is a 64-bit operation) 169 // REX.W (whether operation is a 64-bit operation)
189 if (opBits == 64) op |= 8; 170 if (opBits == 64)
171 op |= 8;
190 // REX.R (whether ModR/M reg field refers to R8-R15. 172 // REX.R (whether ModR/M reg field refers to R8-R15.
191 if (customOp & 8) op |= 4; 173 if (customOp & 8)
174 op |= 4;
192 // REX.X (whether ModR/M SIB index field refers to R8-R15) 175 // REX.X (whether ModR/M SIB index field refers to R8-R15)
193 if (indexReg & 8) op |= 2; 176 if (indexReg & 8)
177 op |= 2;
194 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) 178 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
195 if (offsetOrBaseReg & 8) op |= 1; 179 if (offsetOrBaseReg & 8)
180 op |= 1;
196 // Write REX if wr have REX bits to write, or if the operation accesses 181 // Write REX if wr have REX bits to write, or if the operation accesses
197 // SIL, DIL, BPL, or SPL. 182 // SIL, DIL, BPL, or SPL.
198 if (op != 0x40 || 183 if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
199 (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || 184 (opBits == 8 && (customOp & 0x10c) == 4)) {
200 (opBits == 8 && (customOp & 0x10c) == 4))
201 {
202 emit->Write8(op); 185 emit->Write8(op);
203 // Check the operation doesn't access AH, BH, CH, or DH. 186 // Check the operation doesn't access AH, BH, CH, or DH.
204 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); 187 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
@@ -214,8 +197,8 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const
214#endif 197#endif
215} 198}
216 199
217void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const 200void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
218{ 201 int W) const {
219 int R = !(regOp1 & 8); 202 int R = !(regOp1 & 8);
220 int X = !(indexReg & 8); 203 int X = !(indexReg & 8);
221 int B = !(offsetOrBaseReg & 8); 204 int B = !(offsetOrBaseReg & 8);
@@ -223,14 +206,11 @@ void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp
223 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); 206 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
224 207
225 // do we need any VEX fields that only appear in the three-byte form? 208 // do we need any VEX fields that only appear in the three-byte form?
226 if (X == 1 && B == 1 && W == 0 && mmmmm == 1) 209 if (X == 1 && B == 1 && W == 0 && mmmmm == 1) {
227 {
228 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp; 210 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp;
229 emit->Write8(0xC5); 211 emit->Write8(0xC5);
230 emit->Write8(RvvvvLpp); 212 emit->Write8(RvvvvLpp);
231 } 213 } else {
232 else
233 {
234 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; 214 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
235 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp; 215 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp;
236 emit->Write8(0xC4); 216 emit->Write8(0xC4);
@@ -239,31 +219,27 @@ void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp
239 } 219 }
240} 220}
241 221
242void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, 222void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg,
243 bool warn_64bit_offset) const 223 bool warn_64bit_offset) const {
244{
245 if (_operandReg == INVALID_REG) 224 if (_operandReg == INVALID_REG)
246 _operandReg = (X64Reg)this->operandReg; 225 _operandReg = (X64Reg) this->operandReg;
247 int mod = 0; 226 int mod = 0;
248 int ireg = indexReg; 227 int ireg = indexReg;
249 bool SIB = false; 228 bool SIB = false;
250 int _offsetOrBaseReg = this->offsetOrBaseReg; 229 int _offsetOrBaseReg = this->offsetOrBaseReg;
251 230
252 if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address 231 if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address
253 { 232 {
254 // Oh, RIP addressing. 233 // Oh, RIP addressing.
255 _offsetOrBaseReg = 5; 234 _offsetOrBaseReg = 5;
256 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); 235 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
257 //TODO : add some checks 236// TODO : add some checks
258#ifdef ARCHITECTURE_x86_64 237#ifdef ARCHITECTURE_x86_64
259 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; 238 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
260 s64 distance = (s64)offset - (s64)ripAddr; 239 s64 distance = (s64)offset - (s64)ripAddr;
261 ASSERT_MSG( 240 ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset,
262 (distance < 0x80000000LL && 241 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr,
263 distance >= -0x80000000LL) || 242 offset);
264 !warn_64bit_offset,
265 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")",
266 ripAddr, offset);
267 s32 offs = (s32)distance; 243 s32 offs = (s32)distance;
268 emit->Write32((u32)offs); 244 emit->Write32((u32)offs);
269#else 245#else
@@ -272,66 +248,49 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
272 return; 248 return;
273 } 249 }
274 250
275 if (scale == 0) 251 if (scale == 0) {
276 {
277 // Oh, no memory, Just a reg. 252 // Oh, no memory, Just a reg.
278 mod = 3; //11 253 mod = 3; // 11
279 } 254 } else if (scale >= 1) {
280 else if (scale >= 1) 255 // Ah good, no scaling.
281 { 256 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) {
282 //Ah good, no scaling. 257 // Okay, we're good. No SIB necessary.
283 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5))
284 {
285 //Okay, we're good. No SIB necessary.
286 int ioff = (int)offset; 258 int ioff = (int)offset;
287 if (ioff == 0) 259 if (ioff == 0) {
288 {
289 mod = 0; 260 mod = 0;
261 } else if (ioff < -128 || ioff > 127) {
262 mod = 2; // 32-bit displacement
263 } else {
264 mod = 1; // 8-bit displacement
290 } 265 }
291 else if (ioff<-128 || ioff>127) 266 } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) {
292 {
293 mod = 2; //32-bit displacement
294 }
295 else
296 {
297 mod = 1; //8-bit displacement
298 }
299 }
300 else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)
301 {
302 SIB = true; 267 SIB = true;
303 mod = 0; 268 mod = 0;
304 _offsetOrBaseReg = 5; 269 _offsetOrBaseReg = 5;
305 } 270 } else // if (scale != SCALE_ATREG)
306 else //if (scale != SCALE_ATREG)
307 { 271 {
308 if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( 272 if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :(
309 { 273 {
310 //So we have to fake it with SIB encoding :( 274 // So we have to fake it with SIB encoding :(
311 SIB = true; 275 SIB = true;
312 } 276 }
313 277
314 if (scale >= SCALE_1 && scale < SCALE_ATREG) 278 if (scale >= SCALE_1 && scale < SCALE_ATREG) {
315 {
316 SIB = true; 279 SIB = true;
317 } 280 }
318 281
319 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) 282 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) {
320 {
321 SIB = true; 283 SIB = true;
322 ireg = _offsetOrBaseReg; 284 ireg = _offsetOrBaseReg;
323 } 285 }
324 286
325 //Okay, we're fine. Just disp encoding. 287 // Okay, we're fine. Just disp encoding.
326 //We need displacement. Which size? 288 // We need displacement. Which size?
327 int ioff = (int)(s64)offset; 289 int ioff = (int)(s64)offset;
328 if (ioff < -128 || ioff > 127) 290 if (ioff < -128 || ioff > 127) {
329 { 291 mod = 2; // 32-bit displacement
330 mod = 2; //32-bit displacement 292 } else {
331 } 293 mod = 1; // 8-bit displacement
332 else
333 {
334 mod = 1; //8-bit displacement
335 } 294 }
336 } 295 }
337 } 296 }
@@ -343,36 +302,55 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
343 oreg = 4; 302 oreg = 4;
344 303
345 // TODO(ector): WTF is this if about? I don't remember writing it :-) 304 // TODO(ector): WTF is this if about? I don't remember writing it :-)
346 //if (RIP) 305 // if (RIP)
347 // oreg = 5; 306 // oreg = 5;
348 307
349 emit->WriteModRM(mod, _operandReg&7, oreg&7); 308 emit->WriteModRM(mod, _operandReg & 7, oreg & 7);
350 309
351 if (SIB) 310 if (SIB) {
352 { 311 // SIB byte
353 //SIB byte
354 int ss; 312 int ss;
355 switch (scale) 313 switch (scale) {
356 { 314 case SCALE_NONE:
357 case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP 315 _offsetOrBaseReg = 4;
358 case SCALE_1: ss = 0; break; 316 ss = 0;
359 case SCALE_2: ss = 1; break; 317 break; // RSP
360 case SCALE_4: ss = 2; break; 318 case SCALE_1:
361 case SCALE_8: ss = 3; break; 319 ss = 0;
362 case SCALE_NOBASE_2: ss = 1; break; 320 break;
363 case SCALE_NOBASE_4: ss = 2; break; 321 case SCALE_2:
364 case SCALE_NOBASE_8: ss = 3; break; 322 ss = 1;
365 case SCALE_ATREG: ss = 0; break; 323 break;
366 default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break; 324 case SCALE_4:
325 ss = 2;
326 break;
327 case SCALE_8:
328 ss = 3;
329 break;
330 case SCALE_NOBASE_2:
331 ss = 1;
332 break;
333 case SCALE_NOBASE_4:
334 ss = 2;
335 break;
336 case SCALE_NOBASE_8:
337 ss = 3;
338 break;
339 case SCALE_ATREG:
340 ss = 0;
341 break;
342 default:
343 ASSERT_MSG(0, "Invalid scale for SIB byte");
344 ss = 0;
345 break;
367 } 346 }
368 emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); 347 emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7)));
369 } 348 }
370 349
371 if (mod == 1) //8-bit disp 350 if (mod == 1) // 8-bit disp
372 { 351 {
373 emit->Write8((u8)(s8)(s32)offset); 352 emit->Write8((u8)(s8)(s32)offset);
374 } 353 } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp
375 else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp
376 { 354 {
377 emit->Write32((u32)offset); 355 emit->Write32((u32)offset);
378 } 356 }
@@ -382,8 +360,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
382// R = register# upper bit 360// R = register# upper bit
383// X = scale amnt upper bit 361// X = scale amnt upper bit
384// B = base register# upper bit 362// B = base register# upper bit
385void XEmitter::Rex(int w, int r, int x, int b) 363void XEmitter::Rex(int w, int r, int x, int b) {
386{
387 w = w ? 1 : 0; 364 w = w ? 1 : 0;
388 r = r ? 1 : 0; 365 r = r ? 1 : 0;
389 x = x ? 1 : 0; 366 x = x ? 1 : 0;
@@ -393,70 +370,60 @@ void XEmitter::Rex(int w, int r, int x, int b)
393 Write8(rx); 370 Write8(rx);
394} 371}
395 372
396void XEmitter::JMP(const u8* addr, bool force5Bytes) 373void XEmitter::JMP(const u8* addr, bool force5Bytes) {
397{
398 u64 fn = (u64)addr; 374 u64 fn = (u64)addr;
399 if (!force5Bytes) 375 if (!force5Bytes) {
400 {
401 s64 distance = (s64)(fn - ((u64)code + 2)); 376 s64 distance = (s64)(fn - ((u64)code + 2));
402 ASSERT_MSG(distance >= -0x80 && distance < 0x80, 377 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
403 "Jump target too far away, needs force5Bytes = true"); 378 "Jump target too far away, needs force5Bytes = true");
404 //8 bits will do 379 // 8 bits will do
405 Write8(0xEB); 380 Write8(0xEB);
406 Write8((u8)(s8)distance); 381 Write8((u8)(s8)distance);
407 } 382 } else {
408 else
409 {
410 s64 distance = (s64)(fn - ((u64)code + 5)); 383 s64 distance = (s64)(fn - ((u64)code + 5));
411 384
412 ASSERT_MSG( 385 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
413 distance >= -0x80000000LL && distance < 0x80000000LL, 386 "Jump target too far away, needs indirect register");
414 "Jump target too far away, needs indirect register");
415 Write8(0xE9); 387 Write8(0xE9);
416 Write32((u32)(s32)distance); 388 Write32((u32)(s32)distance);
417 } 389 }
418} 390}
419 391
420void XEmitter::JMPptr(const OpArg& arg2) 392void XEmitter::JMPptr(const OpArg& arg2) {
421{
422 OpArg arg = arg2; 393 OpArg arg = arg2;
423 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); 394 if (arg.IsImm())
395 ASSERT_MSG(0, "JMPptr - Imm argument");
424 arg.operandReg = 4; 396 arg.operandReg = 4;
425 arg.WriteRex(this, 0, 0); 397 arg.WriteRex(this, 0, 0);
426 Write8(0xFF); 398 Write8(0xFF);
427 arg.WriteRest(this); 399 arg.WriteRest(this);
428} 400}
429 401
430//Can be used to trap other processors, before overwriting their code 402// Can be used to trap other processors, before overwriting their code
431// not used in dolphin 403// not used in dolphin
432void XEmitter::JMPself() 404void XEmitter::JMPself() {
433{
434 Write8(0xEB); 405 Write8(0xEB);
435 Write8(0xFE); 406 Write8(0xFE);
436} 407}
437 408
438void XEmitter::CALLptr(OpArg arg) 409void XEmitter::CALLptr(OpArg arg) {
439{ 410 if (arg.IsImm())
440 if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument"); 411 ASSERT_MSG(0, "CALLptr - Imm argument");
441 arg.operandReg = 2; 412 arg.operandReg = 2;
442 arg.WriteRex(this, 0, 0); 413 arg.WriteRex(this, 0, 0);
443 Write8(0xFF); 414 Write8(0xFF);
444 arg.WriteRest(this); 415 arg.WriteRest(this);
445} 416}
446 417
447void XEmitter::CALL(const void* fnptr) 418void XEmitter::CALL(const void* fnptr) {
448{
449 u64 distance = u64(fnptr) - (u64(code) + 5); 419 u64 distance = u64(fnptr) - (u64(code) + 5);
450 ASSERT_MSG( 420 ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL,
451 distance < 0x0000000080000000ULL || 421 "CALL out of range (%p calls %p)", code, fnptr);
452 distance >= 0xFFFFFFFF80000000ULL,
453 "CALL out of range (%p calls %p)", code, fnptr);
454 Write8(0xE8); 422 Write8(0xE8);
455 Write32(u32(distance)); 423 Write32(u32(distance));
456} 424}
457 425
458FixupBranch XEmitter::CALL() 426FixupBranch XEmitter::CALL() {
459{
460 FixupBranch branch; 427 FixupBranch branch;
461 branch.type = 1; 428 branch.type = 1;
462 branch.ptr = code + 5; 429 branch.ptr = code + 5;
@@ -467,38 +434,30 @@ FixupBranch XEmitter::CALL()
467 return branch; 434 return branch;
468} 435}
469 436
470FixupBranch XEmitter::J(bool force5bytes) 437FixupBranch XEmitter::J(bool force5bytes) {
471{
472 FixupBranch branch; 438 FixupBranch branch;
473 branch.type = force5bytes ? 1 : 0; 439 branch.type = force5bytes ? 1 : 0;
474 branch.ptr = code + (force5bytes ? 5 : 2); 440 branch.ptr = code + (force5bytes ? 5 : 2);
475 if (!force5bytes) 441 if (!force5bytes) {
476 { 442 // 8 bits will do
477 //8 bits will do
478 Write8(0xEB); 443 Write8(0xEB);
479 Write8(0); 444 Write8(0);
480 } 445 } else {
481 else
482 {
483 Write8(0xE9); 446 Write8(0xE9);
484 Write32(0); 447 Write32(0);
485 } 448 }
486 return branch; 449 return branch;
487} 450}
488 451
489FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) 452FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) {
490{
491 FixupBranch branch; 453 FixupBranch branch;
492 branch.type = force5bytes ? 1 : 0; 454 branch.type = force5bytes ? 1 : 0;
493 branch.ptr = code + (force5bytes ? 6 : 2); 455 branch.ptr = code + (force5bytes ? 6 : 2);
494 if (!force5bytes) 456 if (!force5bytes) {
495 { 457 // 8 bits will do
496 //8 bits will do
497 Write8(0x70 + conditionCode); 458 Write8(0x70 + conditionCode);
498 Write8(0); 459 Write8(0);
499 } 460 } else {
500 else
501 {
502 Write8(0x0F); 461 Write8(0x0F);
503 Write8(0x80 + conditionCode); 462 Write8(0x80 + conditionCode);
504 Write32(0); 463 Write32(0);
@@ -506,198 +465,268 @@ FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)
506 return branch; 465 return branch;
507} 466}
508 467
509void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) 468void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) {
510{
511 u64 fn = (u64)addr; 469 u64 fn = (u64)addr;
512 s64 distance = (s64)(fn - ((u64)code + 2)); 470 s64 distance = (s64)(fn - ((u64)code + 2));
513 if (distance < -0x80 || distance >= 0x80 || force5bytes) 471 if (distance < -0x80 || distance >= 0x80 || force5bytes) {
514 {
515 distance = (s64)(fn - ((u64)code + 6)); 472 distance = (s64)(fn - ((u64)code + 6));
516 ASSERT_MSG( 473 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
517 distance >= -0x80000000LL && distance < 0x80000000LL, 474 "Jump target too far away, needs indirect register");
518 "Jump target too far away, needs indirect register");
519 Write8(0x0F); 475 Write8(0x0F);
520 Write8(0x80 + conditionCode); 476 Write8(0x80 + conditionCode);
521 Write32((u32)(s32)distance); 477 Write32((u32)(s32)distance);
522 } 478 } else {
523 else
524 {
525 Write8(0x70 + conditionCode); 479 Write8(0x70 + conditionCode);
526 Write8((u8)(s8)distance); 480 Write8((u8)(s8)distance);
527 } 481 }
528} 482}
529 483
530void XEmitter::SetJumpTarget(const FixupBranch& branch) 484void XEmitter::SetJumpTarget(const FixupBranch& branch) {
531{ 485 if (branch.type == 0) {
532 if (branch.type == 0)
533 {
534 s64 distance = (s64)(code - branch.ptr); 486 s64 distance = (s64)(code - branch.ptr);
535 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); 487 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
488 "Jump target too far away, needs force5Bytes = true");
536 branch.ptr[-1] = (u8)(s8)distance; 489 branch.ptr[-1] = (u8)(s8)distance;
537 } 490 } else if (branch.type == 1) {
538 else if (branch.type == 1)
539 {
540 s64 distance = (s64)(code - branch.ptr); 491 s64 distance = (s64)(code - branch.ptr);
541 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); 492 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
493 "Jump target too far away, needs indirect register");
542 ((s32*)branch.ptr)[-1] = (s32)distance; 494 ((s32*)branch.ptr)[-1] = (s32)distance;
543 } 495 }
544} 496}
545 497
546void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) 498void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) {
547{ 499 if (branch.type == 0) {
548 if (branch.type == 0)
549 {
550 s64 distance = (s64)(target - branch.ptr); 500 s64 distance = (s64)(target - branch.ptr);
551 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); 501 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
502 "Jump target too far away, needs force5Bytes = true");
552 branch.ptr[-1] = (u8)(s8)distance; 503 branch.ptr[-1] = (u8)(s8)distance;
553 } 504 } else if (branch.type == 1) {
554 else if (branch.type == 1)
555 {
556 s64 distance = (s64)(target - branch.ptr); 505 s64 distance = (s64)(target - branch.ptr);
557 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); 506 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
507 "Jump target too far away, needs indirect register");
558 ((s32*)branch.ptr)[-1] = (s32)distance; 508 ((s32*)branch.ptr)[-1] = (s32)distance;
559 } 509 }
560} 510}
561 511
562//Single byte opcodes 512// Single byte opcodes
563//There is no PUSHAD/POPAD in 64-bit mode. 513// There is no PUSHAD/POPAD in 64-bit mode.
564void XEmitter::INT3() {Write8(0xCC);} 514void XEmitter::INT3() {
565void XEmitter::RET() {Write8(0xC3);} 515 Write8(0xCC);
566void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret 516}
517void XEmitter::RET() {
518 Write8(0xC3);
519}
520void XEmitter::RET_FAST() {
521 Write8(0xF3);
522 Write8(0xC3);
523} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a
524 // ret
567 525
568// The first sign of decadence: optimized NOPs. 526// The first sign of decadence: optimized NOPs.
569void XEmitter::NOP(size_t size) 527void XEmitter::NOP(size_t size) {
570{
571 DEBUG_ASSERT((int)size > 0); 528 DEBUG_ASSERT((int)size > 0);
572 while (true) 529 while (true) {
573 { 530 switch (size) {
574 switch (size)
575 {
576 case 0: 531 case 0:
577 return; 532 return;
578 case 1: 533 case 1:
579 Write8(0x90); 534 Write8(0x90);
580 return; 535 return;
581 case 2: 536 case 2:
582 Write8(0x66); Write8(0x90); 537 Write8(0x66);
538 Write8(0x90);
583 return; 539 return;
584 case 3: 540 case 3:
585 Write8(0x0F); Write8(0x1F); Write8(0x00); 541 Write8(0x0F);
542 Write8(0x1F);
543 Write8(0x00);
586 return; 544 return;
587 case 4: 545 case 4:
588 Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00); 546 Write8(0x0F);
547 Write8(0x1F);
548 Write8(0x40);
549 Write8(0x00);
589 return; 550 return;
590 case 5: 551 case 5:
591 Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00); 552 Write8(0x0F);
553 Write8(0x1F);
554 Write8(0x44);
555 Write8(0x00);
592 Write8(0x00); 556 Write8(0x00);
593 return; 557 return;
594 case 6: 558 case 6:
595 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44); 559 Write8(0x66);
596 Write8(0x00); Write8(0x00); 560 Write8(0x0F);
561 Write8(0x1F);
562 Write8(0x44);
563 Write8(0x00);
564 Write8(0x00);
597 return; 565 return;
598 case 7: 566 case 7:
599 Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00); 567 Write8(0x0F);
600 Write8(0x00); Write8(0x00); Write8(0x00); 568 Write8(0x1F);
569 Write8(0x80);
570 Write8(0x00);
571 Write8(0x00);
572 Write8(0x00);
573 Write8(0x00);
601 return; 574 return;
602 case 8: 575 case 8:
603 Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00); 576 Write8(0x0F);
604 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); 577 Write8(0x1F);
578 Write8(0x84);
579 Write8(0x00);
580 Write8(0x00);
581 Write8(0x00);
582 Write8(0x00);
583 Write8(0x00);
605 return; 584 return;
606 case 9: 585 case 9:
607 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84); 586 Write8(0x66);
608 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); 587 Write8(0x0F);
588 Write8(0x1F);
589 Write8(0x84);
590 Write8(0x00);
591 Write8(0x00);
592 Write8(0x00);
593 Write8(0x00);
609 Write8(0x00); 594 Write8(0x00);
610 return; 595 return;
611 case 10: 596 case 10:
612 Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F); 597 Write8(0x66);
613 Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00); 598 Write8(0x66);
614 Write8(0x00); Write8(0x00); 599 Write8(0x0F);
600 Write8(0x1F);
601 Write8(0x84);
602 Write8(0x00);
603 Write8(0x00);
604 Write8(0x00);
605 Write8(0x00);
606 Write8(0x00);
615 return; 607 return;
616 default: 608 default:
617 // Even though x86 instructions are allowed to be up to 15 bytes long, 609 // Even though x86 instructions are allowed to be up to 15 bytes long,
618 // AMD advises against using NOPs longer than 11 bytes because they 610 // AMD advises against using NOPs longer than 11 bytes because they
619 // carry a performance penalty on CPUs older than AMD family 16h. 611 // carry a performance penalty on CPUs older than AMD family 16h.
620 Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F); 612 Write8(0x66);
621 Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00); 613 Write8(0x66);
622 Write8(0x00); Write8(0x00); Write8(0x00); 614 Write8(0x66);
615 Write8(0x0F);
616 Write8(0x1F);
617 Write8(0x84);
618 Write8(0x00);
619 Write8(0x00);
620 Write8(0x00);
621 Write8(0x00);
622 Write8(0x00);
623 size -= 11; 623 size -= 11;
624 continue; 624 continue;
625 } 625 }
626 } 626 }
627} 627}
628 628
629void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu 629void XEmitter::PAUSE() {
630void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry 630 Write8(0xF3);
631void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry 631 NOP();
632void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry 632} // use in tight spinloops for energy saving on some cpu
633void XEmitter::CLC() {
634 CheckFlags();
635 Write8(0xF8);
636} // clear carry
637void XEmitter::CMC() {
638 CheckFlags();
639 Write8(0xF5);
640} // flip carry
641void XEmitter::STC() {
642 CheckFlags();
643 Write8(0xF9);
644} // set carry
633 645
634//TODO: xchg ah, al ??? 646// TODO: xchg ah, al ???
635void XEmitter::XCHG_AHAL() 647void XEmitter::XCHG_AHAL() {
636{
637 Write8(0x86); 648 Write8(0x86);
638 Write8(0xe0); 649 Write8(0xe0);
639 // alt. 86 c4 650 // alt. 86 c4
640} 651}
641 652
642//These two can not be executed on early Intel 64-bit CPU:s, only on AMD! 653// These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
643void XEmitter::LAHF() {Write8(0x9F);} 654void XEmitter::LAHF() {
644void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);} 655 Write8(0x9F);
656}
657void XEmitter::SAHF() {
658 CheckFlags();
659 Write8(0x9E);
660}
645 661
646void XEmitter::PUSHF() {Write8(0x9C);} 662void XEmitter::PUSHF() {
647void XEmitter::POPF() {CheckFlags(); Write8(0x9D);} 663 Write8(0x9C);
664}
665void XEmitter::POPF() {
666 CheckFlags();
667 Write8(0x9D);
668}
648 669
649void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} 670void XEmitter::LFENCE() {
650void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} 671 Write8(0x0F);
651void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} 672 Write8(0xAE);
673 Write8(0xE8);
674}
675void XEmitter::MFENCE() {
676 Write8(0x0F);
677 Write8(0xAE);
678 Write8(0xF0);
679}
680void XEmitter::SFENCE() {
681 Write8(0x0F);
682 Write8(0xAE);
683 Write8(0xF8);
684}
652 685
653void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) 686void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) {
654{
655 if (bits == 16) 687 if (bits == 16)
656 Write8(0x66); 688 Write8(0x66);
657 Rex(bits == 64, 0, 0, (int)reg >> 3); 689 Rex(bits == 64, 0, 0, (int)reg >> 3);
658 Write8(byte + ((int)reg & 7)); 690 Write8(byte + ((int)reg & 7));
659} 691}
660 692
661void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) 693void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) {
662{
663 if (bits == 16) 694 if (bits == 16)
664 Write8(0x66); 695 Write8(0x66);
665 Rex(bits==64, 0, 0, (int)reg >> 3); 696 Rex(bits == 64, 0, 0, (int)reg >> 3);
666 Write8(byte1); 697 Write8(byte1);
667 Write8(byte2 + ((int)reg & 7)); 698 Write8(byte2 + ((int)reg & 7));
668} 699}
669 700
670void XEmitter::CWD(int bits) 701void XEmitter::CWD(int bits) {
671{
672 if (bits == 16) 702 if (bits == 16)
673 Write8(0x66); 703 Write8(0x66);
674 Rex(bits == 64, 0, 0, 0); 704 Rex(bits == 64, 0, 0, 0);
675 Write8(0x99); 705 Write8(0x99);
676} 706}
677 707
678void XEmitter::CBW(int bits) 708void XEmitter::CBW(int bits) {
679{
680 if (bits == 8) 709 if (bits == 8)
681 Write8(0x66); 710 Write8(0x66);
682 Rex(bits == 32, 0, 0, 0); 711 Rex(bits == 32, 0, 0, 0);
683 Write8(0x98); 712 Write8(0x98);
684} 713}
685 714
686//Simple opcodes 715// Simple opcodes
687
688 716
689//push/pop do not need wide to be 64-bit 717// push/pop do not need wide to be 64-bit
690void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} 718void XEmitter::PUSH(X64Reg reg) {
691void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} 719 WriteSimple1Byte(32, 0x50, reg);
720}
721void XEmitter::POP(X64Reg reg) {
722 WriteSimple1Byte(32, 0x58, reg);
723}
692 724
693void XEmitter::PUSH(int bits, const OpArg& reg) 725void XEmitter::PUSH(int bits, const OpArg& reg) {
694{
695 if (reg.IsSimpleReg()) 726 if (reg.IsSimpleReg())
696 PUSH(reg.GetSimpleReg()); 727 PUSH(reg.GetSimpleReg());
697 else if (reg.IsImm()) 728 else if (reg.IsImm()) {
698 { 729 switch (reg.GetImmBits()) {
699 switch (reg.GetImmBits())
700 {
701 case 8: 730 case 8:
702 Write8(0x6A); 731 Write8(0x6A);
703 Write8((u8)(s8)reg.offset); 732 Write8((u8)(s8)reg.offset);
@@ -715,9 +744,7 @@ void XEmitter::PUSH(int bits, const OpArg& reg)
715 ASSERT_MSG(0, "PUSH - Bad imm bits"); 744 ASSERT_MSG(0, "PUSH - Bad imm bits");
716 break; 745 break;
717 } 746 }
718 } 747 } else {
719 else
720 {
721 if (bits == 16) 748 if (bits == 16)
722 Write8(0x66); 749 Write8(0x66);
723 reg.WriteRex(this, bits, bits); 750 reg.WriteRex(this, bits, bits);
@@ -726,44 +753,33 @@ void XEmitter::PUSH(int bits, const OpArg& reg)
726 } 753 }
727} 754}
728 755
729void XEmitter::POP(int /*bits*/, const OpArg& reg) 756void XEmitter::POP(int /*bits*/, const OpArg& reg) {
730{
731 if (reg.IsSimpleReg()) 757 if (reg.IsSimpleReg())
732 POP(reg.GetSimpleReg()); 758 POP(reg.GetSimpleReg());
733 else 759 else
734 ASSERT_MSG(0, "POP - Unsupported encoding"); 760 ASSERT_MSG(0, "POP - Unsupported encoding");
735} 761}
736 762
737void XEmitter::BSWAP(int bits, X64Reg reg) 763void XEmitter::BSWAP(int bits, X64Reg reg) {
738{ 764 if (bits >= 32) {
739 if (bits >= 32)
740 {
741 WriteSimple2Byte(bits, 0x0F, 0xC8, reg); 765 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
742 } 766 } else if (bits == 16) {
743 else if (bits == 16)
744 {
745 ROL(16, R(reg), Imm8(8)); 767 ROL(16, R(reg), Imm8(8));
746 } 768 } else if (bits == 8) {
747 else if (bits == 8)
748 {
749 // Do nothing - can't bswap a single byte... 769 // Do nothing - can't bswap a single byte...
750 } 770 } else {
751 else
752 {
753 ASSERT_MSG(0, "BSWAP - Wrong number of bits"); 771 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
754 } 772 }
755} 773}
756 774
757// Undefined opcode - reserved 775// Undefined opcode - reserved
758// If we ever need a way to always cause a non-breakpoint hard exception... 776// If we ever need a way to always cause a non-breakpoint hard exception...
759void XEmitter::UD2() 777void XEmitter::UD2() {
760{
761 Write8(0x0F); 778 Write8(0x0F);
762 Write8(0x0B); 779 Write8(0x0B);
763} 780}
764 781
765void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) 782void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) {
766{
767 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); 783 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
768 arg.operandReg = (u8)level; 784 arg.operandReg = (u8)level;
769 arg.WriteRex(this, 0, 0); 785 arg.WriteRex(this, 0, 0);
@@ -772,8 +788,7 @@ void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg)
772 arg.WriteRest(this); 788 arg.WriteRest(this);
773} 789}
774 790
775void XEmitter::SETcc(CCFlags flag, OpArg dest) 791void XEmitter::SETcc(CCFlags flag, OpArg dest) {
776{
777 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); 792 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
778 dest.operandReg = 0; 793 dest.operandReg = 0;
779 dest.WriteRex(this, 0, 8); 794 dest.WriteRex(this, 0, 8);
@@ -782,8 +797,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest)
782 dest.WriteRest(this); 797 dest.WriteRest(this);
783} 798}
784 799
785void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) 800void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) {
786{
787 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); 801 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
788 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); 802 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
789 if (bits == 16) 803 if (bits == 16)
@@ -795,34 +809,41 @@ void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag)
795 src.WriteRest(this); 809 src.WriteRest(this);
796} 810}
797 811
798void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) 812void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) {
799{
800 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); 813 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
801 CheckFlags(); 814 CheckFlags();
802 src.operandReg = ext; 815 src.operandReg = ext;
803 if (bits == 16) 816 if (bits == 16)
804 Write8(0x66); 817 Write8(0x66);
805 src.WriteRex(this, bits, bits, 0); 818 src.WriteRex(this, bits, bits, 0);
806 if (bits == 8) 819 if (bits == 8) {
807 {
808 Write8(0xF6); 820 Write8(0xF6);
809 } 821 } else {
810 else
811 {
812 Write8(0xF7); 822 Write8(0xF7);
813 } 823 }
814 src.WriteRest(this); 824 src.WriteRest(this);
815} 825}
816 826
817void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);} 827void XEmitter::MUL(int bits, const OpArg& src) {
818void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);} 828 WriteMulDivType(bits, src, 4);
819void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);} 829}
820void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);} 830void XEmitter::DIV(int bits, const OpArg& src) {
821void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);} 831 WriteMulDivType(bits, src, 6);
822void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);} 832}
833void XEmitter::IMUL(int bits, const OpArg& src) {
834 WriteMulDivType(bits, src, 5);
835}
836void XEmitter::IDIV(int bits, const OpArg& src) {
837 WriteMulDivType(bits, src, 7);
838}
839void XEmitter::NEG(int bits, const OpArg& src) {
840 WriteMulDivType(bits, src, 3);
841}
842void XEmitter::NOT(int bits, const OpArg& src) {
843 WriteMulDivType(bits, src, 2);
844}
823 845
824void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) 846void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) {
825{
826 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument"); 847 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
827 CheckFlags(); 848 CheckFlags();
828 src.operandReg = (u8)dest; 849 src.operandReg = (u8)dest;
@@ -836,36 +857,35 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo
836 src.WriteRest(this); 857 src.WriteRest(this);
837} 858}
838 859
839void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) 860void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) {
840{
841 if (bits <= 16) 861 if (bits <= 16)
842 ASSERT_MSG(0, "MOVNTI - bits<=16"); 862 ASSERT_MSG(0, "MOVNTI - bits<=16");
843 WriteBitSearchType(bits, src, dest, 0xC3); 863 WriteBitSearchType(bits, src, dest, 0xC3);
844} 864}
845 865
846void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit 866void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {
847void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit 867 WriteBitSearchType(bits, dest, src, 0xBC);
868} // Bottom bit to top bit
869void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {
870 WriteBitSearchType(bits, dest, src, 0xBD);
871} // Top bit to bottom bit
848 872
849void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) 873void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) {
850{
851 CheckFlags(); 874 CheckFlags();
852 if (!Common::GetCPUCaps().bmi1) 875 if (!Common::GetCPUCaps().bmi1)
853 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); 876 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
854 WriteBitSearchType(bits, dest, src, 0xBC, true); 877 WriteBitSearchType(bits, dest, src, 0xBC, true);
855} 878}
856void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) 879void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) {
857{
858 CheckFlags(); 880 CheckFlags();
859 if (!Common::GetCPUCaps().lzcnt) 881 if (!Common::GetCPUCaps().lzcnt)
860 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer."); 882 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
861 WriteBitSearchType(bits, dest, src, 0xBD, true); 883 WriteBitSearchType(bits, dest, src, 0xBD, true);
862} 884}
863 885
864void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) 886void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) {
865{
866 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); 887 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
867 if (dbits == sbits) 888 if (dbits == sbits) {
868 {
869 MOV(dbits, R(dest), src); 889 MOV(dbits, R(dest), src);
870 return; 890 return;
871 } 891 }
@@ -873,66 +893,49 @@ void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
873 if (dbits == 16) 893 if (dbits == 16)
874 Write8(0x66); 894 Write8(0x66);
875 src.WriteRex(this, dbits, sbits); 895 src.WriteRex(this, dbits, sbits);
876 if (sbits == 8) 896 if (sbits == 8) {
877 {
878 Write8(0x0F); 897 Write8(0x0F);
879 Write8(0xBE); 898 Write8(0xBE);
880 } 899 } else if (sbits == 16) {
881 else if (sbits == 16)
882 {
883 Write8(0x0F); 900 Write8(0x0F);
884 Write8(0xBF); 901 Write8(0xBF);
885 } 902 } else if (sbits == 32 && dbits == 64) {
886 else if (sbits == 32 && dbits == 64)
887 {
888 Write8(0x63); 903 Write8(0x63);
889 } 904 } else {
890 else
891 {
892 Crash(); 905 Crash();
893 } 906 }
894 src.WriteRest(this); 907 src.WriteRest(this);
895} 908}
896 909
897void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) 910void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) {
898{
899 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); 911 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
900 if (dbits == sbits) 912 if (dbits == sbits) {
901 {
902 MOV(dbits, R(dest), src); 913 MOV(dbits, R(dest), src);
903 return; 914 return;
904 } 915 }
905 src.operandReg = (u8)dest; 916 src.operandReg = (u8)dest;
906 if (dbits == 16) 917 if (dbits == 16)
907 Write8(0x66); 918 Write8(0x66);
908 //the 32bit result is automatically zero extended to 64bit 919 // the 32bit result is automatically zero extended to 64bit
909 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits); 920 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
910 if (sbits == 8) 921 if (sbits == 8) {
911 {
912 Write8(0x0F); 922 Write8(0x0F);
913 Write8(0xB6); 923 Write8(0xB6);
914 } 924 } else if (sbits == 16) {
915 else if (sbits == 16)
916 {
917 Write8(0x0F); 925 Write8(0x0F);
918 Write8(0xB7); 926 Write8(0xB7);
919 } 927 } else if (sbits == 32 && dbits == 64) {
920 else if (sbits == 32 && dbits == 64)
921 {
922 Write8(0x8B); 928 Write8(0x8B);
923 } 929 } else {
924 else
925 {
926 ASSERT_MSG(0, "MOVZX - Invalid size"); 930 ASSERT_MSG(0, "MOVZX - Invalid size");
927 } 931 }
928 src.WriteRest(this); 932 src.WriteRest(this);
929} 933}
930 934
931void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) 935void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) {
932{ 936 ASSERT_MSG(Common::GetCPUCaps().movbe,
933 ASSERT_MSG(Common::GetCPUCaps().movbe, "Generating MOVBE on a system that does not support it."); 937 "Generating MOVBE on a system that does not support it.");
934 if (bits == 8) 938 if (bits == 8) {
935 {
936 MOV(bits, dest, src); 939 MOV(bits, dest, src);
937 return; 940 return;
938 } 941 }
@@ -940,71 +943,60 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src)
940 if (bits == 16) 943 if (bits == 16)
941 Write8(0x66); 944 Write8(0x66);
942 945
943 if (dest.IsSimpleReg()) 946 if (dest.IsSimpleReg()) {
944 {
945 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); 947 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
946 src.WriteRex(this, bits, bits, dest.GetSimpleReg()); 948 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
947 Write8(0x0F); Write8(0x38); Write8(0xF0); 949 Write8(0x0F);
950 Write8(0x38);
951 Write8(0xF0);
948 src.WriteRest(this, 0, dest.GetSimpleReg()); 952 src.WriteRest(this, 0, dest.GetSimpleReg());
949 } 953 } else if (src.IsSimpleReg()) {
950 else if (src.IsSimpleReg())
951 {
952 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); 954 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
953 dest.WriteRex(this, bits, bits, src.GetSimpleReg()); 955 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
954 Write8(0x0F); Write8(0x38); Write8(0xF1); 956 Write8(0x0F);
957 Write8(0x38);
958 Write8(0xF1);
955 dest.WriteRest(this, 0, src.GetSimpleReg()); 959 dest.WriteRest(this, 0, src.GetSimpleReg());
956 } 960 } else {
957 else
958 {
959 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); 961 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
960 } 962 }
961} 963}
962 964
963 965void XEmitter::LEA(int bits, X64Reg dest, OpArg src) {
964void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
965{
966 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); 966 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
967 src.operandReg = (u8)dest; 967 src.operandReg = (u8)dest;
968 if (bits == 16) 968 if (bits == 16)
969 Write8(0x66); //TODO: performance warning 969 Write8(0x66); // TODO: performance warning
970 src.WriteRex(this, bits, bits); 970 src.WriteRex(this, bits, bits);
971 Write8(0x8D); 971 Write8(0x8D);
972 src.WriteRest(this, 0, INVALID_REG, bits == 64); 972 src.WriteRest(this, 0, INVALID_REG, bits == 64);
973} 973}
974 974
975//shift can be either imm8 or cl 975// shift can be either imm8 or cl
976void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) 976void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) {
977{
978 CheckFlags(); 977 CheckFlags();
979 bool writeImm = false; 978 bool writeImm = false;
980 if (dest.IsImm()) 979 if (dest.IsImm()) {
981 {
982 ASSERT_MSG(0, "WriteShift - can't shift imms"); 980 ASSERT_MSG(0, "WriteShift - can't shift imms");
983 } 981 }
984 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) 982 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
985 { 983 (shift.IsImm() && shift.GetImmBits() != 8)) {
986 ASSERT_MSG(0, "WriteShift - illegal argument"); 984 ASSERT_MSG(0, "WriteShift - illegal argument");
987 } 985 }
988 dest.operandReg = ext; 986 dest.operandReg = ext;
989 if (bits == 16) 987 if (bits == 16)
990 Write8(0x66); 988 Write8(0x66);
991 dest.WriteRex(this, bits, bits, 0); 989 dest.WriteRex(this, bits, bits, 0);
992 if (shift.GetImmBits() == 8) 990 if (shift.GetImmBits() == 8) {
993 { 991 // ok an imm
994 //ok an imm
995 u8 imm = (u8)shift.offset; 992 u8 imm = (u8)shift.offset;
996 if (imm == 1) 993 if (imm == 1) {
997 {
998 Write8(bits == 8 ? 0xD0 : 0xD1); 994 Write8(bits == 8 ? 0xD0 : 0xD1);
999 } 995 } else {
1000 else
1001 {
1002 writeImm = true; 996 writeImm = true;
1003 Write8(bits == 8 ? 0xC0 : 0xC1); 997 Write8(bits == 8 ? 0xC0 : 0xC1);
1004 } 998 }
1005 } 999 } else {
1006 else
1007 {
1008 Write8(bits == 8 ? 0xD2 : 0xD3); 1000 Write8(bits == 8 ? 0xD2 : 0xD3);
1009 } 1001 }
1010 dest.WriteRest(this, writeImm ? 1 : 0); 1002 dest.WriteRest(this, writeImm ? 1 : 0);
@@ -1014,116 +1006,125 @@ void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)
1014 1006
1015// large rotates and shift are slower on intel than amd 1007// large rotates and shift are slower on intel than amd
1016// intel likes to rotate by 1, and the op is smaller too 1008// intel likes to rotate by 1, and the op is smaller too
1017void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);} 1009void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {
1018void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);} 1010 WriteShift(bits, dest, shift, 0);
1019void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);} 1011}
1020void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);} 1012void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {
1021void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);} 1013 WriteShift(bits, dest, shift, 1);
1022void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);} 1014}
1023void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);} 1015void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {
1016 WriteShift(bits, dest, shift, 2);
1017}
1018void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {
1019 WriteShift(bits, dest, shift, 3);
1020}
1021void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {
1022 WriteShift(bits, dest, shift, 4);
1023}
1024void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {
1025 WriteShift(bits, dest, shift, 5);
1026}
1027void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {
1028 WriteShift(bits, dest, shift, 7);
1029}
1024 1030
1025// index can be either imm8 or register, don't use memory destination because it's slow 1031// index can be either imm8 or register, don't use memory destination because it's slow
1026void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) 1032void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) {
1027{
1028 CheckFlags(); 1033 CheckFlags();
1029 if (dest.IsImm()) 1034 if (dest.IsImm()) {
1030 {
1031 ASSERT_MSG(0, "WriteBitTest - can't test imms"); 1035 ASSERT_MSG(0, "WriteBitTest - can't test imms");
1032 } 1036 }
1033 if ((index.IsImm() && index.GetImmBits() != 8)) 1037 if ((index.IsImm() && index.GetImmBits() != 8)) {
1034 {
1035 ASSERT_MSG(0, "WriteBitTest - illegal argument"); 1038 ASSERT_MSG(0, "WriteBitTest - illegal argument");
1036 } 1039 }
1037 if (bits == 16) 1040 if (bits == 16)
1038 Write8(0x66); 1041 Write8(0x66);
1039 if (index.IsImm()) 1042 if (index.IsImm()) {
1040 {
1041 dest.WriteRex(this, bits, bits); 1043 dest.WriteRex(this, bits, bits);
1042 Write8(0x0F); Write8(0xBA); 1044 Write8(0x0F);
1045 Write8(0xBA);
1043 dest.WriteRest(this, 1, (X64Reg)ext); 1046 dest.WriteRest(this, 1, (X64Reg)ext);
1044 Write8((u8)index.offset); 1047 Write8((u8)index.offset);
1045 } 1048 } else {
1046 else
1047 {
1048 X64Reg operand = index.GetSimpleReg(); 1049 X64Reg operand = index.GetSimpleReg();
1049 dest.WriteRex(this, bits, bits, operand); 1050 dest.WriteRex(this, bits, bits, operand);
1050 Write8(0x0F); Write8(0x83 + 8*ext); 1051 Write8(0x0F);
1052 Write8(0x83 + 8 * ext);
1051 dest.WriteRest(this, 1, operand); 1053 dest.WriteRest(this, 1, operand);
1052 } 1054 }
1053} 1055}
1054 1056
1055void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);} 1057void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {
1056void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);} 1058 WriteBitTest(bits, dest, index, 4);
1057void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);} 1059}
1058void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);} 1060void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {
1061 WriteBitTest(bits, dest, index, 5);
1062}
1063void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {
1064 WriteBitTest(bits, dest, index, 6);
1065}
1066void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {
1067 WriteBitTest(bits, dest, index, 7);
1068}
1059 1069
1060//shift can be either imm8 or cl 1070// shift can be either imm8 or cl
1061void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) 1071void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1062{
1063 CheckFlags(); 1072 CheckFlags();
1064 if (dest.IsImm()) 1073 if (dest.IsImm()) {
1065 {
1066 ASSERT_MSG(0, "SHRD - can't use imms as destination"); 1074 ASSERT_MSG(0, "SHRD - can't use imms as destination");
1067 } 1075 }
1068 if (!src.IsSimpleReg()) 1076 if (!src.IsSimpleReg()) {
1069 {
1070 ASSERT_MSG(0, "SHRD - must use simple register as source"); 1077 ASSERT_MSG(0, "SHRD - must use simple register as source");
1071 } 1078 }
1072 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) 1079 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1073 { 1080 (shift.IsImm() && shift.GetImmBits() != 8)) {
1074 ASSERT_MSG(0, "SHRD - illegal shift"); 1081 ASSERT_MSG(0, "SHRD - illegal shift");
1075 } 1082 }
1076 if (bits == 16) 1083 if (bits == 16)
1077 Write8(0x66); 1084 Write8(0x66);
1078 X64Reg operand = src.GetSimpleReg(); 1085 X64Reg operand = src.GetSimpleReg();
1079 dest.WriteRex(this, bits, bits, operand); 1086 dest.WriteRex(this, bits, bits, operand);
1080 if (shift.GetImmBits() == 8) 1087 if (shift.GetImmBits() == 8) {
1081 { 1088 Write8(0x0F);
1082 Write8(0x0F); Write8(0xAC); 1089 Write8(0xAC);
1083 dest.WriteRest(this, 1, operand); 1090 dest.WriteRest(this, 1, operand);
1084 Write8((u8)shift.offset); 1091 Write8((u8)shift.offset);
1085 } 1092 } else {
1086 else 1093 Write8(0x0F);
1087 { 1094 Write8(0xAD);
1088 Write8(0x0F); Write8(0xAD);
1089 dest.WriteRest(this, 0, operand); 1095 dest.WriteRest(this, 0, operand);
1090 } 1096 }
1091} 1097}
1092 1098
1093void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) 1099void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1094{
1095 CheckFlags(); 1100 CheckFlags();
1096 if (dest.IsImm()) 1101 if (dest.IsImm()) {
1097 {
1098 ASSERT_MSG(0, "SHLD - can't use imms as destination"); 1102 ASSERT_MSG(0, "SHLD - can't use imms as destination");
1099 } 1103 }
1100 if (!src.IsSimpleReg()) 1104 if (!src.IsSimpleReg()) {
1101 {
1102 ASSERT_MSG(0, "SHLD - must use simple register as source"); 1105 ASSERT_MSG(0, "SHLD - must use simple register as source");
1103 } 1106 }
1104 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) 1107 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1105 { 1108 (shift.IsImm() && shift.GetImmBits() != 8)) {
1106 ASSERT_MSG(0, "SHLD - illegal shift"); 1109 ASSERT_MSG(0, "SHLD - illegal shift");
1107 } 1110 }
1108 if (bits == 16) 1111 if (bits == 16)
1109 Write8(0x66); 1112 Write8(0x66);
1110 X64Reg operand = src.GetSimpleReg(); 1113 X64Reg operand = src.GetSimpleReg();
1111 dest.WriteRex(this, bits, bits, operand); 1114 dest.WriteRex(this, bits, bits, operand);
1112 if (shift.GetImmBits() == 8) 1115 if (shift.GetImmBits() == 8) {
1113 { 1116 Write8(0x0F);
1114 Write8(0x0F); Write8(0xA4); 1117 Write8(0xA4);
1115 dest.WriteRest(this, 1, operand); 1118 dest.WriteRest(this, 1, operand);
1116 Write8((u8)shift.offset); 1119 Write8((u8)shift.offset);
1117 } 1120 } else {
1118 else 1121 Write8(0x0F);
1119 { 1122 Write8(0xA5);
1120 Write8(0x0F); Write8(0xA5);
1121 dest.WriteRest(this, 0, operand); 1123 dest.WriteRest(this, 0, operand);
1122 } 1124 }
1123} 1125}
1124 1126
1125void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits) 1127void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) {
1126{
1127 if (bits == 16) 1128 if (bits == 16)
1128 emit->Write8(0x66); 1129 emit->Write8(0x66);
1129 1130
@@ -1133,12 +1134,11 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit
1133 WriteRest(emit); 1134 WriteRest(emit);
1134} 1135}
1135 1136
1136//operand can either be immediate or register 1137// operand can either be immediate or register
1137void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const 1138void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
1138{ 1139 int bits) const {
1139 X64Reg _operandReg; 1140 X64Reg _operandReg;
1140 if (IsImm()) 1141 if (IsImm()) {
1141 {
1142 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); 1142 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
1143 } 1143 }
1144 1144
@@ -1147,27 +1147,22 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o
1147 1147
1148 int immToWrite = 0; 1148 int immToWrite = 0;
1149 1149
1150 if (operand.IsImm()) 1150 if (operand.IsImm()) {
1151 {
1152 WriteRex(emit, bits, bits); 1151 WriteRex(emit, bits, bits);
1153 1152
1154 if (!toRM) 1153 if (!toRM) {
1155 {
1156 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); 1154 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
1157 } 1155 }
1158 1156
1159 if (operand.scale == SCALE_IMM8 && bits == 8) 1157 if (operand.scale == SCALE_IMM8 && bits == 8) {
1160 {
1161 // op al, imm8 1158 // op al, imm8
1162 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) 1159 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) {
1163 {
1164 emit->Write8(normalops[op].eaximm8); 1160 emit->Write8(normalops[op].eaximm8);
1165 emit->Write8((u8)operand.offset); 1161 emit->Write8((u8)operand.offset);
1166 return; 1162 return;
1167 } 1163 }
1168 // mov reg, imm8 1164 // mov reg, imm8
1169 if (!scale && op == nrmMOV) 1165 if (!scale && op == nrmMOV) {
1170 {
1171 emit->Write8(0xB0 + (offsetOrBaseReg & 7)); 1166 emit->Write8(0xB0 + (offsetOrBaseReg & 7));
1172 emit->Write8((u8)operand.offset); 1167 emit->Write8((u8)operand.offset);
1173 return; 1168 return;
@@ -1175,26 +1170,20 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o
1175 // op r/m8, imm8 1170 // op r/m8, imm8
1176 emit->Write8(normalops[op].imm8); 1171 emit->Write8(normalops[op].imm8);
1177 immToWrite = 8; 1172 immToWrite = 8;
1178 } 1173 } else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
1179 else if ((operand.scale == SCALE_IMM16 && bits == 16) || 1174 (operand.scale == SCALE_IMM32 && bits == 32) ||
1180 (operand.scale == SCALE_IMM32 && bits == 32) || 1175 (operand.scale == SCALE_IMM32 && bits == 64)) {
1181 (operand.scale == SCALE_IMM32 && bits == 64))
1182 {
1183 // Try to save immediate size if we can, but first check to see 1176 // Try to save immediate size if we can, but first check to see
1184 // if the instruction supports simm8. 1177 // if the instruction supports simm8.
1185 // op r/m, imm8 1178 // op r/m, imm8
1186 if (normalops[op].simm8 != 0xCC && 1179 if (normalops[op].simm8 != 0xCC &&
1187 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || 1180 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
1188 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) 1181 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) {
1189 {
1190 emit->Write8(normalops[op].simm8); 1182 emit->Write8(normalops[op].simm8);
1191 immToWrite = 8; 1183 immToWrite = 8;
1192 } 1184 } else {
1193 else
1194 {
1195 // mov reg, imm 1185 // mov reg, imm
1196 if (!scale && op == nrmMOV && bits != 64) 1186 if (!scale && op == nrmMOV && bits != 64) {
1197 {
1198 emit->Write8(0xB8 + (offsetOrBaseReg & 7)); 1187 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1199 if (bits == 16) 1188 if (bits == 16)
1200 emit->Write16((u16)operand.offset); 1189 emit->Write16((u16)operand.offset);
@@ -1203,8 +1192,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o
1203 return; 1192 return;
1204 } 1193 }
1205 // op eax, imm 1194 // op eax, imm
1206 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) 1195 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) {
1207 {
1208 emit->Write8(normalops[op].eaximm32); 1196 emit->Write8(normalops[op].eaximm32);
1209 if (bits == 16) 1197 if (bits == 16)
1210 emit->Write16((u16)operand.offset); 1198 emit->Write16((u16)operand.offset);
@@ -1216,54 +1204,41 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o
1216 emit->Write8(normalops[op].imm32); 1204 emit->Write8(normalops[op].imm32);
1217 immToWrite = bits == 16 ? 16 : 32; 1205 immToWrite = bits == 16 ? 16 : 32;
1218 } 1206 }
1219 } 1207 } else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
1220 else if ((operand.scale == SCALE_IMM8 && bits == 16) || 1208 (operand.scale == SCALE_IMM8 && bits == 32) ||
1221 (operand.scale == SCALE_IMM8 && bits == 32) || 1209 (operand.scale == SCALE_IMM8 && bits == 64)) {
1222 (operand.scale == SCALE_IMM8 && bits == 64))
1223 {
1224 // op r/m, imm8 1210 // op r/m, imm8
1225 emit->Write8(normalops[op].simm8); 1211 emit->Write8(normalops[op].simm8);
1226 immToWrite = 8; 1212 immToWrite = 8;
1227 } 1213 } else if (operand.scale == SCALE_IMM64 && bits == 64) {
1228 else if (operand.scale == SCALE_IMM64 && bits == 64) 1214 if (scale) {
1229 {
1230 if (scale)
1231 {
1232 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); 1215 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
1233 } 1216 }
1234 // mov reg64, imm64 1217 // mov reg64, imm64
1235 else if (op == nrmMOV) 1218 else if (op == nrmMOV) {
1236 {
1237 emit->Write8(0xB8 + (offsetOrBaseReg & 7)); 1219 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1238 emit->Write64((u64)operand.offset); 1220 emit->Write64((u64)operand.offset);
1239 return; 1221 return;
1240 } 1222 }
1241 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); 1223 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
1242 } 1224 } else {
1243 else
1244 {
1245 ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); 1225 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1246 } 1226 }
1247 _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM 1227 _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM
1248 } 1228 } else {
1249 else
1250 {
1251 _operandReg = (X64Reg)operand.offsetOrBaseReg; 1229 _operandReg = (X64Reg)operand.offsetOrBaseReg;
1252 WriteRex(emit, bits, bits, _operandReg); 1230 WriteRex(emit, bits, bits, _operandReg);
1253 // op r/m, reg 1231 // op r/m, reg
1254 if (toRM) 1232 if (toRM) {
1255 {
1256 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); 1233 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
1257 } 1234 }
1258 // op reg, r/m 1235 // op reg, r/m
1259 else 1236 else {
1260 {
1261 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32); 1237 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
1262 } 1238 }
1263 } 1239 }
1264 WriteRest(emit, immToWrite >> 3, _operandReg); 1240 WriteRest(emit, immToWrite >> 3, _operandReg);
1265 switch (immToWrite) 1241 switch (immToWrite) {
1266 {
1267 case 0: 1242 case 0:
1268 break; 1243 break;
1269 case 8: 1244 case 8:
@@ -1280,66 +1255,84 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o
1280 } 1255 }
1281} 1256}
1282 1257
1283void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2) 1258void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1,
1284{ 1259 const OpArg& a2) {
1285 if (a1.IsImm()) 1260 if (a1.IsImm()) {
1286 { 1261 // Booh! Can't write to an imm
1287 //Booh! Can't write to an imm
1288 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); 1262 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1289 return; 1263 return;
1290 } 1264 }
1291 if (a2.IsImm()) 1265 if (a2.IsImm()) {
1292 {
1293 a1.WriteNormalOp(emit, true, op, a2, bits); 1266 a1.WriteNormalOp(emit, true, op, a2, bits);
1294 } 1267 } else {
1295 else 1268 if (a1.IsSimpleReg()) {
1296 {
1297 if (a1.IsSimpleReg())
1298 {
1299 a2.WriteNormalOp(emit, false, op, a1, bits); 1269 a2.WriteNormalOp(emit, false, op, a1, bits);
1300 } 1270 } else {
1301 else 1271 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(),
1302 { 1272 "WriteNormalOp - a1 and a2 cannot both be memory");
1303 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory");
1304 a1.WriteNormalOp(emit, true, op, a2, bits); 1273 a1.WriteNormalOp(emit, true, op, a2, bits);
1305 } 1274 }
1306 } 1275 }
1307} 1276}
1308 1277
1309void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} 1278void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) {
1310void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} 1279 CheckFlags();
1311void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} 1280 WriteNormalOp(this, bits, nrmADD, a1, a2);
1312void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} 1281}
1313void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} 1282void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) {
1314void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} 1283 CheckFlags();
1315void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} 1284 WriteNormalOp(this, bits, nrmADC, a1, a2);
1316void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2) 1285}
1317{ 1286void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) {
1287 CheckFlags();
1288 WriteNormalOp(this, bits, nrmSUB, a1, a2);
1289}
1290void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) {
1291 CheckFlags();
1292 WriteNormalOp(this, bits, nrmSBB, a1, a2);
1293}
1294void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) {
1295 CheckFlags();
1296 WriteNormalOp(this, bits, nrmAND, a1, a2);
1297}
1298void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) {
1299 CheckFlags();
1300 WriteNormalOp(this, bits, nrmOR, a1, a2);
1301}
1302void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) {
1303 CheckFlags();
1304 WriteNormalOp(this, bits, nrmXOR, a1, a2);
1305}
1306void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) {
1318 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) 1307 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1319 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); 1308 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1320 WriteNormalOp(this, bits, nrmMOV, a1, a2); 1309 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1321} 1310}
1322void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} 1311void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {
1323void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} 1312 CheckFlags();
1324void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} 1313 WriteNormalOp(this, bits, nrmTEST, a1, a2);
1314}
1315void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) {
1316 CheckFlags();
1317 WriteNormalOp(this, bits, nrmCMP, a1, a2);
1318}
1319void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {
1320 WriteNormalOp(this, bits, nrmXCHG, a1, a2);
1321}
1325 1322
1326void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) 1323void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) {
1327{
1328 CheckFlags(); 1324 CheckFlags();
1329 if (bits == 8) 1325 if (bits == 8) {
1330 {
1331 ASSERT_MSG(0, "IMUL - illegal bit size!"); 1326 ASSERT_MSG(0, "IMUL - illegal bit size!");
1332 return; 1327 return;
1333 } 1328 }
1334 1329
1335 if (a1.IsImm()) 1330 if (a1.IsImm()) {
1336 {
1337 ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); 1331 ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
1338 return; 1332 return;
1339 } 1333 }
1340 1334
1341 if (!a2.IsImm()) 1335 if (!a2.IsImm()) {
1342 {
1343 ASSERT_MSG(0, "IMUL - third arg must be imm!"); 1336 ASSERT_MSG(0, "IMUL - third arg must be imm!");
1344 return; 1337 return;
1345 } 1338 }
@@ -1348,46 +1341,34 @@ void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
1348 Write8(0x66); 1341 Write8(0x66);
1349 a1.WriteRex(this, bits, bits, regOp); 1342 a1.WriteRex(this, bits, bits, regOp);
1350 1343
1351 if (a2.GetImmBits() == 8 || 1344 if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
1352 (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || 1345 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) {
1353 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset))
1354 {
1355 Write8(0x6B); 1346 Write8(0x6B);
1356 a1.WriteRest(this, 1, regOp); 1347 a1.WriteRest(this, 1, regOp);
1357 Write8((u8)a2.offset); 1348 Write8((u8)a2.offset);
1358 } 1349 } else {
1359 else
1360 {
1361 Write8(0x69); 1350 Write8(0x69);
1362 if (a2.GetImmBits() == 16 && bits == 16) 1351 if (a2.GetImmBits() == 16 && bits == 16) {
1363 {
1364 a1.WriteRest(this, 2, regOp); 1352 a1.WriteRest(this, 2, regOp);
1365 Write16((u16)a2.offset); 1353 Write16((u16)a2.offset);
1366 } 1354 } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) {
1367 else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64))
1368 {
1369 a1.WriteRest(this, 4, regOp); 1355 a1.WriteRest(this, 4, regOp);
1370 Write32((u32)a2.offset); 1356 Write32((u32)a2.offset);
1371 } 1357 } else {
1372 else
1373 {
1374 ASSERT_MSG(0, "IMUL - unhandled case!"); 1358 ASSERT_MSG(0, "IMUL - unhandled case!");
1375 } 1359 }
1376 } 1360 }
1377} 1361}
1378 1362
1379void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) 1363void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) {
1380{
1381 CheckFlags(); 1364 CheckFlags();
1382 if (bits == 8) 1365 if (bits == 8) {
1383 {
1384 ASSERT_MSG(0, "IMUL - illegal bit size!"); 1366 ASSERT_MSG(0, "IMUL - illegal bit size!");
1385 return; 1367 return;
1386 } 1368 }
1387 1369
1388 if (a.IsImm()) 1370 if (a.IsImm()) {
1389 { 1371 IMUL(bits, regOp, R(regOp), a);
1390 IMUL(bits, regOp, R(regOp), a) ;
1391 return; 1372 return;
1392 } 1373 }
1393 1374
@@ -1399,9 +1380,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)
1399 a.WriteRest(this, 0, regOp); 1380 a.WriteRest(this, 0, regOp);
1400} 1381}
1401 1382
1402 1383void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) {
1403void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1404{
1405 if (opPrefix) 1384 if (opPrefix)
1406 Write8(opPrefix); 1385 Write8(opPrefix);
1407 arg.operandReg = regOp; 1386 arg.operandReg = regOp;
@@ -1413,13 +1392,11 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr
1413 arg.WriteRest(this, extrabytes); 1392 arg.WriteRest(this, extrabytes);
1414} 1393}
1415 1394
1416void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) 1395void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1417{
1418 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); 1396 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1419} 1397}
1420 1398
1421static int GetVEXmmmmm(u16 op) 1399static int GetVEXmmmmm(u16 op) {
1422{
1423 // Currently, only 0x38 and 0x3A are used as secondary escape byte. 1400 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1424 if ((op >> 8) == 0x3A) 1401 if ((op >> 8) == 0x3A)
1425 return 3; 1402 return 3;
@@ -1429,8 +1406,7 @@ static int GetVEXmmmmm(u16 op)
1429 return 1; 1406 return 1;
1430} 1407}
1431 1408
1432static int GetVEXpp(u8 opPrefix) 1409static int GetVEXpp(u8 opPrefix) {
1433{
1434 if (opPrefix == 0x66) 1410 if (opPrefix == 0x66)
1435 return 1; 1411 return 1;
1436 if (opPrefix == 0xF3) 1412 if (opPrefix == 0xF3)
@@ -1441,21 +1417,22 @@ static int GetVEXpp(u8 opPrefix)
1441 return 0; 1417 return 0;
1442} 1418}
1443 1419
1444void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) 1420void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
1445{ 1421 int extrabytes) {
1446 if (!Common::GetCPUCaps().avx) 1422 if (!Common::GetCPUCaps().avx)
1447 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); 1423 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
1448 int mmmmm = GetVEXmmmmm(op); 1424 int mmmmm = GetVEXmmmmm(op);
1449 int pp = GetVEXpp(opPrefix); 1425 int pp = GetVEXpp(opPrefix);
1450 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here 1426 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size
1427 // here
1451 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); 1428 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
1452 Write8(op & 0xFF); 1429 Write8(op & 0xFF);
1453 arg.WriteRest(this, extrabytes, regOp1); 1430 arg.WriteRest(this, extrabytes, regOp1);
1454} 1431}
1455 1432
1456// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 1433// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1457void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) 1434void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1458{ 1435 const OpArg& arg, int extrabytes) {
1459 if (size != 32 && size != 64) 1436 if (size != 32 && size != 64)
1460 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); 1437 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
1461 int mmmmm = GetVEXmmmmm(op); 1438 int mmmmm = GetVEXmmmmm(op);
@@ -1465,49 +1442,50 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r
1465 arg.WriteRest(this, extrabytes, regOp1); 1442 arg.WriteRest(this, extrabytes, regOp1);
1466} 1443}
1467 1444
1468void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) 1445void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1469{ 1446 const OpArg& arg, int extrabytes) {
1470 CheckFlags(); 1447 CheckFlags();
1471 if (!Common::GetCPUCaps().bmi1) 1448 if (!Common::GetCPUCaps().bmi1)
1472 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); 1449 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
1473 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); 1450 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1474} 1451}
1475 1452
1476void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) 1453void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1477{ 1454 const OpArg& arg, int extrabytes) {
1478 CheckFlags(); 1455 CheckFlags();
1479 if (!Common::GetCPUCaps().bmi2) 1456 if (!Common::GetCPUCaps().bmi2)
1480 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer."); 1457 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
1481 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); 1458 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1482} 1459}
1483 1460
1484void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);} 1461void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) {
1485void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);} 1462 WriteSSEOp(0x66, 0x6E, dest, arg, 0);
1463}
1464void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) {
1465 WriteSSEOp(0x66, 0x7E, src, arg, 0);
1466}
1486 1467
1487void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) 1468void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
1488{
1489#ifdef ARCHITECTURE_x86_64 1469#ifdef ARCHITECTURE_x86_64
1490 // Alternate encoding 1470 // Alternate encoding
1491 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD 1471 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1492 arg.operandReg = dest; 1472 arg.operandReg = dest;
1493 Write8(0x66); 1473 Write8(0x66);
1494 arg.WriteRex(this, 64, 0); 1474 arg.WriteRex(this, 64, 0);
1495 Write8(0x0f); 1475 Write8(0x0f);
1496 Write8(0x6E); 1476 Write8(0x6E);
1497 arg.WriteRest(this, 0); 1477 arg.WriteRest(this, 0);
1498#else 1478#else
1499 arg.operandReg = dest; 1479 arg.operandReg = dest;
1500 Write8(0xF3); 1480 Write8(0xF3);
1501 Write8(0x0f); 1481 Write8(0x0f);
1502 Write8(0x7E); 1482 Write8(0x7E);
1503 arg.WriteRest(this, 0); 1483 arg.WriteRest(this, 0);
1504#endif 1484#endif
1505} 1485}
1506 1486
1507void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) 1487void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
1508{ 1488 if (src > 7 || arg.IsSimpleReg()) {
1509 if (src > 7 || arg.IsSimpleReg())
1510 {
1511 // Alternate encoding 1489 // Alternate encoding
1512 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD 1490 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1513 arg.operandReg = src; 1491 arg.operandReg = src;
@@ -1516,9 +1494,7 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
1516 Write8(0x0f); 1494 Write8(0x0f);
1517 Write8(0x7E); 1495 Write8(0x7E);
1518 arg.WriteRest(this, 0); 1496 arg.WriteRest(this, 0);
1519 } 1497 } else {
1520 else
1521 {
1522 arg.operandReg = src; 1498 arg.operandReg = src;
1523 arg.WriteRex(this, 0, 0); 1499 arg.WriteRex(this, 0, 0);
1524 Write8(0x66); 1500 Write8(0x66);
@@ -1528,8 +1504,7 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
1528 } 1504 }
1529} 1505}
1530 1506
1531void XEmitter::WriteMXCSR(OpArg arg, int ext) 1507void XEmitter::WriteMXCSR(OpArg arg, int ext) {
1532{
1533 if (arg.IsImm() || arg.IsSimpleReg()) 1508 if (arg.IsImm() || arg.IsSimpleReg())
1534 ASSERT_MSG(0, "MXCSR - invalid operand"); 1509 ASSERT_MSG(0, "MXCSR - invalid operand");
1535 1510
@@ -1540,143 +1515,357 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)
1540 arg.WriteRest(this); 1515 arg.WriteRest(this);
1541} 1516}
1542 1517
1543void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);} 1518void XEmitter::STMXCSR(const OpArg& memloc) {
1544void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);} 1519 WriteMXCSR(memloc, 3);
1545 1520}
1546void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} 1521void XEmitter::LDMXCSR(const OpArg& memloc) {
1547void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} 1522 WriteMXCSR(memloc, 2);
1548void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} 1523}
1549 1524
1550void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} 1525void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {
1551void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} 1526 WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);
1552void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} 1527}
1553void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} 1528void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {
1554void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} 1529 WriteSSEOp(0x00, sseMOVNTP, regOp, arg);
1555void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} 1530}
1556void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} 1531void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {
1557void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} 1532 WriteSSEOp(0x66, sseMOVNTP, regOp, arg);
1558void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} 1533}
1559void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} 1534
1560void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} 1535void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {
1561void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} 1536 WriteSSEOp(0xF3, sseADD, regOp, arg);
1562void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} 1537}
1563void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} 1538void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {
1564void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} 1539 WriteSSEOp(0xF2, sseADD, regOp, arg);
1565void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} 1540}
1566void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);} 1541void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {
1567void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} 1542 WriteSSEOp(0xF3, sseSUB, regOp, arg);
1568 1543}
1569void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} 1544void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {
1570void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} 1545 WriteSSEOp(0xF2, sseSUB, regOp, arg);
1571void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} 1546}
1572void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} 1547void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {
1573void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} 1548 WriteSSEOp(0xF3, sseCMP, regOp, arg, 1);
1574void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} 1549 Write8(compare);
1575void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} 1550}
1576void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} 1551void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {
1577void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} 1552 WriteSSEOp(0xF2, sseCMP, regOp, arg, 1);
1578void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} 1553 Write8(compare);
1579void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} 1554}
1580void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} 1555void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {
1581void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} 1556 WriteSSEOp(0xF3, sseMUL, regOp, arg);
1582void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} 1557}
1583void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} 1558void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {
1584void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} 1559 WriteSSEOp(0xF2, sseMUL, regOp, arg);
1585void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} 1560}
1586void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} 1561void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {
1587void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} 1562 WriteSSEOp(0xF3, sseDIV, regOp, arg);
1588void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} 1563}
1589void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} 1564void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {
1590void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} 1565 WriteSSEOp(0xF2, sseDIV, regOp, arg);
1591void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} 1566}
1592void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} 1567void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {
1593void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } 1568 WriteSSEOp(0xF3, sseMIN, regOp, arg);
1594void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} 1569}
1595void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} 1570void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {
1596void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} 1571 WriteSSEOp(0xF2, sseMIN, regOp, arg);
1597 1572}
1598void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} 1573void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {
1599 1574 WriteSSEOp(0xF3, sseMAX, regOp, arg);
1600void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed 1575}
1601void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered 1576void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {
1602void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered 1577 WriteSSEOp(0xF2, sseMAX, regOp, arg);
1603void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} 1578}
1604 1579void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {
1605void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} 1580 WriteSSEOp(0xF3, sseSQRT, regOp, arg);
1606void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} 1581}
1607void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} 1582void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {
1608void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} 1583 WriteSSEOp(0xF2, sseSQRT, regOp, arg);
1609 1584}
1610void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} 1585void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {
1611void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} 1586 WriteSSEOp(0xF3, sseRCP, regOp, arg);
1612void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} 1587}
1613void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} 1588void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {
1614 1589 WriteSSEOp(0xF3, sseRSQRT, regOp, arg);
1615void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} 1590}
1616void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} 1591
1617void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} 1592void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {
1618void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} 1593 WriteSSEOp(0x00, sseADD, regOp, arg);
1619 1594}
1620void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} 1595void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {
1621void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} 1596 WriteSSEOp(0x66, sseADD, regOp, arg);
1622void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} 1597}
1623void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} 1598void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {
1624 1599 WriteSSEOp(0x00, sseSUB, regOp, arg);
1625void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } 1600}
1626void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } 1601void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {
1627void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } 1602 WriteSSEOp(0x66, sseSUB, regOp, arg);
1628void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } 1603}
1629 1604void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {
1630void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } 1605 WriteSSEOp(0x00, sseCMP, regOp, arg, 1);
1631void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } 1606 Write8(compare);
1632void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } 1607}
1633void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } 1608void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {
1634 1609 WriteSSEOp(0x66, sseCMP, regOp, arg, 1);
1635void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} 1610 Write8(compare);
1636void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} 1611}
1637 1612void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {
1638void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} 1613 WriteSSEOp(0x00, sseAND, regOp, arg);
1639void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} 1614}
1640 1615void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {
1641void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} 1616 WriteSSEOp(0x66, sseAND, regOp, arg);
1642void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} 1617}
1643void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} 1618void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {
1644void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} 1619 WriteSSEOp(0x00, sseANDN, regOp, arg);
1645void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} 1620}
1646void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} 1621void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {
1647 1622 WriteSSEOp(0x66, sseANDN, regOp, arg);
1648void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} 1623}
1649void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} 1624void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {
1650void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} 1625 WriteSSEOp(0x00, sseOR, regOp, arg);
1651void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} 1626}
1652 1627void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {
1653void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} 1628 WriteSSEOp(0x66, sseOR, regOp, arg);
1654void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} 1629}
1655void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} 1630void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {
1656void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} 1631 WriteSSEOp(0x00, sseXOR, regOp, arg);
1657 1632}
1658void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} 1633void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {
1659 1634 WriteSSEOp(0x66, sseXOR, regOp, arg);
1660void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);} 1635}
1661void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);} 1636void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {
1662 1637 WriteSSEOp(0x00, sseMUL, regOp, arg);
1663void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only 1638}
1639void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {
1640 WriteSSEOp(0x66, sseMUL, regOp, arg);
1641}
1642void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {
1643 WriteSSEOp(0x00, sseDIV, regOp, arg);
1644}
1645void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {
1646 WriteSSEOp(0x66, sseDIV, regOp, arg);
1647}
1648void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {
1649 WriteSSEOp(0x00, sseMIN, regOp, arg);
1650}
1651void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {
1652 WriteSSEOp(0x66, sseMIN, regOp, arg);
1653}
1654void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {
1655 WriteSSEOp(0x00, sseMAX, regOp, arg);
1656}
1657void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {
1658 WriteSSEOp(0x66, sseMAX, regOp, arg);
1659}
1660void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {
1661 WriteSSEOp(0x00, sseSQRT, regOp, arg);
1662}
1663void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {
1664 WriteSSEOp(0x66, sseSQRT, regOp, arg);
1665}
1666void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) {
1667 WriteSSEOp(0x00, sseRCP, regOp, arg);
1668}
1669void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {
1670 WriteSSEOp(0x00, sseRSQRT, regOp, arg);
1671}
1672void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1673 WriteSSEOp(0x00, sseSHUF, regOp, arg, 1);
1674 Write8(shuffle);
1675}
1676void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1677 WriteSSEOp(0x66, sseSHUF, regOp, arg, 1);
1678 Write8(shuffle);
1679}
1680
1681void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {
1682 WriteSSEOp(0xF2, sseHADD, regOp, arg);
1683}
1684
1685void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {
1686 WriteSSEOp(0x00, sseCOMIS, regOp, arg);
1687} // weird that these should be packed
1688void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {
1689 WriteSSEOp(0x66, sseCOMIS, regOp, arg);
1690} // ordered
1691void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {
1692 WriteSSEOp(0x00, sseUCOMIS, regOp, arg);
1693} // unordered
1694void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {
1695 WriteSSEOp(0x66, sseUCOMIS, regOp, arg);
1696}
1697
1698void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {
1699 WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);
1700}
1701void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {
1702 WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);
1703}
1704void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {
1705 WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);
1706}
1707void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {
1708 WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);
1709}
1710
1711void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {
1712 WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);
1713}
1714void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {
1715 WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);
1716}
1717void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {
1718 WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);
1719}
1720void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {
1721 WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);
1722}
1723
1724void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {
1725 WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);
1726}
1727void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {
1728 WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);
1729}
1730void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {
1731 WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);
1732}
1733void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {
1734 WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);
1735}
1736
1737void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {
1738 WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);
1739}
1740void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {
1741 WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);
1742}
1743void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {
1744 WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);
1745}
1746void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {
1747 WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);
1748}
1749
1750void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) {
1751 WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg);
1752}
1753void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) {
1754 WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg);
1755}
1756void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) {
1757 WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg);
1758}
1759void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) {
1760 WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg);
1761}
1762
1763void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) {
1764 WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg);
1765}
1766void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) {
1767 WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg);
1768}
1769void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) {
1770 WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg);
1771}
1772void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) {
1773 WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg);
1774}
1775
1776void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {
1777 WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));
1778}
1779void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {
1780 WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));
1781}
1782
1783void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {
1784 WriteSSEOp(0x00, 0x5A, regOp, arg);
1785}
1786void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {
1787 WriteSSEOp(0x66, 0x5A, regOp, arg);
1788}
1789
1790void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {
1791 WriteSSEOp(0xF2, 0x5A, regOp, arg);
1792}
1793void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {
1794 WriteSSEOp(0xF3, 0x5A, regOp, arg);
1795}
1796void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {
1797 WriteSSEOp(0xF2, 0x2D, regOp, arg);
1798}
1799void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {
1800 WriteSSEOp(0xF3, 0x2D, regOp, arg);
1801}
1802void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {
1803 WriteSSEOp(0xF2, 0x2A, regOp, arg);
1804}
1805void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {
1806 WriteSSEOp(0xF3, 0x2A, regOp, arg);
1807}
1808
1809void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {
1810 WriteSSEOp(0xF3, 0xE6, regOp, arg);
1811}
1812void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {
1813 WriteSSEOp(0x00, 0x5B, regOp, arg);
1814}
1815void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {
1816 WriteSSEOp(0xF2, 0xE6, regOp, arg);
1817}
1818void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {
1819 WriteSSEOp(0x66, 0x5B, regOp, arg);
1820}
1821
1822void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {
1823 WriteSSEOp(0xF2, 0x2C, regOp, arg);
1824}
1825void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {
1826 WriteSSEOp(0xF3, 0x2C, regOp, arg);
1827}
1828void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {
1829 WriteSSEOp(0xF3, 0x5B, regOp, arg);
1830}
1831void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {
1832 WriteSSEOp(0x66, 0xE6, regOp, arg);
1833}
1834
1835void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {
1836 WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));
1837}
1838
1839void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {
1840 WriteSSEOp(0x00, 0x50, dest, arg);
1841}
1842void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {
1843 WriteSSEOp(0x66, 0x50, dest, arg);
1844}
1845
1846void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {
1847 WriteSSEOp(0xF2, sseLDDQU, dest, arg);
1848} // For integer data only
1664 1849
1665// THESE TWO ARE UNTESTED. 1850// THESE TWO ARE UNTESTED.
1666void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} 1851void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {
1667void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} 1852 WriteSSEOp(0x00, 0x14, dest, arg);
1853}
1854void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {
1855 WriteSSEOp(0x00, 0x15, dest, arg);
1856}
1668 1857
1669void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} 1858void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {
1670void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} 1859 WriteSSEOp(0x66, 0x14, dest, arg);
1860}
1861void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {
1862 WriteSSEOp(0x66, 0x15, dest, arg);
1863}
1671 1864
1672void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) 1865void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) {
1673{ 1866 if (Common::GetCPUCaps().sse3) {
1674 if (Common::GetCPUCaps().sse3) 1867 WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup
1675 { 1868 } else {
1676 WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup
1677 }
1678 else
1679 {
1680 // Simulate this instruction with SSE2 instructions 1869 // Simulate this instruction with SSE2 instructions
1681 if (!arg.IsSimpleReg(regOp)) 1870 if (!arg.IsSimpleReg(regOp))
1682 MOVSD(regOp, arg); 1871 MOVSD(regOp, arg);
@@ -1684,38 +1873,48 @@ void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
1684 } 1873 }
1685} 1874}
1686 1875
1687//There are a few more left 1876// There are a few more left
1688 1877
1689// Also some integer instructions are missing 1878// Also some integer instructions are missing
1690void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} 1879void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {
1691void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);} 1880 WriteSSEOp(0x66, 0x6B, dest, arg);
1692void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);} 1881}
1882void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {
1883 WriteSSEOp(0x66, 0x63, dest, arg);
1884}
1885void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {
1886 WriteSSEOp(0x66, 0x67, dest, arg);
1887}
1693 1888
1694void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} 1889void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) {
1695void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} 1890 WriteSSEOp(0x66, 0x60, dest, arg);
1696void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);} 1891}
1697void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);} 1892void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) {
1893 WriteSSEOp(0x66, 0x61, dest, arg);
1894}
1895void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) {
1896 WriteSSEOp(0x66, 0x62, dest, arg);
1897}
1898void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) {
1899 WriteSSEOp(0x66, 0x6C, dest, arg);
1900}
1698 1901
1699void XEmitter::PSRLW(X64Reg reg, int shift) 1902void XEmitter::PSRLW(X64Reg reg, int shift) {
1700{
1701 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); 1903 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
1702 Write8(shift); 1904 Write8(shift);
1703} 1905}
1704 1906
1705void XEmitter::PSRLD(X64Reg reg, int shift) 1907void XEmitter::PSRLD(X64Reg reg, int shift) {
1706{
1707 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); 1908 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
1708 Write8(shift); 1909 Write8(shift);
1709} 1910}
1710 1911
1711void XEmitter::PSRLQ(X64Reg reg, int shift) 1912void XEmitter::PSRLQ(X64Reg reg, int shift) {
1712{
1713 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); 1913 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
1714 Write8(shift); 1914 Write8(shift);
1715} 1915}
1716 1916
1717void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) 1917void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) {
1718{
1719 WriteSSEOp(0x66, 0xd3, reg, arg); 1918 WriteSSEOp(0x66, 0xd3, reg, arg);
1720} 1919}
1721 1920
@@ -1724,20 +1923,17 @@ void XEmitter::PSRLDQ(X64Reg reg, int shift) {
1724 Write8(shift); 1923 Write8(shift);
1725} 1924}
1726 1925
1727void XEmitter::PSLLW(X64Reg reg, int shift) 1926void XEmitter::PSLLW(X64Reg reg, int shift) {
1728{
1729 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); 1927 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
1730 Write8(shift); 1928 Write8(shift);
1731} 1929}
1732 1930
1733void XEmitter::PSLLD(X64Reg reg, int shift) 1931void XEmitter::PSLLD(X64Reg reg, int shift) {
1734{
1735 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); 1932 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
1736 Write8(shift); 1933 Write8(shift);
1737} 1934}
1738 1935
1739void XEmitter::PSLLQ(X64Reg reg, int shift) 1936void XEmitter::PSLLQ(X64Reg reg, int shift) {
1740{
1741 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); 1937 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
1742 Write8(shift); 1938 Write8(shift);
1743} 1939}
@@ -1747,267 +1943,643 @@ void XEmitter::PSLLDQ(X64Reg reg, int shift) {
1747 Write8(shift); 1943 Write8(shift);
1748} 1944}
1749 1945
1750void XEmitter::PSRAW(X64Reg reg, int shift) 1946void XEmitter::PSRAW(X64Reg reg, int shift) {
1751{
1752 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); 1947 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
1753 Write8(shift); 1948 Write8(shift);
1754} 1949}
1755 1950
1756void XEmitter::PSRAD(X64Reg reg, int shift) 1951void XEmitter::PSRAD(X64Reg reg, int shift) {
1757{
1758 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); 1952 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
1759 Write8(shift); 1953 Write8(shift);
1760} 1954}
1761 1955
1762void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) 1956void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1763{
1764 if (!Common::GetCPUCaps().ssse3) 1957 if (!Common::GetCPUCaps().ssse3)
1765 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); 1958 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1766 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); 1959 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1767} 1960}
1768 1961
1769void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) 1962void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1770{
1771 if (!Common::GetCPUCaps().sse4_1) 1963 if (!Common::GetCPUCaps().sse4_1)
1772 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); 1964 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1773 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); 1965 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1774} 1966}
1775 1967
1776void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} 1968void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {
1777void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} 1969 WriteSSSE3Op(0x66, 0x3800, dest, arg);
1778void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} 1970}
1779void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} 1971void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {
1780 1972 WriteSSE41Op(0x66, 0x3817, dest, arg);
1781void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} 1973}
1782void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} 1974void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {
1783void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} 1975 WriteSSE41Op(0x66, 0x382b, dest, arg);
1784void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} 1976}
1785void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} 1977void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {
1786void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} 1978 WriteSSE41Op(0x66, 0x3A40, dest, arg, 1);
1787void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} 1979 Write8(mask);
1788void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} 1980}
1789 1981
1790void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} 1982void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {
1791void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} 1983 WriteSSE41Op(0x66, 0x3838, dest, arg);
1792void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} 1984}
1793void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} 1985void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {
1794void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} 1986 WriteSSE41Op(0x66, 0x3839, dest, arg);
1795void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} 1987}
1796void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} 1988void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {
1797void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} 1989 WriteSSE41Op(0x66, 0x383a, dest, arg);
1798void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} 1990}
1799void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} 1991void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {
1800void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} 1992 WriteSSE41Op(0x66, 0x383b, dest, arg);
1801void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} 1993}
1802 1994void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {
1803void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} 1995 WriteSSE41Op(0x66, 0x383c, dest, arg);
1804void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} 1996}
1805void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} 1997void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {
1806void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } 1998 WriteSSE41Op(0x66, 0x383d, dest, arg);
1807void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } 1999}
1808 2000void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {
1809void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} 2001 WriteSSE41Op(0x66, 0x383e, dest, arg);
1810void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} 2002}
1811void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} 2003void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {
1812void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} 2004 WriteSSE41Op(0x66, 0x383f, dest, arg);
1813 2005}
1814void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} 2006
1815void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} 2007void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {
1816void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} 2008 WriteSSE41Op(0x66, 0x3820, dest, arg);
1817void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} 2009}
1818 2010void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {
1819void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} 2011 WriteSSE41Op(0x66, 0x3821, dest, arg);
1820void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} 2012}
1821void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} 2013void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {
1822void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} 2014 WriteSSE41Op(0x66, 0x3822, dest, arg);
1823 2015}
1824void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} 2016void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {
1825void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);} 2017 WriteSSE41Op(0x66, 0x3823, dest, arg);
1826void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} 2018}
1827void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} 2019void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {
1828 2020 WriteSSE41Op(0x66, 0x3824, dest, arg);
1829void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} 2021}
1830void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} 2022void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {
1831void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} 2023 WriteSSE41Op(0x66, 0x3825, dest, arg);
1832void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} 2024}
1833 2025void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {
1834void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} 2026 WriteSSE41Op(0x66, 0x3830, dest, arg);
1835void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} 2027}
1836void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} 2028void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {
1837void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} 2029 WriteSSE41Op(0x66, 0x3831, dest, arg);
1838 2030}
1839void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} 2031void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {
1840void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} 2032 WriteSSE41Op(0x66, 0x3832, dest, arg);
1841 2033}
1842void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);} 2034void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {
1843void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);} 2035 WriteSSE41Op(0x66, 0x3833, dest, arg);
1844void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);} 2036}
1845 2037void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {
1846void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);} 2038 WriteSSE41Op(0x66, 0x3834, dest, arg);
1847void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);} 2039}
1848void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);} 2040void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {
1849 2041 WriteSSE41Op(0x66, 0x3835, dest, arg);
1850void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} 2042}
1851void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} 2043
1852 2044void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {
1853void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } 2045 WriteSSE41Op(0x66, 0x3810, dest, arg);
1854void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} 2046}
1855 2047void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {
1856void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } 2048 WriteSSE41Op(0x66, 0x3814, dest, arg);
1857void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } 2049}
1858void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } 2050void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {
1859void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } 2051 WriteSSE41Op(0x66, 0x3815, dest, arg);
1860 2052}
1861void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } 2053void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) {
1862void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} 2054 WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1);
1863void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} 2055 Write8(blend);
1864void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} 2056}
2057void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) {
2058 WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1);
2059 Write8(blend);
2060}
2061
2062void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {
2063 WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1);
2064 Write8(mode);
2065}
2066void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {
2067 WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1);
2068 Write8(mode);
2069}
2070void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {
2071 WriteSSE41Op(0x66, 0x3A08, dest, arg, 1);
2072 Write8(mode);
2073}
2074void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {
2075 WriteSSE41Op(0x66, 0x3A09, dest, arg, 1);
2076 Write8(mode);
2077}
2078
2079void XEmitter::PAND(X64Reg dest, const OpArg& arg) {
2080 WriteSSEOp(0x66, 0xDB, dest, arg);
2081}
2082void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {
2083 WriteSSEOp(0x66, 0xDF, dest, arg);
2084}
2085void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {
2086 WriteSSEOp(0x66, 0xEF, dest, arg);
2087}
2088void XEmitter::POR(X64Reg dest, const OpArg& arg) {
2089 WriteSSEOp(0x66, 0xEB, dest, arg);
2090}
2091
2092void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {
2093 WriteSSEOp(0x66, 0xFC, dest, arg);
2094}
2095void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {
2096 WriteSSEOp(0x66, 0xFD, dest, arg);
2097}
2098void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {
2099 WriteSSEOp(0x66, 0xFE, dest, arg);
2100}
2101void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {
2102 WriteSSEOp(0x66, 0xD4, dest, arg);
2103}
2104
2105void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {
2106 WriteSSEOp(0x66, 0xEC, dest, arg);
2107}
2108void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {
2109 WriteSSEOp(0x66, 0xED, dest, arg);
2110}
2111void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {
2112 WriteSSEOp(0x66, 0xDC, dest, arg);
2113}
2114void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {
2115 WriteSSEOp(0x66, 0xDD, dest, arg);
2116}
2117
2118void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {
2119 WriteSSEOp(0x66, 0xF8, dest, arg);
2120}
2121void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {
2122 WriteSSEOp(0x66, 0xF9, dest, arg);
2123}
2124void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {
2125 WriteSSEOp(0x66, 0xFA, dest, arg);
2126}
2127void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {
2128 WriteSSEOp(0x66, 0xFB, dest, arg);
2129}
2130
2131void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {
2132 WriteSSEOp(0x66, 0xE8, dest, arg);
2133}
2134void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {
2135 WriteSSEOp(0x66, 0xE9, dest, arg);
2136}
2137void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {
2138 WriteSSEOp(0x66, 0xD8, dest, arg);
2139}
2140void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {
2141 WriteSSEOp(0x66, 0xD9, dest, arg);
2142}
2143
2144void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {
2145 WriteSSEOp(0x66, 0xE0, dest, arg);
2146}
2147void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {
2148 WriteSSEOp(0x66, 0xE3, dest, arg);
2149}
2150
2151void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {
2152 WriteSSEOp(0x66, 0x74, dest, arg);
2153}
2154void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {
2155 WriteSSEOp(0x66, 0x75, dest, arg);
2156}
2157void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {
2158 WriteSSEOp(0x66, 0x76, dest, arg);
2159}
2160
2161void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {
2162 WriteSSEOp(0x66, 0x64, dest, arg);
2163}
2164void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {
2165 WriteSSEOp(0x66, 0x65, dest, arg);
2166}
2167void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {
2168 WriteSSEOp(0x66, 0x66, dest, arg);
2169}
2170
2171void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2172 WriteSSEOp(0x66, 0xC5, dest, arg, 1);
2173 Write8(subreg);
2174}
2175void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2176 WriteSSEOp(0x66, 0xC4, dest, arg, 1);
2177 Write8(subreg);
2178}
2179
2180void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {
2181 WriteSSEOp(0x66, 0xF5, dest, arg);
2182}
2183void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {
2184 WriteSSEOp(0x66, 0xF6, dest, arg);
2185}
2186
2187void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {
2188 WriteSSEOp(0x66, 0xEE, dest, arg);
2189}
2190void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {
2191 WriteSSEOp(0x66, 0xDE, dest, arg);
2192}
2193void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {
2194 WriteSSEOp(0x66, 0xEA, dest, arg);
2195}
2196void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {
2197 WriteSSEOp(0x66, 0xDA, dest, arg);
2198}
2199
2200void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {
2201 WriteSSEOp(0x66, 0xD7, dest, arg);
2202}
2203void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2204 WriteSSEOp(0x66, 0x70, regOp, arg, 1);
2205 Write8(shuffle);
2206}
2207void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2208 WriteSSEOp(0xF2, 0x70, regOp, arg, 1);
2209 Write8(shuffle);
2210}
2211void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2212 WriteSSEOp(0xF3, 0x70, regOp, arg, 1);
2213 Write8(shuffle);
2214}
1865 2215
1866// VEX 2216// VEX
1867void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} 2217void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1868void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} 2218 WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);
1869void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} 2219}
1870void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} 2220void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1871void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} 2221 WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);
1872void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} 2222}
1873void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} 2223void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1874void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} 2224 WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);
1875void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} 2225}
1876void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} 2226void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1877void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} 2227 WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);
1878void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} 2228}
1879 2229void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1880void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } 2230 WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);
1881void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } 2231}
1882void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } 2232void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1883void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } 2233 WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);
1884void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } 2234}
1885void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } 2235void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1886void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } 2236 WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);
1887void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } 2237}
1888 2238void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1889void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } 2239 WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);
1890void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } 2240}
1891void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } 2241void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1892void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } 2242 WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);
1893 2243}
1894void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } 2244void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {
1895void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } 2245 WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1);
1896void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } 2246 Write8(shuffle);
1897void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } 2247}
1898void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } 2248void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1899void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } 2249 WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);
1900void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } 2250}
1901void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } 2251void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1902void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } 2252 WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);
1903void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } 2253}
1904void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } 2254
1905void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } 2255void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1906void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } 2256 WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg);
1907void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } 2257}
1908void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } 2258void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1909void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } 2259 WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg);
1910void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } 2260}
1911void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } 2261void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1912void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } 2262 WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg);
1913void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } 2263}
1914void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } 2264void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1915void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } 2265 WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg);
1916void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } 2266}
1917void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } 2267void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1918void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } 2268 WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg);
1919void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } 2269}
1920void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } 2270void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1921void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } 2271 WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg);
1922void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } 2272}
1923void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } 2273void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1924void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } 2274 WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg);
1925void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } 2275}
1926void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } 2276void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1927void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } 2277 WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg);
1928void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } 2278}
1929void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } 2279
1930void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } 2280void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1931void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } 2281 WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg);
1932void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } 2282}
1933void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } 2283void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1934void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } 2284 WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg);
1935void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } 2285}
1936void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } 2286void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1937void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } 2287 WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg);
1938void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } 2288}
1939void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } 2289void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1940void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } 2290 WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
1941void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } 2291}
1942void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } 2292
1943void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } 2293void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1944void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } 2294 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg);
1945void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } 2295}
1946void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } 2296void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1947void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } 2297 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg);
1948void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } 2298}
1949void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } 2299void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1950void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } 2300 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg);
1951void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } 2301}
1952void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } 2302void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1953void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } 2303 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1);
1954 2304}
1955void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} 2305void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1956void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} 2306 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1);
1957void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} 2307}
1958void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} 2308void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1959void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} 2309 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1);
1960void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} 2310}
1961void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} 2311void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1962void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} 2312 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg);
1963void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} 2313}
1964void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} 2314void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
1965void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} 2315 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg);
1966void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} 2316}
1967void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} 2317void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2318 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg);
2319}
2320void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2321 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1);
2322}
2323void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2324 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1);
2325}
2326void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2327 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1);
2328}
2329void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2330 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg);
2331}
2332void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2333 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg);
2334}
2335void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2336 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg);
2337}
2338void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2339 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1);
2340}
2341void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2342 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1);
2343}
2344void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2345 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1);
2346}
2347void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2348 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg);
2349}
2350void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2351 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg);
2352}
2353void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2354 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg);
2355}
2356void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2357 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1);
2358}
2359void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2360 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1);
2361}
2362void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2363 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1);
2364}
2365void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2366 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg);
2367}
2368void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2369 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg);
2370}
2371void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2372 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg);
2373}
2374void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2375 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1);
2376}
2377void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2378 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1);
2379}
2380void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2381 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1);
2382}
2383void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2384 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg);
2385}
2386void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2387 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg);
2388}
2389void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2390 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg);
2391}
2392void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2393 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1);
2394}
2395void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2396 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1);
2397}
2398void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2399 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1);
2400}
2401void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2402 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg);
2403}
2404void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2405 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg);
2406}
2407void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2408 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg);
2409}
2410void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2411 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1);
2412}
2413void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2414 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1);
2415}
2416void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2417 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1);
2418}
2419void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2420 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg);
2421}
2422void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2423 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg);
2424}
2425void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2426 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg);
2427}
2428void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2429 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1);
2430}
2431void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2432 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1);
2433}
2434void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2435 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1);
2436}
2437void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2438 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg);
2439}
2440void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2441 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg);
2442}
2443void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2444 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg);
2445}
2446void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2447 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1);
2448}
2449void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2450 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1);
2451}
2452void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2453 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1);
2454}
2455void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2456 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg);
2457}
2458void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2459 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg);
2460}
2461void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2462 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg);
2463}
2464void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2465 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1);
2466}
2467void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2468 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1);
2469}
2470void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2471 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1);
2472}
2473
2474void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2475 WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);
2476}
2477void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2478 WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);
2479}
2480void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2481 WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);
2482}
2483void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {
2484 WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1);
2485 Write8(rotate);
2486}
2487void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2488 WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);
2489}
2490void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2491 WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);
2492}
2493void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2494 WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);
2495}
2496void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2497 WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);
2498}
2499void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {
2500 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);
2501}
2502void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {
2503 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);
2504}
2505void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {
2506 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);
2507}
2508void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2509 WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);
2510}
2511void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2512 WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);
2513}
1968 2514
1969// Prefixes 2515// Prefixes
1970 2516
1971void XEmitter::LOCK() { Write8(0xF0); } 2517void XEmitter::LOCK() {
1972void XEmitter::REP() { Write8(0xF3); } 2518 Write8(0xF0);
1973void XEmitter::REPNE() { Write8(0xF2); } 2519}
1974void XEmitter::FSOverride() { Write8(0x64); } 2520void XEmitter::REP() {
1975void XEmitter::GSOverride() { Write8(0x65); } 2521 Write8(0xF3);
2522}
2523void XEmitter::REPNE() {
2524 Write8(0xF2);
2525}
2526void XEmitter::FSOverride() {
2527 Write8(0x64);
2528}
2529void XEmitter::GSOverride() {
2530 Write8(0x65);
2531}
1976 2532
1977void XEmitter::FWAIT() 2533void XEmitter::FWAIT() {
1978{
1979 Write8(0x9B); 2534 Write8(0x9B);
1980} 2535}
1981 2536
1982// TODO: make this more generic 2537// TODO: make this more generic
1983void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) 2538void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) {
1984{
1985 int mf = 0; 2539 int mf = 0;
1986 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); 2540 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID),
1987 switch (bits) 2541 "WriteFloatLoadStore: 80 bits not supported for this instruction");
1988 { 2542 switch (bits) {
1989 case 32: mf = 0; break; 2543 case 32:
1990 case 64: mf = 4; break; 2544 mf = 0;
1991 case 80: mf = 2; break; 2545 break;
1992 default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); 2546 case 64:
2547 mf = 4;
2548 break;
2549 case 80:
2550 mf = 2;
2551 break;
2552 default:
2553 ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
1993 } 2554 }
1994 Write8(0xd9 | mf); 2555 Write8(0xd9 | mf);
1995 // x87 instructions use the reg field of the ModR/M byte as opcode: 2556 // x87 instructions use the reg field of the ModR/M byte as opcode:
1996 if (bits == 80) 2557 if (bits == 80)
1997 op = op_80b; 2558 op = op_80b;
1998 arg.WriteRest(this, 0, (X64Reg) op); 2559 arg.WriteRest(this, 0, (X64Reg)op);
1999} 2560}
2000 2561
2001void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} 2562void XEmitter::FLD(int bits, const OpArg& src) {
2002void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} 2563 WriteFloatLoadStore(bits, floatLD, floatLD80, src);
2003void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} 2564}
2004void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } 2565void XEmitter::FST(int bits, const OpArg& dest) {
2566 WriteFloatLoadStore(bits, floatST, floatINVALID, dest);
2567}
2568void XEmitter::FSTP(int bits, const OpArg& dest) {
2569 WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);
2570}
2571void XEmitter::FNSTSW_AX() {
2572 Write8(0xDF);
2573 Write8(0xE0);
2574}
2005 2575
2006void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } 2576void XEmitter::RDTSC() {
2577 Write8(0x0F);
2578 Write8(0x31);
2579}
2007 2580
2008void XCodeBlock::PoisonMemory() { 2581void XCodeBlock::PoisonMemory() {
2009 // x86/64: 0xCC = breakpoint 2582 // x86/64: 0xCC = breakpoint
2010 memset(region, 0xCC, region_size); 2583 memset(region, 0xCC, region_size);
2011} 2584}
2012
2013} 2585}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 60a77dfe1..467f7812f 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -21,8 +21,8 @@
21 21
22#include "common/assert.h" 22#include "common/assert.h"
23#include "common/bit_set.h" 23#include "common/bit_set.h"
24#include "common/common_types.h"
25#include "common/code_block.h" 24#include "common/code_block.h"
25#include "common/common_types.h"
26 26
27#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64) 27#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
28#define _ARCH_64 28#define _ARCH_64
@@ -34,75 +34,145 @@
34#define PTRBITS 32 34#define PTRBITS 32
35#endif 35#endif
36 36
37namespace Gen 37namespace Gen {
38{ 38
39 39enum X64Reg {
40enum X64Reg 40 EAX = 0,
41{ 41 EBX = 3,
42 EAX = 0, EBX = 3, ECX = 1, EDX = 2, 42 ECX = 1,
43 ESI = 6, EDI = 7, EBP = 5, ESP = 4, 43 EDX = 2,
44 44 ESI = 6,
45 RAX = 0, RBX = 3, RCX = 1, RDX = 2, 45 EDI = 7,
46 RSI = 6, RDI = 7, RBP = 5, RSP = 4, 46 EBP = 5,
47 R8 = 8, R9 = 9, R10 = 10,R11 = 11, 47 ESP = 4,
48 R12 = 12,R13 = 13,R14 = 14,R15 = 15, 48
49 49 RAX = 0,
50 AL = 0, BL = 3, CL = 1, DL = 2, 50 RBX = 3,
51 SIL = 6, DIL = 7, BPL = 5, SPL = 4, 51 RCX = 1,
52 AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106, 52 RDX = 2,
53 53 RSI = 6,
54 AX = 0, BX = 3, CX = 1, DX = 2, 54 RDI = 7,
55 SI = 6, DI = 7, BP = 5, SP = 4, 55 RBP = 5,
56 56 RSP = 4,
57 XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, 57 R8 = 8,
58 XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, 58 R9 = 9,
59 59 R10 = 10,
60 YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, 60 R11 = 11,
61 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, 61 R12 = 12,
62 R13 = 13,
63 R14 = 14,
64 R15 = 15,
65
66 AL = 0,
67 BL = 3,
68 CL = 1,
69 DL = 2,
70 SIL = 6,
71 DIL = 7,
72 BPL = 5,
73 SPL = 4,
74 AH = 0x104,
75 BH = 0x107,
76 CH = 0x105,
77 DH = 0x106,
78
79 AX = 0,
80 BX = 3,
81 CX = 1,
82 DX = 2,
83 SI = 6,
84 DI = 7,
85 BP = 5,
86 SP = 4,
87
88 XMM0 = 0,
89 XMM1,
90 XMM2,
91 XMM3,
92 XMM4,
93 XMM5,
94 XMM6,
95 XMM7,
96 XMM8,
97 XMM9,
98 XMM10,
99 XMM11,
100 XMM12,
101 XMM13,
102 XMM14,
103 XMM15,
104
105 YMM0 = 0,
106 YMM1,
107 YMM2,
108 YMM3,
109 YMM4,
110 YMM5,
111 YMM6,
112 YMM7,
113 YMM8,
114 YMM9,
115 YMM10,
116 YMM11,
117 YMM12,
118 YMM13,
119 YMM14,
120 YMM15,
62 121
63 INVALID_REG = 0xFFFFFFFF 122 INVALID_REG = 0xFFFFFFFF
64}; 123};
65 124
66enum CCFlags 125enum CCFlags {
67{ 126 CC_O = 0,
68 CC_O = 0, 127 CC_NO = 1,
69 CC_NO = 1, 128 CC_B = 2,
70 CC_B = 2, CC_C = 2, CC_NAE = 2, 129 CC_C = 2,
71 CC_NB = 3, CC_NC = 3, CC_AE = 3, 130 CC_NAE = 2,
72 CC_Z = 4, CC_E = 4, 131 CC_NB = 3,
73 CC_NZ = 5, CC_NE = 5, 132 CC_NC = 3,
74 CC_BE = 6, CC_NA = 6, 133 CC_AE = 3,
75 CC_NBE = 7, CC_A = 7, 134 CC_Z = 4,
76 CC_S = 8, 135 CC_E = 4,
77 CC_NS = 9, 136 CC_NZ = 5,
78 CC_P = 0xA, CC_PE = 0xA, 137 CC_NE = 5,
79 CC_NP = 0xB, CC_PO = 0xB, 138 CC_BE = 6,
80 CC_L = 0xC, CC_NGE = 0xC, 139 CC_NA = 6,
81 CC_NL = 0xD, CC_GE = 0xD, 140 CC_NBE = 7,
82 CC_LE = 0xE, CC_NG = 0xE, 141 CC_A = 7,
83 CC_NLE = 0xF, CC_G = 0xF 142 CC_S = 8,
143 CC_NS = 9,
144 CC_P = 0xA,
145 CC_PE = 0xA,
146 CC_NP = 0xB,
147 CC_PO = 0xB,
148 CC_L = 0xC,
149 CC_NGE = 0xC,
150 CC_NL = 0xD,
151 CC_GE = 0xD,
152 CC_LE = 0xE,
153 CC_NG = 0xE,
154 CC_NLE = 0xF,
155 CC_G = 0xF
84}; 156};
85 157
86enum 158enum {
87{
88 NUMGPRs = 16, 159 NUMGPRs = 16,
89 NUMXMMs = 16, 160 NUMXMMs = 16,
90}; 161};
91 162
92enum 163enum {
93{
94 SCALE_NONE = 0, 164 SCALE_NONE = 0,
95 SCALE_1 = 1, 165 SCALE_1 = 1,
96 SCALE_2 = 2, 166 SCALE_2 = 2,
97 SCALE_4 = 4, 167 SCALE_4 = 4,
98 SCALE_8 = 8, 168 SCALE_8 = 8,
99 SCALE_ATREG = 16, 169 SCALE_ATREG = 16,
100 //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG 170 // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
101 SCALE_NOBASE_2 = 34, 171 SCALE_NOBASE_2 = 34,
102 SCALE_NOBASE_4 = 36, 172 SCALE_NOBASE_4 = 36,
103 SCALE_NOBASE_8 = 40, 173 SCALE_NOBASE_8 = 40,
104 SCALE_RIP = 0xFF, 174 SCALE_RIP = 0xFF,
105 SCALE_IMM8 = 0xF0, 175 SCALE_IMM8 = 0xF0,
106 SCALE_IMM16 = 0xF1, 176 SCALE_IMM16 = 0xF1,
107 SCALE_IMM32 = 0xF2, 177 SCALE_IMM32 = 0xF2,
108 SCALE_IMM64 = 0xF3, 178 SCALE_IMM64 = 0xF3,
@@ -114,7 +184,7 @@ enum NormalOp {
114 nrmSUB, 184 nrmSUB,
115 nrmSBB, 185 nrmSBB,
116 nrmAND, 186 nrmAND,
117 nrmOR , 187 nrmOR,
118 nrmXOR, 188 nrmXOR,
119 nrmMOV, 189 nrmMOV,
120 nrmTEST, 190 nrmTEST,
@@ -157,68 +227,74 @@ enum FloatRound {
157class XEmitter; 227class XEmitter;
158 228
159// RIP addressing does not benefit from micro op fusion on Core arch 229// RIP addressing does not benefit from micro op fusion on Core arch
160struct OpArg 230struct OpArg {
161{
162 friend class XEmitter; 231 friend class XEmitter;
163 232
164 constexpr OpArg() = default; // dummy op arg, used for storage 233 constexpr OpArg() = default; // dummy op arg, used for storage
165 constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) 234 constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
166 : scale(static_cast<u8>(scale_)) 235 : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)),
167 , offsetOrBaseReg(static_cast<u16>(rmReg)) 236 indexReg(static_cast<u16>(scaledReg)), offset(offset_) {
168 , indexReg(static_cast<u16>(scaledReg))
169 , offset(offset_)
170 {
171 } 237 }
172 238
173 constexpr bool operator==(const OpArg &b) const 239 constexpr bool operator==(const OpArg& b) const {
174 { 240 return operandReg == b.operandReg && scale == b.scale &&
175 return operandReg == b.operandReg && 241 offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset;
176 scale == b.scale &&
177 offsetOrBaseReg == b.offsetOrBaseReg &&
178 indexReg == b.indexReg &&
179 offset == b.offset;
180 } 242 }
181 243
182 void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; 244 void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
183 void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; 245 void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
184 void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; 246 int W = 0) const;
185 void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); 247 void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG,
186 void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const; 248 bool warn_64bit_offset = true) const;
187 249 void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits);
188 constexpr bool IsImm() const { return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64; } 250 void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
189 constexpr bool IsSimpleReg() const { return scale == SCALE_NONE; } 251 int bits) const;
190 constexpr bool IsSimpleReg(X64Reg reg) const 252
191 { 253 constexpr bool IsImm() const {
254 return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 ||
255 scale == SCALE_IMM64;
256 }
257 constexpr bool IsSimpleReg() const {
258 return scale == SCALE_NONE;
259 }
260 constexpr bool IsSimpleReg(X64Reg reg) const {
192 return IsSimpleReg() && GetSimpleReg() == reg; 261 return IsSimpleReg() && GetSimpleReg() == reg;
193 } 262 }
194 263
195 int GetImmBits() const 264 int GetImmBits() const {
196 { 265 switch (scale) {
197 switch (scale) 266 case SCALE_IMM8:
198 { 267 return 8;
199 case SCALE_IMM8: return 8; 268 case SCALE_IMM16:
200 case SCALE_IMM16: return 16; 269 return 16;
201 case SCALE_IMM32: return 32; 270 case SCALE_IMM32:
202 case SCALE_IMM64: return 64; 271 return 32;
203 default: return -1; 272 case SCALE_IMM64:
273 return 64;
274 default:
275 return -1;
204 } 276 }
205 } 277 }
206 278
207 void SetImmBits(int bits) { 279 void SetImmBits(int bits) {
208 switch (bits) 280 switch (bits) {
209 { 281 case 8:
210 case 8: scale = SCALE_IMM8; break; 282 scale = SCALE_IMM8;
211 case 16: scale = SCALE_IMM16; break; 283 break;
212 case 32: scale = SCALE_IMM32; break; 284 case 16:
213 case 64: scale = SCALE_IMM64; break; 285 scale = SCALE_IMM16;
286 break;
287 case 32:
288 scale = SCALE_IMM32;
289 break;
290 case 64:
291 scale = SCALE_IMM64;
292 break;
214 } 293 }
215 } 294 }
216 295
217 constexpr X64Reg GetSimpleReg() const 296 constexpr X64Reg GetSimpleReg() const {
218 { 297 return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG;
219 return scale == SCALE_NONE
220 ? static_cast<X64Reg>(offsetOrBaseReg)
221 : INVALID_REG;
222 } 298 }
223 299
224 constexpr u32 GetImmValue() const { 300 constexpr u32 GetImmValue() const {
@@ -234,41 +310,50 @@ private:
234 u8 scale = 0; 310 u8 scale = 0;
235 u16 offsetOrBaseReg = 0; 311 u16 offsetOrBaseReg = 0;
236 u16 indexReg = 0; 312 u16 indexReg = 0;
237 u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available. 313 u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
238 u16 operandReg = 0; 314 u16 operandReg = 0;
239}; 315};
240 316
241template <typename T> 317template <typename T>
242inline OpArg M(const T *ptr) { return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); } 318inline OpArg M(const T* ptr) {
243constexpr OpArg R(X64Reg value) { return OpArg(0, SCALE_NONE, value); } 319 return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP));
244constexpr OpArg MatR(X64Reg value) { return OpArg(0, SCALE_ATREG, value); } 320}
321constexpr OpArg R(X64Reg value) {
322 return OpArg(0, SCALE_NONE, value);
323}
324constexpr OpArg MatR(X64Reg value) {
325 return OpArg(0, SCALE_ATREG, value);
326}
245 327
246constexpr OpArg MDisp(X64Reg value, int offset) 328constexpr OpArg MDisp(X64Reg value, int offset) {
247{
248 return OpArg(static_cast<u32>(offset), SCALE_ATREG, value); 329 return OpArg(static_cast<u32>(offset), SCALE_ATREG, value);
249} 330}
250 331
251constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) 332constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
252{
253 return OpArg(offset, scale, base, scaled); 333 return OpArg(offset, scale, base, scaled);
254} 334}
255 335
256constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) 336constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) {
257{ 337 return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled)
258 return scale == SCALE_1 338 : OpArg(offset, scale | 0x20, RAX, scaled);
259 ? OpArg(offset, SCALE_ATREG, scaled)
260 : OpArg(offset, scale | 0x20, RAX, scaled);
261} 339}
262 340
263constexpr OpArg MRegSum(X64Reg base, X64Reg offset) 341constexpr OpArg MRegSum(X64Reg base, X64Reg offset) {
264{
265 return MComplex(base, offset, 1, 0); 342 return MComplex(base, offset, 1, 0);
266} 343}
267 344
268constexpr OpArg Imm8 (u8 imm) { return OpArg(imm, SCALE_IMM8); } 345constexpr OpArg Imm8(u8 imm) {
269constexpr OpArg Imm16(u16 imm) { return OpArg(imm, SCALE_IMM16); } //rarely used 346 return OpArg(imm, SCALE_IMM8);
270constexpr OpArg Imm32(u32 imm) { return OpArg(imm, SCALE_IMM32); } 347}
271constexpr OpArg Imm64(u64 imm) { return OpArg(imm, SCALE_IMM64); } 348constexpr OpArg Imm16(u16 imm) {
349 return OpArg(imm, SCALE_IMM16);
350} // rarely used
351constexpr OpArg Imm32(u32 imm) {
352 return OpArg(imm, SCALE_IMM32);
353}
354constexpr OpArg Imm64(u64 imm) {
355 return OpArg(imm, SCALE_IMM64);
356}
272constexpr OpArg UImmAuto(u32 imm) { 357constexpr OpArg UImmAuto(u32 imm) {
273 return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); 358 return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
274} 359}
@@ -277,8 +362,7 @@ constexpr OpArg SImmAuto(s32 imm) {
277} 362}
278 363
279template <typename T> 364template <typename T>
280OpArg ImmPtr(const T* imm) 365OpArg ImmPtr(const T* imm) {
281{
282#ifdef _ARCH_64 366#ifdef _ARCH_64
283 return Imm64(reinterpret_cast<u64>(imm)); 367 return Imm64(reinterpret_cast<u64>(imm));
284#else 368#else
@@ -286,36 +370,31 @@ OpArg ImmPtr(const T* imm)
286#endif 370#endif
287} 371}
288 372
289inline u32 PtrOffset(const void* ptr, const void* base) 373inline u32 PtrOffset(const void* ptr, const void* base) {
290{
291#ifdef _ARCH_64 374#ifdef _ARCH_64
292 s64 distance = (s64)ptr-(s64)base; 375 s64 distance = (s64)ptr - (s64)base;
293 if (distance >= 0x80000000LL || 376 if (distance >= 0x80000000LL || distance < -0x80000000LL) {
294 distance < -0x80000000LL)
295 {
296 ASSERT_MSG(0, "pointer offset out of range"); 377 ASSERT_MSG(0, "pointer offset out of range");
297 return 0; 378 return 0;
298 } 379 }
299 380
300 return (u32)distance; 381 return (u32)distance;
301#else 382#else
302 return (u32)ptr-(u32)base; 383 return (u32)ptr - (u32)base;
303#endif 384#endif
304} 385}
305 386
306//usage: int a[]; ARRAY_OFFSET(a,10) 387// usage: int a[]; ARRAY_OFFSET(a,10)
307#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0])) 388#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0]))
308//usage: struct {int e;} s; STRUCT_OFFSET(s,e) 389// usage: struct {int e;} s; STRUCT_OFFSET(s,e)
309#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) 390#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
310 391
311struct FixupBranch 392struct FixupBranch {
312{ 393 u8* ptr;
313 u8 *ptr; 394 int type; // 0 = 8bit 1 = 32bit
314 int type; //0 = 8bit 1 = 32bit
315}; 395};
316 396
317enum SSECompare 397enum SSECompare {
318{
319 EQ = 0, 398 EQ = 0,
320 LT, 399 LT,
321 LE, 400 LE,
@@ -326,11 +405,10 @@ enum SSECompare
326 ORD, 405 ORD,
327}; 406};
328 407
329class XEmitter 408class XEmitter {
330{ 409 friend struct OpArg; // for Write8 etc
331 friend struct OpArg; // for Write8 etc
332private: 410private:
333 u8 *code; 411 u8* code;
334 bool flags_locked; 412 bool flags_locked;
335 413
336 void CheckFlags(); 414 void CheckFlags();
@@ -347,14 +425,19 @@ private:
347 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); 425 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
348 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); 426 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
349 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); 427 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
350 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); 428 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
351 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); 429 int extrabytes = 0);
352 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); 430 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
353 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); 431 int extrabytes = 0);
432 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
433 int extrabytes = 0);
434 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
435 int extrabytes = 0);
354 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); 436 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
355 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); 437 void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
356 438
357 void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); 439 void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
440 size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
358 441
359protected: 442protected:
360 void Write8(u8 value); 443 void Write8(u8 value);
@@ -363,26 +446,38 @@ protected:
363 void Write64(u64 value); 446 void Write64(u64 value);
364 447
365public: 448public:
366 XEmitter() { code = nullptr; flags_locked = false; } 449 XEmitter() {
367 XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; } 450 code = nullptr;
368 virtual ~XEmitter() {} 451 flags_locked = false;
452 }
453 XEmitter(u8* code_ptr) {
454 code = code_ptr;
455 flags_locked = false;
456 }
457 virtual ~XEmitter() {
458 }
369 459
370 void WriteModRM(int mod, int rm, int reg); 460 void WriteModRM(int mod, int rm, int reg);
371 void WriteSIB(int scale, int index, int base); 461 void WriteSIB(int scale, int index, int base);
372 462
373 void SetCodePtr(u8 *ptr); 463 void SetCodePtr(u8* ptr);
374 void ReserveCodeSpace(int bytes); 464 void ReserveCodeSpace(int bytes);
375 const u8 *AlignCode4(); 465 const u8* AlignCode4();
376 const u8 *AlignCode16(); 466 const u8* AlignCode16();
377 const u8 *AlignCodePage(); 467 const u8* AlignCodePage();
378 const u8 *GetCodePtr() const; 468 const u8* GetCodePtr() const;
379 u8 *GetWritableCodePtr(); 469 u8* GetWritableCodePtr();
380 470
381 void LockFlags() { flags_locked = true; } 471 void LockFlags() {
382 void UnlockFlags() { flags_locked = false; } 472 flags_locked = true;
473 }
474 void UnlockFlags() {
475 flags_locked = false;
476 }
383 477
384 // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU 478 // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU
385 // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., 479 // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other
480 // string instr.,
386 // INC and DEC are slow on Intel Core, but not on AMD. They create a 481 // INC and DEC are slow on Intel Core, but not on AMD. They create a
387 // false flag dependency because they only update a subset of the flags. 482 // false flag dependency because they only update a subset of the flags.
388 // XCHG is SLOW and should be avoided. 483 // XCHG is SLOW and should be avoided.
@@ -401,11 +496,11 @@ public:
401 void CLC(); 496 void CLC();
402 void CMC(); 497 void CMC();
403 498
404 // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD! 499 // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and
500 // AMD!
405 void LAHF(); // 3 cycle vector path 501 void LAHF(); // 3 cycle vector path
406 void SAHF(); // direct path fast 502 void SAHF(); // direct path fast
407 503
408
409 // Stack control 504 // Stack control
410 void PUSH(X64Reg reg); 505 void PUSH(X64Reg reg);
411 void POP(X64Reg reg); 506 void POP(X64Reg reg);
@@ -422,7 +517,7 @@ public:
422 517
423 void JMP(const u8* addr, bool force5Bytes = false); 518 void JMP(const u8* addr, bool force5Bytes = false);
424 void JMPptr(const OpArg& arg); 519 void JMPptr(const OpArg& arg);
425 void JMPself(); //infinite loop! 520 void JMPself(); // infinite loop!
426#ifdef CALL 521#ifdef CALL
427#undef CALL 522#undef CALL
428#endif 523#endif
@@ -450,12 +545,11 @@ public:
450 void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit 545 void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
451 546
452 // Cache control 547 // Cache control
453 enum PrefetchLevel 548 enum PrefetchLevel {
454 { 549 PF_NTA, // Non-temporal (data used once and only once)
455 PF_NTA, //Non-temporal (data used once and only once) 550 PF_T0, // All cache levels
456 PF_T0, //All cache levels 551 PF_T1, // Levels 2+ (aliased to T0 on AMD)
457 PF_T1, //Levels 2+ (aliased to T0 on AMD) 552 PF_T2, // Levels 3+ (aliased to T0 on AMD)
458 PF_T2, //Levels 3+ (aliased to T0 on AMD)
459 }; 553 };
460 void PREFETCH(PrefetchLevel level, OpArg arg); 554 void PREFETCH(PrefetchLevel level, OpArg arg);
461 void MOVNTI(int bits, const OpArg& dest, X64Reg src); 555 void MOVNTI(int bits, const OpArg& dest, X64Reg src);
@@ -464,8 +558,8 @@ public:
464 void MOVNTPD(const OpArg& arg, X64Reg regOp); 558 void MOVNTPD(const OpArg& arg, X64Reg regOp);
465 559
466 // Multiplication / division 560 // Multiplication / division
467 void MUL(int bits, const OpArg& src); //UNSIGNED 561 void MUL(int bits, const OpArg& src); // UNSIGNED
468 void IMUL(int bits, const OpArg& src); //SIGNED 562 void IMUL(int bits, const OpArg& src); // SIGNED
469 void IMUL(int bits, X64Reg regOp, const OpArg& src); 563 void IMUL(int bits, X64Reg regOp, const OpArg& src);
470 void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); 564 void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
471 void DIV(int bits, const OpArg& src); 565 void DIV(int bits, const OpArg& src);
@@ -492,11 +586,19 @@ public:
492 586
493 // Extend EAX into EDX in various ways 587 // Extend EAX into EDX in various ways
494 void CWD(int bits = 16); 588 void CWD(int bits = 16);
495 void CDQ() {CWD(32);} 589 void CDQ() {
496 void CQO() {CWD(64);} 590 CWD(32);
591 }
592 void CQO() {
593 CWD(64);
594 }
497 void CBW(int bits = 8); 595 void CBW(int bits = 8);
498 void CWDE() {CBW(16);} 596 void CWDE() {
499 void CDQE() {CBW(32);} 597 CBW(16);
598 }
599 void CDQE() {
600 CBW(32);
601 }
500 602
501 // Load effective address 603 // Load effective address
502 void LEA(int bits, X64Reg dest, OpArg src); 604 void LEA(int bits, X64Reg dest, OpArg src);
@@ -511,7 +613,7 @@ public:
511 void CMP(int bits, const OpArg& a1, const OpArg& a2); 613 void CMP(int bits, const OpArg& a1, const OpArg& a2);
512 614
513 // Bit operations 615 // Bit operations
514 void NOT (int bits, const OpArg& src); 616 void NOT(int bits, const OpArg& src);
515 void OR(int bits, const OpArg& a1, const OpArg& a2); 617 void OR(int bits, const OpArg& a1, const OpArg& a2);
516 void XOR(int bits, const OpArg& a1, const OpArg& a2); 618 void XOR(int bits, const OpArg& a1, const OpArg& a2);
517 void MOV(int bits, const OpArg& a1, const OpArg& a2); 619 void MOV(int bits, const OpArg& a1, const OpArg& a2);
@@ -525,7 +627,8 @@ public:
525 void BSWAP(int bits, X64Reg reg); 627 void BSWAP(int bits, X64Reg reg);
526 628
527 // Sign/zero extension 629 // Sign/zero extension
528 void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary 630 void MOVSX(int dbits, int sbits, X64Reg dest,
631 OpArg src); // automatically uses MOVSXD if necessary
529 void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); 632 void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
530 633
531 // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe. 634 // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.
@@ -593,13 +696,27 @@ public:
593 void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); 696 void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
594 void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); 697 void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
595 698
596 void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } 699 void CMPEQSS(X64Reg regOp, const OpArg& arg) {
597 void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } 700 CMPSS(regOp, arg, CMP_EQ);
598 void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } 701 }
599 void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } 702 void CMPLTSS(X64Reg regOp, const OpArg& arg) {
600 void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } 703 CMPSS(regOp, arg, CMP_LT);
601 void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } 704 }
602 void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } 705 void CMPLESS(X64Reg regOp, const OpArg& arg) {
706 CMPSS(regOp, arg, CMP_LE);
707 }
708 void CMPUNORDSS(X64Reg regOp, const OpArg& arg) {
709 CMPSS(regOp, arg, CMP_UNORD);
710 }
711 void CMPNEQSS(X64Reg regOp, const OpArg& arg) {
712 CMPSS(regOp, arg, CMP_NEQ);
713 }
714 void CMPNLTSS(X64Reg regOp, const OpArg& arg) {
715 CMPSS(regOp, arg, CMP_NLT);
716 }
717 void CMPORDSS(X64Reg regOp, const OpArg& arg) {
718 CMPSS(regOp, arg, CMP_ORD);
719 }
603 720
604 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) 721 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
605 void ADDPS(X64Reg regOp, const OpArg& arg); 722 void ADDPS(X64Reg regOp, const OpArg& arg);
@@ -638,10 +755,12 @@ public:
638 // SSE/SSE2: Useful alternative to shuffle in some cases. 755 // SSE/SSE2: Useful alternative to shuffle in some cases.
639 void MOVDDUP(X64Reg regOp, const OpArg& arg); 756 void MOVDDUP(X64Reg regOp, const OpArg& arg);
640 757
641 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. 758 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily
759 // on Ivy.
642 void HADDPS(X64Reg dest, const OpArg& src); 760 void HADDPS(X64Reg dest, const OpArg& src);
643 761
644 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". 762 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg
763 // contains both a read mask and a write "mask".
645 void DPPS(X64Reg dest, const OpArg& src, u8 arg); 764 void DPPS(X64Reg dest, const OpArg& src, u8 arg);
646 765
647 void UNPCKLPS(X64Reg dest, const OpArg& src); 766 void UNPCKLPS(X64Reg dest, const OpArg& src);
@@ -694,11 +813,13 @@ public:
694 void MOVD_xmm(const OpArg& arg, X64Reg src); 813 void MOVD_xmm(const OpArg& arg, X64Reg src);
695 void MOVQ_xmm(OpArg arg, X64Reg src); 814 void MOVQ_xmm(OpArg arg, X64Reg src);
696 815
697 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. 816 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in
817 // question.
698 void MOVMSKPS(X64Reg dest, const OpArg& arg); 818 void MOVMSKPS(X64Reg dest, const OpArg& arg);
699 void MOVMSKPD(X64Reg dest, const OpArg& arg); 819 void MOVMSKPD(X64Reg dest, const OpArg& arg);
700 820
701 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. 821 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a
822 // weird one.
702 void MASKMOVDQU(X64Reg dest, X64Reg src); 823 void MASKMOVDQU(X64Reg dest, X64Reg src);
703 void LDDQU(X64Reg dest, const OpArg& src); 824 void LDDQU(X64Reg dest, const OpArg& src);
704 825
@@ -729,10 +850,10 @@ public:
729 void PACKUSDW(X64Reg dest, const OpArg& arg); 850 void PACKUSDW(X64Reg dest, const OpArg& arg);
730 void PACKUSWB(X64Reg dest, const OpArg& arg); 851 void PACKUSWB(X64Reg dest, const OpArg& arg);
731 852
732 void PUNPCKLBW(X64Reg dest, const OpArg &arg); 853 void PUNPCKLBW(X64Reg dest, const OpArg& arg);
733 void PUNPCKLWD(X64Reg dest, const OpArg &arg); 854 void PUNPCKLWD(X64Reg dest, const OpArg& arg);
734 void PUNPCKLDQ(X64Reg dest, const OpArg &arg); 855 void PUNPCKLDQ(X64Reg dest, const OpArg& arg);
735 void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); 856 void PUNPCKLQDQ(X64Reg dest, const OpArg& arg);
736 857
737 void PTEST(X64Reg dest, const OpArg& arg); 858 void PTEST(X64Reg dest, const OpArg& arg);
738 void PAND(X64Reg dest, const OpArg& arg); 859 void PAND(X64Reg dest, const OpArg& arg);
@@ -839,25 +960,57 @@ public:
839 void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); 960 void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
840 void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); 961 void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
841 962
842 void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } 963 void ROUNDNEARSS(X64Reg dest, const OpArg& arg) {
843 void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } 964 ROUNDSS(dest, arg, FROUND_NEAREST);
844 void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } 965 }
845 void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } 966 void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) {
967 ROUNDSS(dest, arg, FROUND_FLOOR);
968 }
969 void ROUNDCEILSS(X64Reg dest, const OpArg& arg) {
970 ROUNDSS(dest, arg, FROUND_CEIL);
971 }
972 void ROUNDZEROSS(X64Reg dest, const OpArg& arg) {
973 ROUNDSS(dest, arg, FROUND_ZERO);
974 }
846 975
847 void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } 976 void ROUNDNEARSD(X64Reg dest, const OpArg& arg) {
848 void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } 977 ROUNDSD(dest, arg, FROUND_NEAREST);
849 void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } 978 }
850 void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } 979 void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) {
980 ROUNDSD(dest, arg, FROUND_FLOOR);
981 }
982 void ROUNDCEILSD(X64Reg dest, const OpArg& arg) {
983 ROUNDSD(dest, arg, FROUND_CEIL);
984 }
985 void ROUNDZEROSD(X64Reg dest, const OpArg& arg) {
986 ROUNDSD(dest, arg, FROUND_ZERO);
987 }
851 988
852 void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } 989 void ROUNDNEARPS(X64Reg dest, const OpArg& arg) {
853 void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } 990 ROUNDPS(dest, arg, FROUND_NEAREST);
854 void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } 991 }
855 void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } 992 void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) {
993 ROUNDPS(dest, arg, FROUND_FLOOR);
994 }
995 void ROUNDCEILPS(X64Reg dest, const OpArg& arg) {
996 ROUNDPS(dest, arg, FROUND_CEIL);
997 }
998 void ROUNDZEROPS(X64Reg dest, const OpArg& arg) {
999 ROUNDPS(dest, arg, FROUND_ZERO);
1000 }
856 1001
857 void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } 1002 void ROUNDNEARPD(X64Reg dest, const OpArg& arg) {
858 void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } 1003 ROUNDPD(dest, arg, FROUND_NEAREST);
859 void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } 1004 }
860 void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } 1005 void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) {
1006 ROUNDPD(dest, arg, FROUND_FLOOR);
1007 }
1008 void ROUNDCEILPD(X64Reg dest, const OpArg& arg) {
1009 ROUNDPD(dest, arg, FROUND_CEIL);
1010 }
1011 void ROUNDZEROPD(X64Reg dest, const OpArg& arg) {
1012 ROUNDPD(dest, arg, FROUND_ZERO);
1013 }
861 1014
862 // AVX 1015 // AVX
863 void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); 1016 void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
@@ -981,7 +1134,6 @@ public:
981 void ABI_CallFunctionC16(const void* func, u16 param1); 1134 void ABI_CallFunctionC16(const void* func, u16 param1);
982 void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); 1135 void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
983 1136
984
985 // These only support u32 parameters, but that's enough for a lot of uses. 1137 // These only support u32 parameters, but that's enough for a lot of uses.
986 // These will destroy the 1 or 2 first "parameter regs". 1138 // These will destroy the 1 or 2 first "parameter regs".
987 void ABI_CallFunctionC(const void* func, u32 param1); 1139 void ABI_CallFunctionC(const void* func, u32 param1);
@@ -1012,29 +1164,38 @@ public:
1012 * 1164 *
1013 * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs) 1165 * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
1014 * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8 1166 * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
1015 * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack 1167 * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the
1168 * stack
1016 * @return Size of the shadow space, i.e., offset of the frame 1169 * @return Size of the shadow space, i.e., offset of the frame
1017 */ 1170 */
1018 size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); 1171 size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
1172 size_t needed_frame_size = 0);
1019 1173
1020 /** 1174 /**
1021 * Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before 1175 * Restores specified registers and adjusts the stack to its original alignment, i.e., the
1176 * alignment before
1022 * the matching PushRegistersAndAdjustStack. 1177 * the matching PushRegistersAndAdjustStack.
1023 * 1178 *
1024 * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are GPRs) 1179 * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are
1025 * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8 1180 * GPRs)
1181 * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must
1182 * be 0 or 8
1026 * @param needed_frame_size Additional space that was needed 1183 * @param needed_frame_size Additional space that was needed
1027 * @warning Stack must be currently 16-byte aligned 1184 * @warning Stack must be currently 16-byte aligned
1028 */ 1185 */
1029 void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); 1186 void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
1030 1187 size_t needed_frame_size = 0);
1031 #ifdef _M_IX86
1032 static int ABI_GetNumXMMRegs() { return 8; }
1033 #else
1034 static int ABI_GetNumXMMRegs() { return 16; }
1035 #endif
1036}; // class XEmitter
1037 1188
1189#ifdef _M_IX86
1190 static int ABI_GetNumXMMRegs() {
1191 return 8;
1192 }
1193#else
1194 static int ABI_GetNumXMMRegs() {
1195 return 16;
1196 }
1197#endif
1198}; // class XEmitter
1038 1199
1039// Everything that needs to generate X86 code should inherit from this. 1200// Everything that needs to generate X86 code should inherit from this.
1040// You get memory management for free, plus, you can use all the MOV etc functions without 1201// You get memory management for free, plus, you can use all the MOV etc functions without
@@ -1045,4 +1206,4 @@ public:
1045 void PoisonMemory() override; 1206 void PoisonMemory() override;
1046}; 1207};
1047 1208
1048} // namespace 1209} // namespace