summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
authorGravatar bunnei2015-07-21 20:08:49 -0400
committerGravatar bunnei2015-08-15 17:33:45 -0400
commitddbeebb887cff61b087a48738650832bc62c9e83 (patch)
tree130ff470aa19d7cdbdd2a8183ae4fcb12e061fc8 /src/common
parentCommon: Ported over Dolphin's code for x86 CPU capability detection. (diff)
downloadyuzu-ddbeebb887cff61b087a48738650832bc62c9e83.tar.gz
yuzu-ddbeebb887cff61b087a48738650832bc62c9e83.tar.xz
yuzu-ddbeebb887cff61b087a48738650832bc62c9e83.zip
Common: Ported over boilerplate x86 JIT code from Dolphin/PPSSPP.
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt14
-rw-r--r--src/common/abi.cpp680
-rw-r--r--src/common/abi.h78
-rw-r--r--src/common/code_block.h87
-rw-r--r--src/common/common_funcs.h2
-rw-r--r--src/common/fake_emitter.h465
-rw-r--r--src/common/platform.h2
-rw-r--r--src/common/x64_emitter.cpp1989
-rw-r--r--src/common/x64_emitter.h1067
9 files changed, 4380 insertions, 4 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index cef5081c5..600193858 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -2,6 +2,7 @@
2configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 2configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
3 3
4set(SRCS 4set(SRCS
5 abi.cpp
5 break_points.cpp 6 break_points.cpp
6 emu_window.cpp 7 emu_window.cpp
7 file_util.cpp 8 file_util.cpp
@@ -20,10 +21,12 @@ set(SRCS
20 ) 21 )
21 22
22set(HEADERS 23set(HEADERS
24 abi.h
23 assert.h 25 assert.h
24 bit_field.h 26 bit_field.h
25 break_points.h 27 break_points.h
26 chunk_file.h 28 chunk_file.h
29 code_block.h
27 color.h 30 color.h
28 common_funcs.h 31 common_funcs.h
29 common_paths.h 32 common_paths.h
@@ -58,10 +61,17 @@ set(HEADERS
58 61
59if(_M_X86) 62if(_M_X86)
60 set(SRCS ${SRCS} 63 set(SRCS ${SRCS}
61 cpu_detect_x86.cpp) 64 cpu_detect_x86.cpp
65 x64_emitter.cpp)
66
67 set(HEADERS ${HEADERS}
68 x64_emitter.h)
62else() 69else()
63 set(SRCS ${SRCS} 70 set(SRCS ${SRCS}
64 cpu_detect_generic.cpp) 71 cpu_detect_generic.cpp)
72
73 set(HEADERS ${HEADERS}
74 fake_emitter.h)
65endif() 75endif()
66 76
67create_directory_groups(${SRCS} ${HEADERS}) 77create_directory_groups(${SRCS} ${HEADERS})
diff --git a/src/common/abi.cpp b/src/common/abi.cpp
new file mode 100644
index 000000000..d1892ad48
--- /dev/null
+++ b/src/common/abi.cpp
@@ -0,0 +1,680 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include "x64_emitter.h"
19#include "abi.h"
20
21using namespace Gen;
22
23// Shared code between Win64 and Unix64
24
25// Sets up a __cdecl function.
26void XEmitter::ABI_EmitPrologue(int maxCallParams)
27{
28#ifdef _M_IX86
29 // Don't really need to do anything
30#elif defined(_M_X86_64)
31#if _WIN32
32 int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8;
33 // Set up a stack frame so that we can call functions
34 // TODO: use maxCallParams
35 SUB(64, R(RSP), Imm8(stacksize));
36#endif
37#else
38#error Arch not supported
39#endif
40}
41
42void XEmitter::ABI_EmitEpilogue(int maxCallParams)
43{
44#ifdef _M_IX86
45 RET();
46#elif defined(_M_X86_64)
47#ifdef _WIN32
48 int stacksize = ((maxCallParams+1)&~1)*8 + 8;
49 ADD(64, R(RSP), Imm8(stacksize));
50#endif
51 RET();
52#else
53#error Arch not supported
54
55
56#endif
57}
58
59#ifdef _M_IX86 // All32
60
61// Shared code between Win32 and Unix32
62void XEmitter::ABI_CallFunction(const void *func) {
63 ABI_AlignStack(0);
64 CALL(func);
65 ABI_RestoreStack(0);
66}
67
68void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
69 ABI_AlignStack(1 * 2);
70 PUSH(16, Imm16(param1));
71 CALL(func);
72 ABI_RestoreStack(1 * 2);
73}
74
75void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
76 ABI_AlignStack(1 * 2 + 1 * 4);
77 PUSH(16, Imm16(param2));
78 PUSH(32, Imm32(param1));
79 CALL(func);
80 ABI_RestoreStack(1 * 2 + 1 * 4);
81}
82
83void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
84 ABI_AlignStack(1 * 4);
85 PUSH(32, Imm32(param1));
86 CALL(func);
87 ABI_RestoreStack(1 * 4);
88}
89
90void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
91 ABI_AlignStack(2 * 4);
92 PUSH(32, Imm32(param2));
93 PUSH(32, Imm32(param1));
94 CALL(func);
95 ABI_RestoreStack(2 * 4);
96}
97
98void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
99 ABI_AlignStack(3 * 4);
100 PUSH(32, Imm32(param3));
101 PUSH(32, Imm32(param2));
102 PUSH(32, Imm32(param1));
103 CALL(func);
104 ABI_RestoreStack(3 * 4);
105}
106
107void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
108 ABI_AlignStack(3 * 4);
109 PUSH(32, ImmPtr(param3));
110 PUSH(32, Imm32(param2));
111 PUSH(32, Imm32(param1));
112 CALL(func);
113 ABI_RestoreStack(3 * 4);
114}
115
116void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) {
117 ABI_AlignStack(4 * 4);
118 PUSH(32, ImmPtr(param4));
119 PUSH(32, Imm32(param3));
120 PUSH(32, Imm32(param2));
121 PUSH(32, Imm32(param1));
122 CALL(func);
123 ABI_RestoreStack(4 * 4);
124}
125
126void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
127 ABI_AlignStack(1 * 4);
128 PUSH(32, ImmPtr(param1));
129 CALL(func);
130 ABI_RestoreStack(1 * 4);
131}
132
133void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
134 ABI_AlignStack(2 * 4);
135 PUSH(32, arg2);
136 PUSH(32, ImmPtr(param1));
137 CALL(func);
138 ABI_RestoreStack(2 * 4);
139}
140
141void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
142 ABI_AlignStack(3 * 4);
143 PUSH(32, arg3);
144 PUSH(32, arg2);
145 PUSH(32, ImmPtr(param1));
146 CALL(func);
147 ABI_RestoreStack(3 * 4);
148}
149
150void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
151 ABI_AlignStack(3 * 4);
152 PUSH(32, Imm32(param3));
153 PUSH(32, ImmPtr(param2));
154 PUSH(32, ImmPtr(param1));
155 CALL(func);
156 ABI_RestoreStack(3 * 4);
157}
158
159// Pass a register as a parameter.
160void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
161 ABI_AlignStack(1 * 4);
162 PUSH(32, R(reg1));
163 CALL(func);
164 ABI_RestoreStack(1 * 4);
165}
166
167// Pass two registers as parameters.
168void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
169{
170 ABI_AlignStack(2 * 4);
171 PUSH(32, R(reg2));
172 PUSH(32, R(reg1));
173 CALL(func);
174 ABI_RestoreStack(2 * 4);
175}
176
177void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
178{
179 ABI_AlignStack(2 * 4);
180 PUSH(32, Imm32(param2));
181 PUSH(32, arg1);
182 CALL(func);
183 ABI_RestoreStack(2 * 4);
184}
185
186void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
187{
188 ABI_AlignStack(3 * 4);
189 PUSH(32, Imm32(param3));
190 PUSH(32, Imm32(param2));
191 PUSH(32, arg1);
192 CALL(func);
193 ABI_RestoreStack(3 * 4);
194}
195
196void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
197{
198 ABI_AlignStack(1 * 4);
199 PUSH(32, arg1);
200 CALL(func);
201 ABI_RestoreStack(1 * 4);
202}
203
204void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
205{
206 ABI_AlignStack(2 * 4);
207 PUSH(32, arg2);
208 PUSH(32, arg1);
209 CALL(func);
210 ABI_RestoreStack(2 * 4);
211}
212
213void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
214 // Note: 4 * 4 = 16 bytes, so alignment is preserved.
215 PUSH(EBP);
216 PUSH(EBX);
217 PUSH(ESI);
218 PUSH(EDI);
219}
220
221void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
222 POP(EDI);
223 POP(ESI);
224 POP(EBX);
225 POP(EBP);
226}
227
228unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
229 frameSize += 4; // reserve space for return address
230 unsigned int alignedSize =
231#ifdef __GNUC__
232 (frameSize + 15) & -16;
233#else
234 (frameSize + 3) & -4;
235#endif
236 return alignedSize;
237}
238
239
240void XEmitter::ABI_AlignStack(unsigned int frameSize) {
241// Mac OS X requires the stack to be 16-byte aligned before every call.
242// Linux requires the stack to be 16-byte aligned before calls that put SSE
243// vectors on the stack, but since we do not keep track of which calls do that,
244// it is effectively every call as well.
245// Windows binaries compiled with MSVC do not have such a restriction*, but I
246// expect that GCC on Windows acts the same as GCC on Linux in this respect.
247// It would be nice if someone could verify this.
248// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times.
249 unsigned int fillSize =
250 ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4);
251 if (fillSize != 0) {
252 SUB(32, R(ESP), Imm8(fillSize));
253 }
254}
255
256void XEmitter::ABI_RestoreStack(unsigned int frameSize) {
257 unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize);
258 alignedSize -= 4; // return address is POPped at end of call
259 if (alignedSize != 0) {
260 ADD(32, R(ESP), Imm8(alignedSize));
261 }
262}
263
264#else //64bit
265
266// Common functions
267void XEmitter::ABI_CallFunction(const void *func) {
268 u64 distance = u64(func) - (u64(code) + 5);
269 if (distance >= 0x0000000080000000ULL
270 && distance < 0xFFFFFFFF80000000ULL) {
271 // Far call
272 MOV(64, R(RAX), ImmPtr(func));
273 CALLptr(R(RAX));
274 } else {
275 CALL(func);
276 }
277}
278
279void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
280 MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
281 u64 distance = u64(func) - (u64(code) + 5);
282 if (distance >= 0x0000000080000000ULL
283 && distance < 0xFFFFFFFF80000000ULL) {
284 // Far call
285 MOV(64, R(RAX), ImmPtr(func));
286 CALLptr(R(RAX));
287 } else {
288 CALL(func);
289 }
290}
291
292void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
293 MOV(32, R(ABI_PARAM1), Imm32(param1));
294 MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
295 u64 distance = u64(func) - (u64(code) + 5);
296 if (distance >= 0x0000000080000000ULL
297 && distance < 0xFFFFFFFF80000000ULL) {
298 // Far call
299 MOV(64, R(RAX), ImmPtr(func));
300 CALLptr(R(RAX));
301 } else {
302 CALL(func);
303 }
304}
305
306void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
307 MOV(32, R(ABI_PARAM1), Imm32(param1));
308 u64 distance = u64(func) - (u64(code) + 5);
309 if (distance >= 0x0000000080000000ULL
310 && distance < 0xFFFFFFFF80000000ULL) {
311 // Far call
312 MOV(64, R(RAX), ImmPtr(func));
313 CALLptr(R(RAX));
314 } else {
315 CALL(func);
316 }
317}
318
319void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
320 MOV(32, R(ABI_PARAM1), Imm32(param1));
321 MOV(32, R(ABI_PARAM2), Imm32(param2));
322 u64 distance = u64(func) - (u64(code) + 5);
323 if (distance >= 0x0000000080000000ULL
324 && distance < 0xFFFFFFFF80000000ULL) {
325 // Far call
326 MOV(64, R(RAX), ImmPtr(func));
327 CALLptr(R(RAX));
328 } else {
329 CALL(func);
330 }
331}
332
333void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
334 MOV(32, R(ABI_PARAM1), Imm32(param1));
335 MOV(32, R(ABI_PARAM2), Imm32(param2));
336 MOV(32, R(ABI_PARAM3), Imm32(param3));
337 u64 distance = u64(func) - (u64(code) + 5);
338 if (distance >= 0x0000000080000000ULL
339 && distance < 0xFFFFFFFF80000000ULL) {
340 // Far call
341 MOV(64, R(RAX), ImmPtr(func));
342 CALLptr(R(RAX));
343 } else {
344 CALL(func);
345 }
346}
347
348void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
349 MOV(32, R(ABI_PARAM1), Imm32(param1));
350 MOV(32, R(ABI_PARAM2), Imm32(param2));
351 MOV(64, R(ABI_PARAM3), ImmPtr(param3));
352 u64 distance = u64(func) - (u64(code) + 5);
353 if (distance >= 0x0000000080000000ULL
354 && distance < 0xFFFFFFFF80000000ULL) {
355 // Far call
356 MOV(64, R(RAX), ImmPtr(func));
357 CALLptr(R(RAX));
358 } else {
359 CALL(func);
360 }
361}
362
363void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
364 MOV(32, R(ABI_PARAM1), Imm32(param1));
365 MOV(32, R(ABI_PARAM2), Imm32(param2));
366 MOV(32, R(ABI_PARAM3), Imm32(param3));
367 MOV(64, R(ABI_PARAM4), ImmPtr(param4));
368 u64 distance = u64(func) - (u64(code) + 5);
369 if (distance >= 0x0000000080000000ULL
370 && distance < 0xFFFFFFFF80000000ULL) {
371 // Far call
372 MOV(64, R(RAX), ImmPtr(func));
373 CALLptr(R(RAX));
374 } else {
375 CALL(func);
376 }
377}
378
379void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
380 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
381 u64 distance = u64(func) - (u64(code) + 5);
382 if (distance >= 0x0000000080000000ULL
383 && distance < 0xFFFFFFFF80000000ULL) {
384 // Far call
385 MOV(64, R(RAX), ImmPtr(func));
386 CALLptr(R(RAX));
387 } else {
388 CALL(func);
389 }
390}
391
392void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
393 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
394 if (!arg2.IsSimpleReg(ABI_PARAM2))
395 MOV(32, R(ABI_PARAM2), arg2);
396 u64 distance = u64(func) - (u64(code) + 5);
397 if (distance >= 0x0000000080000000ULL
398 && distance < 0xFFFFFFFF80000000ULL) {
399 // Far call
400 MOV(64, R(RAX), ImmPtr(func));
401 CALLptr(R(RAX));
402 } else {
403 CALL(func);
404 }
405}
406
407void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
408 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
409 if (!arg2.IsSimpleReg(ABI_PARAM2))
410 MOV(32, R(ABI_PARAM2), arg2);
411 if (!arg3.IsSimpleReg(ABI_PARAM3))
412 MOV(32, R(ABI_PARAM3), arg3);
413 u64 distance = u64(func) - (u64(code) + 5);
414 if (distance >= 0x0000000080000000ULL
415 && distance < 0xFFFFFFFF80000000ULL) {
416 // Far call
417 MOV(64, R(RAX), ImmPtr(func));
418 CALLptr(R(RAX));
419 } else {
420 CALL(func);
421 }
422}
423
424void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
425 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
426 MOV(64, R(ABI_PARAM2), ImmPtr(param2));
427 MOV(32, R(ABI_PARAM3), Imm32(param3));
428 u64 distance = u64(func) - (u64(code) + 5);
429 if (distance >= 0x0000000080000000ULL
430 && distance < 0xFFFFFFFF80000000ULL) {
431 // Far call
432 MOV(64, R(RAX), ImmPtr(func));
433 CALLptr(R(RAX));
434 } else {
435 CALL(func);
436 }
437}
438
439// Pass a register as a parameter.
440void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
441 if (reg1 != ABI_PARAM1)
442 MOV(32, R(ABI_PARAM1), R(reg1));
443 u64 distance = u64(func) - (u64(code) + 5);
444 if (distance >= 0x0000000080000000ULL
445 && distance < 0xFFFFFFFF80000000ULL) {
446 // Far call
447 MOV(64, R(RAX), ImmPtr(func));
448 CALLptr(R(RAX));
449 } else {
450 CALL(func);
451 }
452}
453
454// Pass two registers as parameters.
455void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
456 if (reg2 != ABI_PARAM1) {
457 if (reg1 != ABI_PARAM1)
458 MOV(64, R(ABI_PARAM1), R(reg1));
459 if (reg2 != ABI_PARAM2)
460 MOV(64, R(ABI_PARAM2), R(reg2));
461 } else {
462 if (reg2 != ABI_PARAM2)
463 MOV(64, R(ABI_PARAM2), R(reg2));
464 if (reg1 != ABI_PARAM1)
465 MOV(64, R(ABI_PARAM1), R(reg1));
466 }
467 u64 distance = u64(func) - (u64(code) + 5);
468 if (distance >= 0x0000000080000000ULL
469 && distance < 0xFFFFFFFF80000000ULL) {
470 // Far call
471 MOV(64, R(RAX), ImmPtr(func));
472 CALLptr(R(RAX));
473 } else {
474 CALL(func);
475 }
476}
477
478void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
479{
480 if (!arg1.IsSimpleReg(ABI_PARAM1))
481 MOV(32, R(ABI_PARAM1), arg1);
482 MOV(32, R(ABI_PARAM2), Imm32(param2));
483 u64 distance = u64(func) - (u64(code) + 5);
484 if (distance >= 0x0000000080000000ULL
485 && distance < 0xFFFFFFFF80000000ULL) {
486 // Far call
487 MOV(64, R(RAX), ImmPtr(func));
488 CALLptr(R(RAX));
489 } else {
490 CALL(func);
491 }
492}
493
494void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
495{
496 if (!arg1.IsSimpleReg(ABI_PARAM1))
497 MOV(32, R(ABI_PARAM1), arg1);
498 MOV(32, R(ABI_PARAM2), Imm32(param2));
499 MOV(64, R(ABI_PARAM3), Imm64(param3));
500 u64 distance = u64(func) - (u64(code) + 5);
501 if (distance >= 0x0000000080000000ULL
502 && distance < 0xFFFFFFFF80000000ULL) {
503 // Far call
504 MOV(64, R(RAX), ImmPtr(func));
505 CALLptr(R(RAX));
506 } else {
507 CALL(func);
508 }
509}
510
511void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
512{
513 if (!arg1.IsSimpleReg(ABI_PARAM1))
514 MOV(32, R(ABI_PARAM1), arg1);
515 u64 distance = u64(func) - (u64(code) + 5);
516 if (distance >= 0x0000000080000000ULL
517 && distance < 0xFFFFFFFF80000000ULL) {
518 // Far call
519 MOV(64, R(RAX), ImmPtr(func));
520 CALLptr(R(RAX));
521 } else {
522 CALL(func);
523 }
524}
525
526void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
527{
528 if (!arg1.IsSimpleReg(ABI_PARAM1))
529 MOV(32, R(ABI_PARAM1), arg1);
530 if (!arg2.IsSimpleReg(ABI_PARAM2))
531 MOV(32, R(ABI_PARAM2), arg2);
532 u64 distance = u64(func) - (u64(code) + 5);
533 if (distance >= 0x0000000080000000ULL
534 && distance < 0xFFFFFFFF80000000ULL) {
535 // Far call
536 MOV(64, R(RAX), ImmPtr(func));
537 CALLptr(R(RAX));
538 } else {
539 CALL(func);
540 }
541}
542
543unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
544 return frameSize;
545}
546
547#ifdef _WIN32
548
549// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs.
550// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs.
551// Let's just save all 16.
552const int XMM_STACK_SPACE = 16 * 16;
553
554// Win64 Specific Code
555void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
556 //we only want to do this once
557 PUSH(RBX);
558 PUSH(RSI);
559 PUSH(RDI);
560 PUSH(RBP);
561 PUSH(R12);
562 PUSH(R13);
563 PUSH(R14);
564 PUSH(R15);
565 ABI_AlignStack(0);
566
567 // Do this after aligning, because before it's offset by 8.
568 SUB(64, R(RSP), Imm32(XMM_STACK_SPACE));
569 for (int i = 0; i < 16; ++i)
570 MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i));
571}
572
573void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
574 for (int i = 0; i < 16; ++i)
575 MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16));
576 ADD(64, R(RSP), Imm32(XMM_STACK_SPACE));
577
578 ABI_RestoreStack(0);
579 POP(R15);
580 POP(R14);
581 POP(R13);
582 POP(R12);
583 POP(RBP);
584 POP(RDI);
585 POP(RSI);
586 POP(RBX);
587}
588
589// Win64 Specific Code
590void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
591 PUSH(RCX);
592 PUSH(RDX);
593 PUSH(RSI);
594 PUSH(RDI);
595 PUSH(R8);
596 PUSH(R9);
597 PUSH(R10);
598 PUSH(R11);
599 // TODO: Callers preserve XMM4-5 (XMM0-3 are args.)
600 ABI_AlignStack(0);
601}
602
603void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
604 ABI_RestoreStack(0);
605 POP(R11);
606 POP(R10);
607 POP(R9);
608 POP(R8);
609 POP(RDI);
610 POP(RSI);
611 POP(RDX);
612 POP(RCX);
613}
614
615void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
616 SUB(64, R(RSP), Imm8(0x28));
617}
618
619void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
620 ADD(64, R(RSP), Imm8(0x28));
621}
622
623#else
624// Unix64 Specific Code
625void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
626 PUSH(RBX);
627 PUSH(RBP);
628 PUSH(R12);
629 PUSH(R13);
630 PUSH(R14);
631 PUSH(R15);
632 PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
633 // TODO: XMM?
634}
635
636void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
637 POP(R15);
638 POP(R15);
639 POP(R14);
640 POP(R13);
641 POP(R12);
642 POP(RBP);
643 POP(RBX);
644}
645
646void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
647 PUSH(RCX);
648 PUSH(RDX);
649 PUSH(RSI);
650 PUSH(RDI);
651 PUSH(R8);
652 PUSH(R9);
653 PUSH(R10);
654 PUSH(R11);
655 PUSH(R11);
656}
657
658void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
659 POP(R11);
660 POP(R11);
661 POP(R10);
662 POP(R9);
663 POP(R8);
664 POP(RDI);
665 POP(RSI);
666 POP(RDX);
667 POP(RCX);
668}
669
670void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
671 SUB(64, R(RSP), Imm8(0x08));
672}
673
674void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
675 ADD(64, R(RSP), Imm8(0x08));
676}
677
678#endif // WIN32
679
680#endif // 32bit
diff --git a/src/common/abi.h b/src/common/abi.h
new file mode 100644
index 000000000..bb9f7c95f
--- /dev/null
+++ b/src/common/abi.h
@@ -0,0 +1,78 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include "common_types.h"
21
22// x86/x64 ABI:s, and helpers to help follow them when JIT-ing code.
23// All convensions return values in EAX (+ possibly EDX).
24
25// Linux 32-bit, Windows 32-bit (cdecl, System V):
26// * Caller pushes left to right
27// * Caller fixes stack after call
28// * function subtract from stack for local storage only.
29// Scratch: EAX ECX EDX
30// Callee-save: EBX ESI EDI EBP
31// Parameters: -
32
33// Windows 64-bit
34// * 4-reg "fastcall" variant, very new-skool stack handling
35// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
36// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
37// Scratch: RAX RCX RDX R8 R9 R10 R11
38// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
39// Parameters: RCX RDX R8 R9, further MOV-ed
40
41// Linux 64-bit
42// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
43// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
44// Callee-save: RBX RBP R12 R13 R14 R15
45// Parameters: RDI RSI RDX RCX R8 R9
46
47#ifdef _M_IX86 // 32 bit calling convention, shared by all
48
49// 32-bit don't pass parameters in regs, but these are convenient to have anyway when we have to
50// choose regs to put stuff in.
51#define ABI_PARAM1 RCX
52#define ABI_PARAM2 RDX
53
54// There are no ABI_PARAM* here, since args are pushed.
55// 32-bit bog standard cdecl, shared between linux and windows
56// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about.
57
58#elif _M_X86_64 // 64 bit calling convention
59
60#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
61
62#define ABI_PARAM1 RCX
63#define ABI_PARAM2 RDX
64#define ABI_PARAM3 R8
65#define ABI_PARAM4 R9
66
67#else //64-bit Unix (hopefully MacOSX too)
68
69#define ABI_PARAM1 RDI
70#define ABI_PARAM2 RSI
71#define ABI_PARAM3 RDX
72#define ABI_PARAM4 RCX
73#define ABI_PARAM5 R8
74#define ABI_PARAM6 R9
75
76#endif // WIN32
77
78#endif // X86
diff --git a/src/common/code_block.h b/src/common/code_block.h
new file mode 100644
index 000000000..9ef7296d3
--- /dev/null
+++ b/src/common/code_block.h
@@ -0,0 +1,87 @@
1// Copyright 2013 Dolphin Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common_types.h"
8#include "memory_util.h"
9
10// Everything that needs to generate code should inherit from this.
11// You get memory management for free, plus, you can use all emitter functions without
12// having to prefix them with gen-> or something similar.
13// Example implementation:
14// class JIT : public CodeBlock<ARMXEmitter> {}
15template<class T> class CodeBlock : public T, NonCopyable
16{
17private:
18 // A privately used function to set the executable RAM space to something invalid.
19 // For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
20 virtual void PoisonMemory() = 0;
21
22protected:
23 u8 *region;
24 size_t region_size;
25
26public:
27 CodeBlock() : region(nullptr), region_size(0) {}
28 virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
29
30 // Call this before you generate any code.
31 void AllocCodeSpace(int size)
32 {
33 region_size = size;
34 region = (u8*)AllocateExecutableMemory(region_size);
35 T::SetCodePtr(region);
36 }
37
38 // Always clear code space with breakpoints, so that if someone accidentally executes
39 // uninitialized, it just breaks into the debugger.
40 void ClearCodeSpace()
41 {
42 PoisonMemory();
43 ResetCodePtr();
44 }
45
46 // Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
47 void FreeCodeSpace()
48 {
49#ifdef __SYMBIAN32__
50 ResetExecutableMemory(region);
51#else
52 FreeMemoryPages(region, region_size);
53#endif
54 region = nullptr;
55 region_size = 0;
56 }
57
58 bool IsInSpace(const u8 *ptr)
59 {
60 return (ptr >= region) && (ptr < (region + region_size));
61 }
62
63 // Cannot currently be undone. Will write protect the entire code region.
64 // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
65 void WriteProtect()
66 {
67 WriteProtectMemory(region, region_size, true);
68 }
69
70 void ResetCodePtr()
71 {
72 T::SetCodePtr(region);
73 }
74
75 size_t GetSpaceLeft() const
76 {
77 return region_size - (T::GetCodePtr() - region);
78 }
79
80 u8 *GetBasePtr() {
81 return region;
82 }
83
84 size_t GetOffset(const u8 *ptr) const {
85 return ptr - region;
86 }
87};
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 83b47f61e..6fd2b06b2 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -35,7 +35,7 @@
35 35
36#ifndef _MSC_VER 36#ifndef _MSC_VER
37 37
38#if defined(__x86_64__) || defined(_M_X64) 38#if defined(__x86_64__) || defined(_M_X86_64)
39#define Crash() __asm__ __volatile__("int $3") 39#define Crash() __asm__ __volatile__("int $3")
40#elif defined(_M_ARM) 40#elif defined(_M_ARM)
41#define Crash() __asm__ __volatile__("trap") 41#define Crash() __asm__ __volatile__("trap")
diff --git a/src/common/fake_emitter.h b/src/common/fake_emitter.h
new file mode 100644
index 000000000..d6d96a51e
--- /dev/null
+++ b/src/common/fake_emitter.h
@@ -0,0 +1,465 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
19
20#pragma once
21
22#include <vector>
23#include <stdint.h>
24
25#include "assert.h"
26#include "common_types.h"
27
28// TODO: Check if Pandora still needs signal.h/kill here. Symbian doesn't.
29
30// VCVT flags
31#define TO_FLOAT 0
32#define TO_INT 1 << 0
33#define IS_SIGNED 1 << 1
34#define ROUND_TO_ZERO 1 << 2
35
36namespace FakeGen
37{
38enum FakeReg
39{
40 // GPRs
41 R0 = 0, R1, R2, R3, R4, R5,
42 R6, R7, R8, R9, R10, R11,
43
44 // SPRs
45 // R13 - R15 are SP, LR, and PC.
46 // Almost always referred to by name instead of register number
47 R12 = 12, R13 = 13, R14 = 14, R15 = 15,
48 R_IP = 12, R_SP = 13, R_LR = 14, R_PC = 15,
49
50
51 // VFP single precision registers
52 S0, S1, S2, S3, S4, S5, S6,
53 S7, S8, S9, S10, S11, S12, S13,
54 S14, S15, S16, S17, S18, S19, S20,
55 S21, S22, S23, S24, S25, S26, S27,
56 S28, S29, S30, S31,
57
58 // VFP Double Precision registers
59 D0, D1, D2, D3, D4, D5, D6, D7,
60 D8, D9, D10, D11, D12, D13, D14, D15,
61 D16, D17, D18, D19, D20, D21, D22, D23,
62 D24, D25, D26, D27, D28, D29, D30, D31,
63
64 // ASIMD Quad-Word registers
65 Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
66 Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
67
68 // for NEON VLD/VST instructions
69 REG_UPDATE = R13,
70 INVALID_REG = 0xFFFFFFFF
71};
72
73enum CCFlags
74{
75 CC_EQ = 0, // Equal
76 CC_NEQ, // Not equal
77 CC_CS, // Carry Set
78 CC_CC, // Carry Clear
79 CC_MI, // Minus (Negative)
80 CC_PL, // Plus
81 CC_VS, // Overflow
82 CC_VC, // No Overflow
83 CC_HI, // Unsigned higher
84 CC_LS, // Unsigned lower or same
85 CC_GE, // Signed greater than or equal
86 CC_LT, // Signed less than
87 CC_GT, // Signed greater than
88 CC_LE, // Signed less than or equal
89 CC_AL, // Always (unconditional) 14
90 CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
91 CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
92};
93const u32 NO_COND = 0xE0000000;
94
95enum ShiftType
96{
97 ST_LSL = 0,
98 ST_ASL = 0,
99 ST_LSR = 1,
100 ST_ASR = 2,
101 ST_ROR = 3,
102 ST_RRX = 4
103};
104enum IntegerSize
105{
106 I_I8 = 0,
107 I_I16,
108 I_I32,
109 I_I64
110};
111
112enum
113{
114 NUMGPRs = 13,
115};
116
117class FakeXEmitter;
118
119enum OpType
120{
121 TYPE_IMM = 0,
122 TYPE_REG,
123 TYPE_IMMSREG,
124 TYPE_RSR,
125 TYPE_MEM
126};
127
128// This is no longer a proper operand2 class. Need to split up.
129class Operand2
130{
131 friend class FakeXEmitter;
132protected:
133 u32 Value;
134
135private:
136 OpType Type;
137
138 // IMM types
139 u8 Rotation; // Only for u8 values
140
141 // Register types
142 u8 IndexOrShift;
143 ShiftType Shift;
144public:
145 OpType GetType()
146 {
147 return Type;
148 }
149 Operand2() {}
150 Operand2(u32 imm, OpType type = TYPE_IMM)
151 {
152 Type = type;
153 Value = imm;
154 Rotation = 0;
155 }
156
157 Operand2(FakeReg Reg)
158 {
159 Type = TYPE_REG;
160 Value = Reg;
161 Rotation = 0;
162 }
163 Operand2(u8 imm, u8 rotation)
164 {
165 Type = TYPE_IMM;
166 Value = imm;
167 Rotation = rotation;
168 }
169 Operand2(FakeReg base, ShiftType type, FakeReg shift) // RSR
170 {
171 Type = TYPE_RSR;
172 ASSERT_MSG(type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
173 IndexOrShift = shift;
174 Shift = type;
175 Value = base;
176 }
177
178 Operand2(FakeReg base, ShiftType type, u8 shift)// For IMM shifted register
179 {
180 if(shift == 32) shift = 0;
181 switch (type)
182 {
183 case ST_LSL:
184 ASSERT_MSG(shift < 32, "Invalid Operand2: LSL %u", shift);
185 break;
186 case ST_LSR:
187 ASSERT_MSG(shift <= 32, "Invalid Operand2: LSR %u", shift);
188 if (!shift)
189 type = ST_LSL;
190 if (shift == 32)
191 shift = 0;
192 break;
193 case ST_ASR:
194 ASSERT_MSG(shift < 32, "Invalid Operand2: ASR %u", shift);
195 if (!shift)
196 type = ST_LSL;
197 if (shift == 32)
198 shift = 0;
199 break;
200 case ST_ROR:
201 ASSERT_MSG(shift < 32, "Invalid Operand2: ROR %u", shift);
202 if (!shift)
203 type = ST_LSL;
204 break;
205 case ST_RRX:
206 ASSERT_MSG(shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
207 type = ST_ROR;
208 break;
209 }
210 IndexOrShift = shift;
211 Shift = type;
212 Value = base;
213 Type = TYPE_IMMSREG;
214 }
215 u32 GetData()
216 {
217 switch(Type)
218 {
219 case TYPE_IMM:
220 return Imm12Mod(); // This'll need to be changed later
221 case TYPE_REG:
222 return Rm();
223 case TYPE_IMMSREG:
224 return IMMSR();
225 case TYPE_RSR:
226 return RSR();
227 default:
228 ASSERT_MSG(false, "GetData with Invalid Type");
229 return 0;
230 }
231 }
232 u32 IMMSR() // IMM shifted register
233 {
234 ASSERT_MSG(Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
235 return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
236 }
237 u32 RSR() // Register shifted register
238 {
239 ASSERT_MSG(Type == TYPE_RSR, "RSR must be RSR Of Course");
240 return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
241 }
242 u32 Rm()
243 {
244 ASSERT_MSG(Type == TYPE_REG, "Rm must be with Reg");
245 return Value;
246 }
247
248 u32 Imm5()
249 {
250 ASSERT_MSG((Type == TYPE_IMM), "Imm5 not IMM value");
251 return ((Value & 0x0000001F) << 7);
252 }
253 u32 Imm8()
254 {
255 ASSERT_MSG((Type == TYPE_IMM), "Imm8Rot not IMM value");
256 return Value & 0xFF;
257 }
258 u32 Imm8Rot() // IMM8 with Rotation
259 {
260 ASSERT_MSG((Type == TYPE_IMM), "Imm8Rot not IMM value");
261 ASSERT_MSG((Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation);
262 return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
263 }
264 u32 Imm12()
265 {
266 ASSERT_MSG((Type == TYPE_IMM), "Imm12 not IMM");
267 return (Value & 0x00000FFF);
268 }
269
270 u32 Imm12Mod()
271 {
272 // This is an IMM12 with the top four bits being rotation and the
273 // bottom eight being an IMM. This is for instructions that need to
274 // expand a 8bit IMM to a 32bit value and gives you some rotation as
275 // well.
276 // Each rotation rotates to the right by 2 bits
277 ASSERT_MSG((Type == TYPE_IMM), "Imm12Mod not IMM");
278 return ((Rotation & 0xF) << 8) | (Value & 0xFF);
279 }
280 u32 Imm16()
281 {
282 ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM");
283 return ( (Value & 0xF000) << 4) | (Value & 0x0FFF);
284 }
285 u32 Imm16Low()
286 {
287 return Imm16();
288 }
289 u32 Imm16High() // Returns high 16bits
290 {
291 ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM");
292 return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
293 }
294 u32 Imm24()
295 {
296 ASSERT_MSG((Type == TYPE_IMM), "Imm16 not IMM");
297 return (Value & 0x0FFFFFFF);
298 }
299};
300
301// Use these when you don't know if an imm can be represented as an operand2.
302// This lets you generate both an optimal and a fallback solution by checking
303// the return value, which will be false if these fail to find a Operand2 that
304// represents your 32-bit imm value.
305bool TryMakeOperand2(u32 imm, Operand2 &op2);
306bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
307bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);
308
309// Use this only when you know imm can be made into an Operand2.
310Operand2 AssumeMakeOperand2(u32 imm);
311
312inline Operand2 R(FakeReg Reg) { return Operand2(Reg, TYPE_REG); }
313inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); }
314inline Operand2 Mem(void *ptr) { return Operand2((u32)(uintptr_t)ptr, TYPE_IMM); }
315//usage: struct {int e;} s; STRUCT_OFFSET(s,e)
316#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))
317
318
// A forward branch emitted before its target is known; patched later by
// the emitter's SetJumpTarget.
319struct FixupBranch
320{
321 u8 *ptr;
322 u32 condition; // Remembers our condition at the time
323 int type; //0 = B 1 = BL
324};
325
326typedef const u8* JumpTarget;
327
328// XXX: Stop polluting the global namespace
// Bitmask flags describing element type/width for vector-op helpers.
329const u32 I_8 = (1 << 0);
330const u32 I_16 = (1 << 1);
331const u32 I_32 = (1 << 2);
332const u32 I_64 = (1 << 3);
333const u32 I_SIGNED = (1 << 4);
334const u32 I_UNSIGNED = (1 << 5);
335const u32 F_32 = (1 << 6);
336const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL
337
338u32 EncodeVd(FakeReg Vd);
339u32 EncodeVn(FakeReg Vn);
340u32 EncodeVm(FakeReg Vm);
341
342u32 encodedSize(u32 value);
343
344// Subtracts the base from the register to give us the real one
345FakeReg SubBase(FakeReg Reg);
346
347// See A.7.1 in the Fakev7-A
348// VMUL F32 scalars can only be up to D15[0], D15[1] - higher scalars cannot be individually addressed
349FakeReg DScalar(FakeReg dreg, int subScalar);
350FakeReg QScalar(FakeReg qreg, int subScalar);
351
// Alignment hint encodings for NEON-style element loads/stores
// (encoded field values, not byte counts).
352enum NEONAlignment {
353 ALIGN_NONE = 0,
354 ALIGN_64 = 1,
355 ALIGN_128 = 2,
356 ALIGN_256 = 3
357};
358
359
360class NEONXEmitter;
361
// Stub emitter: mirrors the real emitter's interface but most entry points
// are intentional no-ops, so code that links against it compiles without a
// real backend.
362class FakeXEmitter
363{
364 friend struct OpArg; // for Write8 etc
365private:
366 u8 *code, *startcode;
367 u8 *lastCacheFlushEnd;
368 u32 condition;
369
370protected:
 // Writes a raw 32-bit word at the cursor and advances it; no bounds check.
371 inline void Write32(u32 value) {*(u32*)code = value; code+=4;}
372
373public:
374 FakeXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
375 condition = CC_AL << 28;
376 }
377 FakeXEmitter(u8 *code_ptr) {
378 code = code_ptr;
379 lastCacheFlushEnd = code_ptr;
380 startcode = code_ptr;
381 condition = CC_AL << 28;
382 }
383 virtual ~FakeXEmitter() {}
384
 // NOTE(review): unlike the real emitter, these stubs ignore/return null
 // even though Write32 above does advance `code` — intentional for the
 // fake backend, but confirm no caller relies on GetCodePtr here.
385 void SetCodePtr(u8 *ptr) {}
386 void ReserveCodeSpace(u32 bytes) {}
387 const u8 *AlignCode16() { return nullptr; }
388 const u8 *AlignCodePage() { return nullptr; }
389 const u8 *GetCodePtr() const { return nullptr; }
390 void FlushIcache() {}
391 void FlushIcacheSection(u8 *start, u8 *end) {}
392 u8 *GetWritableCodePtr() { return nullptr; }
393
 // Current condition code lives in the top 4 bits of `condition`.
394 CCFlags GetCC() { return CCFlags(condition >> 28); }
395 void SetCC(CCFlags cond = CC_AL) {}
396
397 // Special purpose instructions
398
399 // Do nothing
400 void NOP(int count = 1) {} //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
401
402#ifdef CALL
403#undef CALL
404#endif
405
 // Type-erasing call helper; template overload funnels any function
 // pointer into the void* version.
406 void QuickCallFunction(FakeReg scratchreg, const void *func);
407 template <typename T> void QuickCallFunction(FakeReg scratchreg, T func) {
408 QuickCallFunction(scratchreg, (const void *)func);
409 }
410}; // class FakeXEmitter
411
412
413// Everything that needs to generate machine code should inherit from this.
414// You get memory management for free, plus, you can use all the MOV etc functions without
415// having to prefix them with gen-> or something similar.
// Everything that needs to generate machine code should inherit from this.
// You get memory management for free, plus, you can use all the MOV etc functions without
// having to prefix them with gen-> or something similar.
// In this fake backend the alloc/free/protect entry points are all no-ops;
// only the region bookkeeping accessors do real work.
416class FakeXCodeBlock : public FakeXEmitter
417{
418protected:
419 u8 *region;
420 size_t region_size;
421
422public:
423 FakeXCodeBlock() : region(NULL), region_size(0) {}
424 virtual ~FakeXCodeBlock() { if (region) FreeCodeSpace(); }
425
426 // Call this before you generate any code.
427 void AllocCodeSpace(int size) { }
428
429 // Always clear code space with breakpoints, so that if someone accidentally executes
430 // uninitialized, it just breaks into the debugger.
431 void ClearCodeSpace() { }
432
433 // Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
434 void FreeCodeSpace() { }
435
436 bool IsInSpace(const u8 *ptr) const
437 {
438 return ptr >= region && ptr < region + region_size;
439 }
440
441 // Cannot currently be undone. Will write protect the entire code region.
442 // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
443 void WriteProtect() { }
444 void UnWriteProtect() { }
445
446 void ResetCodePtr()
447 {
448 SetCodePtr(region);
449 }
450
 // NOTE(review): GetCodePtr() is stubbed to return nullptr in
 // FakeXEmitter, so this result is meaningless here — presumably only
 // the real emitter's override makes it valid; confirm callers.
451 size_t GetSpaceLeft() const
452 {
453 return region_size - (GetCodePtr() - region);
454 }
455
456 u8 *GetBasePtr() {
457 return region;
458 }
459
460 size_t GetOffset(const u8 *ptr) const {
461 return ptr - region;
462 }
463};
464
465} // namespace
diff --git a/src/common/platform.h b/src/common/platform.h
index 0a912dda3..08aaa03a4 100644
--- a/src/common/platform.h
+++ b/src/common/platform.h
@@ -27,7 +27,7 @@
27//////////////////////////////////////////////////////////////////////////////////////////////////// 27////////////////////////////////////////////////////////////////////////////////////////////////////
28// Platform detection 28// Platform detection
29 29
30#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) 30#if defined(__x86_64__) || defined(_M_X86_64) || defined(__aarch64__)
31 #define EMU_ARCH_BITS 64 31 #define EMU_ARCH_BITS 64
32#elif defined(__i386) || defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) 32#elif defined(__i386) || defined(_M_IX86) || defined(__arm__) || defined(_M_ARM)
33 #define EMU_ARCH_BITS 32 33 #define EMU_ARCH_BITS 32
diff --git a/src/common/x64_emitter.cpp b/src/common/x64_emitter.cpp
new file mode 100644
index 000000000..19db2e484
--- /dev/null
+++ b/src/common/x64_emitter.cpp
@@ -0,0 +1,1989 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cstring>
19
20#include "logging/log.h"
21
22#include "assert.h"
23#include "x64_emitter.h"
24#include "abi.h"
25#include "cpu_detect.h"
26#include "memory_util.h"
27
28#define PRIx64 "llx"
29
30// Minimize the diff against Dolphin
31#define DYNA_REC JIT
32
33namespace Gen
34{
35
// Opcode bytes for one "normal" two-operand x86 op, indexed by addressing
// form: reg->rm / rm->reg (8- and 32-bit), immediate forms, sign-extended
// imm8, EAX-short forms, and the /ext ModRM opcode extension.
36struct NormalOpDef
37{
38 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
39};
40
41// 0xCC is code for invalid combination of immediates
// Row order presumably matches the NormalOp enum in x64_emitter.h — keep in sync.
42static const NormalOpDef normalops[11] =
43{
44 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD
45 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC
46
47 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB
48 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB
49
50 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND
51 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR
52
53 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR
54 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV
55
56 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from)
57 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP
58
59 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG
60};
61
// Second opcode byte (after 0x0F and any prefix) for the regular SSE ops.
62enum NormalSSEOps
63{
64 sseCMP = 0xC2,
65 sseADD = 0x58, //ADD
66 sseSUB = 0x5C, //SUB
67 sseAND = 0x54, //AND
68 sseANDN = 0x55, //ANDN
69 sseOR = 0x56,
70 sseXOR = 0x57,
71 sseMUL = 0x59, //MUL
72 sseDIV = 0x5E, //DIV
73 sseMIN = 0x5D, //MIN
74 sseMAX = 0x5F, //MAX
75 sseCOMIS = 0x2F, //COMIS
76 sseUCOMIS = 0x2E, //UCOMIS
77 sseSQRT = 0x51, //SQRT
78 sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!)
79 sseRCP = 0x53, //RCP
80 sseMOVAPfromRM = 0x28, //MOVAP from RM
81 sseMOVAPtoRM = 0x29, //MOVAP to RM
82 sseMOVUPfromRM = 0x10, //MOVUP from RM
83 sseMOVUPtoRM = 0x11, //MOVUP to RM
84 sseMOVLPfromRM= 0x12,
85 sseMOVLPtoRM = 0x13,
86 sseMOVHPfromRM= 0x16,
87 sseMOVHPtoRM = 0x17,
88 sseMOVHLPS = 0x12,
89 sseMOVLHPS = 0x16,
90 sseMOVDQfromRM = 0x6F,
91 sseMOVDQtoRM = 0x7F,
92 sseMASKMOVDQU = 0xF7,
93 sseLDDQU = 0xF0,
94 sseSHUF = 0xC6,
95 sseMOVNTDQ = 0xE7,
96 sseMOVNTP = 0x2B,
97 sseHADD = 0x7C,
98};
99
100
101void XEmitter::SetCodePtr(u8 *ptr)
102{
103 code = ptr;
104}
105
106const u8 *XEmitter::GetCodePtr() const
107{
108 return code;
109}
110
111u8 *XEmitter::GetWritableCodePtr()
112{
113 return code;
114}
115
116void XEmitter::ReserveCodeSpace(int bytes)
117{
118 for (int i = 0; i < bytes; i++)
119 *code++ = 0xCC;
120}
121
122const u8 *XEmitter::AlignCode4()
123{
124 int c = int((u64)code & 3);
125 if (c)
126 ReserveCodeSpace(4-c);
127 return code;
128}
129
130const u8 *XEmitter::AlignCode16()
131{
132 int c = int((u64)code & 15);
133 if (c)
134 ReserveCodeSpace(16-c);
135 return code;
136}
137
138const u8 *XEmitter::AlignCodePage()
139{
140 int c = int((u64)code & 4095);
141 if (c)
142 ReserveCodeSpace(4096-c);
143 return code;
144}
145
146// This operation modifies flags; check to see the flags are locked.
147// If the flags are locked, we should immediately and loudly fail before
148// causing a subtle JIT bug.
149void XEmitter::CheckFlags()
150{
151 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
152}
153
// ModR/M byte: mod(2) | reg-or-opcode-ext(3) | r/m(3).
154void XEmitter::WriteModRM(int mod, int reg, int rm)
155{
156 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
157}
158
// SIB byte: scale(2) | index(3) | base(3).
159void XEmitter::WriteSIB(int scale, int index, int base)
160{
161 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
162}
163
// Emits a REX prefix if the operand/opcode need one (64-bit width, extended
// registers, or byte access to SIL/DIL/BPL/SPL). On 32-bit builds it only
// asserts that no REX-requiring feature was requested.
164void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const
165{
166 if (customOp == -1) customOp = operandReg;
167#ifdef _M_X86_64
168 u8 op = 0x40;
169 // REX.W (whether operation is a 64-bit operation)
170 if (opBits == 64) op |= 8;
171 // REX.R (whether ModR/M reg field refers to R8-R15.
172 if (customOp & 8) op |= 4;
173 // REX.X (whether ModR/M SIB index field refers to R8-R15)
174 if (indexReg & 8) op |= 2;
175 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
176 if (offsetOrBaseReg & 8) op |= 1;
177 // Write REX if wr have REX bits to write, or if the operation accesses
178 // SIL, DIL, BPL, or SPL.
179 if (op != 0x40 ||
180 (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
181 (opBits == 8 && (customOp & 0x10c) == 4))
182 {
183 emit->Write8(op);
184 // Check the operation doesn't access AH, BH, CH, or DH.
185 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
186 DEBUG_ASSERT((customOp & 0x100) == 0);
187 }
188#else
189 DEBUG_ASSERT(opBits != 64);
190 DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
191 DEBUG_ASSERT((indexReg & 8) == 0);
192 DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
193 DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
194 DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
195#endif
196}
197
// Emits a VEX prefix, using the compact 2-byte form (0xC5) when X, B, W and
// the 0F map allow it, otherwise the full 3-byte form (0xC4). vvvv encodes
// the second source register inverted, 0b1111 when unused.
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const
199{
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1)
208 {
209 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp;
210 emit->Write8(0xC5);
211 emit->Write8(RvvvvLpp);
212 }
213 else
214 {
215 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
216 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp;
217 emit->Write8(0xC4);
218 emit->Write8(RXBmmmmm);
219 emit->Write8(WvvvvLpp);
220 }
221}
222
// Emits everything after the opcode byte(s) for this operand: the ModR/M
// byte, an optional SIB byte, and an optional 8/32-bit displacement.
// `extraBytes` is how many immediate bytes the caller will append after the
// displacement (needed to compute RIP-relative offsets correctly).
223void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
224 bool warn_64bit_offset) const
225{
226 if (_operandReg == INVALID_REG)
227 _operandReg = (X64Reg)this->operandReg;
228 int mod = 0;
229 int ireg = indexReg;
230 bool SIB = false;
231 int _offsetOrBaseReg = this->offsetOrBaseReg;
232
233 if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address
234 {
235 // Oh, RIP addressing.
236 _offsetOrBaseReg = 5;
237 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
238 //TODO : add some checks
239#ifdef _M_X86_64
 // Displacement is relative to the END of the instruction: current
 // position + 4 disp bytes + any trailing immediate bytes.
240 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
241 s64 distance = (s64)offset - (s64)ripAddr;
242 ASSERT_MSG(
243 (distance < 0x80000000LL &&
244 distance >= -0x80000000LL) ||
245 !warn_64bit_offset,
246 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")",
247 ripAddr, offset);
248 s32 offs = (s32)distance;
249 emit->Write32((u32)offs);
250#else
251 emit->Write32((u32)offset);
252#endif
253 return;
254 }
255
256 if (scale == 0)
257 {
258 // Oh, no memory, Just a reg.
259 mod = 3; //11
260 }
261 else if (scale >= 1)
262 {
263 //Ah good, no scaling.
 // Plain [reg] — but (reg&7)==4 collides with the SIB escape and
 // (reg&7)==5 with disp32-only, so those fall through to the SIB path.
264 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5))
265 {
266 //Okay, we're good. No SIB necessary.
267 int ioff = (int)offset;
268 if (ioff == 0)
269 {
270 mod = 0;
271 }
272 else if (ioff<-128 || ioff>127)
273 {
274 mod = 2; //32-bit displacement
275 }
276 else
277 {
278 mod = 1; //8-bit displacement
279 }
280 }
281 else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)
282 {
 // [index*scale + disp32] with no base: mod=0, base field = 5.
283 SIB = true;
284 mod = 0;
285 _offsetOrBaseReg = 5;
286 }
287 else //if (scale != SCALE_ATREG)
288 {
289 if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :(
290 {
291 //So we have to fake it with SIB encoding :(
292 SIB = true;
293 }
294
295 if (scale >= SCALE_1 && scale < SCALE_ATREG)
296 {
297 SIB = true;
298 }
299
300 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4))
301 {
302 SIB = true;
303 ireg = _offsetOrBaseReg;
304 }
305
306 //Okay, we're fine. Just disp encoding.
307 //We need displacement. Which size?
308 int ioff = (int)(s64)offset;
309 if (ioff < -128 || ioff > 127)
310 {
311 mod = 2; //32-bit displacement
312 }
313 else
314 {
315 mod = 1; //8-bit displacement
316 }
317 }
318 }
319
320 // Okay. Time to do the actual writing
321 // ModRM byte:
322 int oreg = _offsetOrBaseReg;
323 if (SIB)
324 oreg = 4;
325
326 // TODO(ector): WTF is this if about? I don't remember writing it :-)
327 //if (RIP)
328 // oreg = 5;
329
330 emit->WriteModRM(mod, _operandReg&7, oreg&7);
331
332 if (SIB)
333 {
334 //SIB byte
335 int ss;
336 switch (scale)
337 {
338 case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP
339 case SCALE_1: ss = 0; break;
340 case SCALE_2: ss = 1; break;
341 case SCALE_4: ss = 2; break;
342 case SCALE_8: ss = 3; break;
343 case SCALE_NOBASE_2: ss = 1; break;
344 case SCALE_NOBASE_4: ss = 2; break;
345 case SCALE_NOBASE_8: ss = 3; break;
346 case SCALE_ATREG: ss = 0; break;
347 default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break;
348 }
349 emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7)));
350 }
351
352 if (mod == 1) //8-bit disp
353 {
354 emit->Write8((u8)(s8)(s32)offset);
355 }
356 else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp
357 {
358 emit->Write32((u32)offset);
359 }
360}
361
362// W = operand extended width (1 if 64-bit)
363// R = register# upper bit
364// X = scale amnt upper bit
365// B = base register# upper bit
// Emits a REX prefix from raw bit flags; skipped entirely when all four
// bits are clear (0x40 would be a no-op prefix).
366void XEmitter::Rex(int w, int r, int x, int b)
367{
368 w = w ? 1 : 0;
369 r = r ? 1 : 0;
370 x = x ? 1 : 0;
371 b = b ? 1 : 0;
372 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
373 if (rx != 0x40)
374 Write8(rx);
375}
376
// Direct jump: short form (EB rel8, 2 bytes) unless force5Bytes requests
// the near form (E9 rel32, 5 bytes). Displacements are relative to the
// end of the emitted instruction.
377void XEmitter::JMP(const u8 *addr, bool force5Bytes)
378{
379 u64 fn = (u64)addr;
380 if (!force5Bytes)
381 {
382 s64 distance = (s64)(fn - ((u64)code + 2));
383 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
384 "Jump target too far away, needs force5Bytes = true");
385 //8 bits will do
386 Write8(0xEB);
387 Write8((u8)(s8)distance);
388 }
389 else
390 {
391 s64 distance = (s64)(fn - ((u64)code + 5));
392
393 ASSERT_MSG(
394 distance >= -0x80000000LL && distance < 0x80000000LL,
395 "Jump target too far away, needs indirect register");
396 Write8(0xE9);
397 Write32((u32)(s32)distance);
398 }
399}
400
// Indirect jump through a register/memory operand (FF /4).
401void XEmitter::JMPptr(const OpArg &arg2)
402{
403 OpArg arg = arg2;
404 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
405 arg.operandReg = 4;
406 arg.WriteRex(this, 0, 0);
407 Write8(0xFF);
408 arg.WriteRest(this);
409}
410
411//Can be used to trap other processors, before overwriting their code
412// not used in dolphin
// EB FE: a 2-byte jump to itself (infinite loop).
413void XEmitter::JMPself()
414{
415 Write8(0xEB);
416 Write8(0xFE);
417}
418
// Indirect call through a register/memory operand (FF /2).
419void XEmitter::CALLptr(OpArg arg)
420{
421 if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument");
422 arg.operandReg = 2;
423 arg.WriteRex(this, 0, 0);
424 Write8(0xFF);
425 arg.WriteRest(this);
426}
427
// Direct near call (E8 rel32). The unsigned wrap-around comparison below is
// the ±2 GiB range check expressed without signed overflow.
428void XEmitter::CALL(const void *fnptr)
429{
430 u64 distance = u64(fnptr) - (u64(code) + 5);
431 ASSERT_MSG(
432 distance < 0x0000000080000000ULL ||
433 distance >= 0xFFFFFFFF80000000ULL,
434 "CALL out of range (%p calls %p)", code, fnptr);
435 Write8(0xE8);
436 Write32(u32(distance));
437}
438
// Emits an unconditional jump with a placeholder displacement; the returned
// FixupBranch.ptr points just PAST the displacement so SetJumpTarget can
// patch it later. type 0 = rel8 form, type 1 = rel32 form.
439FixupBranch XEmitter::J(bool force5bytes)
440{
441 FixupBranch branch;
442 branch.type = force5bytes ? 1 : 0;
443 branch.ptr = code + (force5bytes ? 5 : 2);
444 if (!force5bytes)
445 {
446 //8 bits will do
447 Write8(0xEB);
448 Write8(0);
449 }
450 else
451 {
452 Write8(0xE9);
453 Write32(0);
454 }
455 return branch;
456}
457
// Conditional-jump variant of J(); 0F 8x for the 6-byte rel32 form.
458FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)
459{
460 FixupBranch branch;
461 branch.type = force5bytes ? 1 : 0;
462 branch.ptr = code + (force5bytes ? 6 : 2);
463 if (!force5bytes)
464 {
465 //8 bits will do
466 Write8(0x70 + conditionCode);
467 Write8(0);
468 }
469 else
470 {
471 Write8(0x0F);
472 Write8(0x80 + conditionCode);
473 Write32(0);
474 }
475 return branch;
476}
477
// Conditional jump to a known target: picks the short (70+cc rel8) or near
// (0F 80+cc rel32) form based on the actual distance.
478void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
479{
480 u64 fn = (u64)addr;
481 s64 distance = (s64)(fn - ((u64)code + 2));
482 if (distance < -0x80 || distance >= 0x80 || force5bytes)
483 {
484 distance = (s64)(fn - ((u64)code + 6));
485 ASSERT_MSG(
486 distance >= -0x80000000LL && distance < 0x80000000LL,
487 "Jump target too far away, needs indirect register");
488 Write8(0x0F);
489 Write8(0x80 + conditionCode);
490 Write32((u32)(s32)distance);
491 }
492 else
493 {
494 Write8(0x70 + conditionCode);
495 Write8((u8)(s8)distance);
496 }
497}
498
// Patches a pending FixupBranch so it jumps to the current code position.
// branch.ptr is the end of the branch instruction, so the displacement sits
// immediately before it ([-1] byte or [-1] dword).
499void XEmitter::SetJumpTarget(const FixupBranch &branch)
500{
501 if (branch.type == 0)
502 {
503 s64 distance = (s64)(code - branch.ptr);
504 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
505 branch.ptr[-1] = (u8)(s8)distance;
506 }
507 else if (branch.type == 1)
508 {
509 s64 distance = (s64)(code - branch.ptr);
510 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
511 ((s32*)branch.ptr)[-1] = (s32)distance;
512 }
513}
514
515// INC/DEC considered harmful on newer CPUs due to partial flag set.
516// Use ADD, SUB instead.
517
518/*
519void XEmitter::INC(int bits, OpArg arg)
520{
521 if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
522 arg.operandReg = 0;
523 if (bits == 16) {Write8(0x66);}
524 arg.WriteRex(this, bits, bits);
525 Write8(bits == 8 ? 0xFE : 0xFF);
526 arg.WriteRest(this);
527}
528void XEmitter::DEC(int bits, OpArg arg)
529{
530 if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
531 arg.operandReg = 1;
532 if (bits == 16) {Write8(0x66);}
533 arg.WriteRex(this, bits, bits);
534 Write8(bits == 8 ? 0xFE : 0xFF);
535 arg.WriteRest(this);
536}
537*/
538
539//Single byte opcodes
540//There is no PUSHAD/POPAD in 64-bit mode.
541void XEmitter::INT3() {Write8(0xCC);}
542void XEmitter::RET() {Write8(0xC3);}
543void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
544
545// The first sign of decadence: optimized NOPs.
// Emits `size` bytes of padding using the recommended multi-byte NOP
// encodings (one instruction per <= 10 bytes, then repeats for the rest).
546void XEmitter::NOP(size_t size)
547{
548 DEBUG_ASSERT((int)size > 0);
549 while (true)
550 {
551 switch (size)
552 {
553 case 0:
554 return;
555 case 1:
556 Write8(0x90);
557 return;
558 case 2:
559 Write8(0x66); Write8(0x90);
560 return;
561 case 3:
562 Write8(0x0F); Write8(0x1F); Write8(0x00);
563 return;
564 case 4:
565 Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00);
566 return;
567 case 5:
568 Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00);
569 Write8(0x00);
570 return;
571 case 6:
572 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44);
573 Write8(0x00); Write8(0x00);
574 return;
575 case 7:
576 Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00);
577 Write8(0x00); Write8(0x00); Write8(0x00);
578 return;
579 case 8:
580 Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00);
581 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84);
585 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
586 Write8(0x00);
587 return;
588 case 10:
589 Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F);
590 Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00);
591 Write8(0x00); Write8(0x00);
592 return;
593 default:
594 // Even though x86 instructions are allowed to be up to 15 bytes long,
595 // AMD advises against using NOPs longer than 11 bytes because they
596 // carry a performance penalty on CPUs older than AMD family 16h.
597 Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F);
598 Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00);
599 Write8(0x00); Write8(0x00); Write8(0x00);
600 size -= 11;
601 continue;
602 }
603 }
604}
605
606void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu
607void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry
608void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry
609void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry
610
611//TODO: xchg ah, al ???
612void XEmitter::XCHG_AHAL()
613{
614 Write8(0x86);
615 Write8(0xe0);
616 // alt. 86 c4
617}
618
619//These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
620void XEmitter::LAHF() {Write8(0x9F);}
621void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);}
622
623void XEmitter::PUSHF() {Write8(0x9C);}
624void XEmitter::POPF() {CheckFlags(); Write8(0x9D);}
625
626void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);}
627void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);}
628void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);}
629
// Opcode with the register number folded into the low 3 bits (e.g. PUSH r).
630void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg)
631{
632 if (bits == 16)
633 Write8(0x66);
634 Rex(bits == 64, 0, 0, (int)reg >> 3);
635 Write8(byte + ((int)reg & 7));
636}
637
// Two-byte (0F xx) opcode with the register folded into the second byte.
638void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg)
639{
640 if (bits == 16)
641 Write8(0x66);
642 Rex(bits==64, 0, 0, (int)reg >> 3);
643 Write8(byte1);
644 Write8(byte2 + ((int)reg & 7));
645}
646
// Sign-extend accumulator into DX:AX / EDX:EAX / RDX:RAX (CWD/CDQ/CQO).
647void XEmitter::CWD(int bits)
648{
649 if (bits == 16)
650 Write8(0x66);
651 Rex(bits == 64, 0, 0, 0);
652 Write8(0x99);
653}
654
// Sign-extend AL->AX / AX->EAX / EAX->RAX (CBW/CWDE/CDQE); `bits` is the
// SOURCE width, so bits==8 selects the 16-bit operand size prefix.
655void XEmitter::CBW(int bits)
656{
657 if (bits == 8)
658 Write8(0x66);
659 Rex(bits == 32, 0, 0, 0);
660 Write8(0x98);
661}
662
663//Simple opcodes
664
665
666//push/pop do not need wide to be 64-bit
667void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
668void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
669
// PUSH of a general operand: register short form, immediate forms
// (6A imm8 / 68 imm16/imm32), or memory via FF /6.
670void XEmitter::PUSH(int bits, const OpArg &reg)
671{
672 if (reg.IsSimpleReg())
673 PUSH(reg.GetSimpleReg());
674 else if (reg.IsImm())
675 {
676 switch (reg.GetImmBits())
677 {
678 case 8:
679 Write8(0x6A);
680 Write8((u8)(s8)reg.offset);
681 break;
682 case 16:
683 Write8(0x66);
684 Write8(0x68);
685 Write16((u16)(s16)(s32)reg.offset);
686 break;
687 case 32:
688 Write8(0x68);
689 Write32((u32)reg.offset);
690 break;
691 default:
692 ASSERT_MSG(0, "PUSH - Bad imm bits");
693 break;
694 }
695 }
696 else
697 {
698 if (bits == 16)
699 Write8(0x66);
700 reg.WriteRex(this, bits, bits);
701 Write8(0xFF);
702 reg.WriteRest(this, 0, (X64Reg)6);
703 }
704}
705
// Only register POP is supported; memory POP is not implemented.
706void XEmitter::POP(int /*bits*/, const OpArg &reg)
707{
708 if (reg.IsSimpleReg())
709 POP(reg.GetSimpleReg());
710 else
711 ASSERT_MSG(0, "POP - Unsupported encoding");
712}
713
// Byte-swap: 0F C8+r for 32/64-bit; 16-bit is emulated with ROL 8 since
// 16-bit BSWAP is undefined; 8-bit is a no-op by definition.
714void XEmitter::BSWAP(int bits, X64Reg reg)
715{
716 if (bits >= 32)
717 {
718 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
719 }
720 else if (bits == 16)
721 {
722 ROL(16, R(reg), Imm8(8));
723 }
724 else if (bits == 8)
725 {
726 // Do nothing - can't bswap a single byte...
727 }
728 else
729 {
730 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
731 }
732}
733
734// Undefined opcode - reserved
735// If we ever need a way to always cause a non-breakpoint hard exception...
736void XEmitter::UD2()
737{
738 Write8(0x0F);
739 Write8(0x0B);
740}
741
// PREFETCHx hint (0F 18 /level); the level selects the ModRM /ext.
742void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg)
743{
744 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
745 arg.operandReg = (u8)level;
746 arg.WriteRex(this, 0, 0);
747 Write8(0x0F);
748 Write8(0x18);
749 arg.WriteRest(this);
750}
751
// SETcc r/m8 (0F 90+cc).
752void XEmitter::SETcc(CCFlags flag, OpArg dest)
753{
754 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
755 dest.operandReg = 0;
756 dest.WriteRex(this, 0, 8);
757 Write8(0x0F);
758 Write8(0x90 + (u8)flag);
759 dest.WriteRest(this);
760}
761
// CMOVcc (0F 40+cc); no 8-bit form exists in the ISA.
762void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag)
763{
764 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
765 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
766 if (bits == 16)
767 Write8(0x66);
768 src.operandReg = dest;
769 src.WriteRex(this, bits, bits);
770 Write8(0x0F);
771 Write8(0x40 + (u8)flag);
772 src.WriteRest(this);
773}
774
// Shared encoder for the F6/F7 group (MUL/IMUL/DIV/IDIV/NEG/NOT); `ext`
// selects the operation via the ModRM /ext field.
775void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
776{
777 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
778 CheckFlags();
779 src.operandReg = ext;
780 if (bits == 16)
781 Write8(0x66);
782 src.WriteRex(this, bits, bits, 0);
783 if (bits == 8)
784 {
785 Write8(0xF6);
786 }
787 else
788 {
789 Write8(0xF7);
790 }
791 src.WriteRest(this);
792}
793
794void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);}
795void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);}
796void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
797void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
798void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
799void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
800
// Shared encoder for 0F-map reg<-r/m ops (BSF/BSR/MOVNTI/...); `rep` adds
// the F3 prefix that turns BSF/BSR into TZCNT/LZCNT on supporting CPUs.
801void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
802{
803 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
804 CheckFlags();
805 src.operandReg = (u8)dest;
806 if (bits == 16)
807 Write8(0x66);
808 if (rep)
809 Write8(0xF3);
810 src.WriteRex(this, bits, bits);
811 Write8(0x0F);
812 Write8(byte2);
813 src.WriteRest(this);
814}
815
// Non-temporal store (0F C3). Note src/dest are deliberately swapped in the
// helper call: MOVNTI stores reg -> mem, the reverse of the BSF-style ops.
816void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
817{
818 if (bits <= 16)
819 ASSERT_MSG(0, "MOVNTI - bits<=16");
820 WriteBitSearchType(bits, src, dest, 0xC3);
821}
822
823void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
824void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
825
// F3-prefixed BSF/BSR; requires BMI1/LZCNT support, checked at runtime.
826void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
827{
828 CheckFlags();
829 if (!Common::cpu_info.bBMI1)
830 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
831 WriteBitSearchType(bits, dest, src, 0xBC, true);
832}
833void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
834{
835 CheckFlags();
836 if (!Common::cpu_info.bLZCNT)
837 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
838 WriteBitSearchType(bits, dest, src, 0xBD, true);
839}
840
// Sign-extending move (0F BE/BF, or 63 MOVSXD for 32->64); degenerates to a
// plain MOV when source and destination widths match.
841void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
842{
843 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
844 if (dbits == sbits)
845 {
846 MOV(dbits, R(dest), src);
847 return;
848 }
849 src.operandReg = (u8)dest;
850 if (dbits == 16)
851 Write8(0x66);
852 src.WriteRex(this, dbits, sbits);
853 if (sbits == 8)
854 {
855 Write8(0x0F);
856 Write8(0xBE);
857 }
858 else if (sbits == 16)
859 {
860 Write8(0x0F);
861 Write8(0xBF);
862 }
863 else if (sbits == 32 && dbits == 64)
864 {
865 Write8(0x63);
866 }
867 else
868 {
869 Crash();
870 }
871 src.WriteRest(this);
872}
873
// Zero-extending move (0F B6/B7). There is no MOVZX 32->64: a plain 32-bit
// MOV (8B) already zeroes the upper half, which is what the 32/64 case emits.
874void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
875{
876 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
877 if (dbits == sbits)
878 {
879 MOV(dbits, R(dest), src);
880 return;
881 }
882 src.operandReg = (u8)dest;
883 if (dbits == 16)
884 Write8(0x66);
885 //the 32bit result is automatically zero extended to 64bit
886 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
887 if (sbits == 8)
888 {
889 Write8(0x0F);
890 Write8(0xB6);
891 }
892 else if (sbits == 16)
893 {
894 Write8(0x0F);
895 Write8(0xB7);
896 }
897 else if (sbits == 32 && dbits == 64)
898 {
899 Write8(0x8B);
900 }
901 else
902 {
903 ASSERT_MSG(0, "MOVZX - Invalid size");
904 }
905 src.WriteRest(this);
906}
907
// MOVBE: move with byte-swap (requires the MOVBE CPU feature).
// Exactly one operand must be memory: 0F 38 F0 loads (mem -> reg),
// 0F 38 F1 stores (reg -> mem). An 8-bit MOVBE is meaningless, so it
// degrades to a plain MOV.
void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src)
{
    ASSERT_MSG(Common::cpu_info.bMOVBE, "Generating MOVBE on a system that does not support it.");
    if (bits == 8)
    {
        MOV(bits, dest, src);
        return;
    }

    if (bits == 16)
        Write8(0x66);

    if (dest.IsSimpleReg())
    {
        // load form: register destination, memory source
        ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
        src.WriteRex(this, bits, bits, dest.GetSimpleReg());
        Write8(0x0F); Write8(0x38); Write8(0xF0);
        src.WriteRest(this, 0, dest.GetSimpleReg());
    }
    else if (src.IsSimpleReg())
    {
        // store form: memory destination, register source
        ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
        dest.WriteRex(this, bits, bits, src.GetSimpleReg());
        Write8(0x0F); Write8(0x38); Write8(0xF1);
        dest.WriteRest(this, 0, src.GetSimpleReg());
    }
    else
    {
        ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
    }
}
939
940
// LEA: load the effective address of `src` into `dest` (opcode 8D).
// Does not access memory and does not touch flags.
void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
{
    ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
    src.operandReg = (u8)dest;
    if (bits == 16)
        Write8(0x66); //TODO: performance warning
    src.WriteRex(this, bits, bits);
    Write8(0x8D);
    src.WriteRest(this, 0, INVALID_REG, bits == 64);
}
951
//shift can be either imm8 or cl
// Shared emitter for the shift/rotate group (ROL/ROR/RCL/RCR/SHL/SHR/SAR).
// `ext` selects the operation via the ModRM reg field. Three encodings:
//   D0/D1  - shift by 1 (shortest; picked automatically when imm == 1)
//   C0/C1  - shift by imm8
//   D2/D3  - shift by CL (the only legal register shift count)
void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
{
    CheckFlags();
    bool writeImm = false;
    if (dest.IsImm())
    {
        ASSERT_MSG(0, "WriteShift - can't shift imms");
    }
    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
    {
        ASSERT_MSG(0, "WriteShift - illegal argument");
    }
    dest.operandReg = ext;
    if (bits == 16)
        Write8(0x66);
    dest.WriteRex(this, bits, bits, 0);
    if (shift.GetImmBits() == 8)
    {
        //ok an imm
        u8 imm = (u8)shift.offset;
        if (imm == 1)
        {
            Write8(bits == 8 ? 0xD0 : 0xD1); // shift-by-1 short form
        }
        else
        {
            writeImm = true;
            Write8(bits == 8 ? 0xC0 : 0xC1); // shift-by-imm8
        }
    }
    else
    {
        Write8(bits == 8 ? 0xD2 : 0xD3); // shift-by-CL
    }
    dest.WriteRest(this, writeImm ? 1 : 0);
    if (writeImm)
        Write8((u8)shift.offset);
}
991
// large rotates and shift are slower on intel than amd
// intel likes to rotate by 1, and the op is smaller too
// Thin wrappers over WriteShift; the last argument is the /ext opcode
// extension selecting the operation (note /6 is unused on x86).
void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
1001
// index can be either imm8 or register, don't use memory destination because it's slow
// Shared emitter for the bit-test group (BT/BTS/BTR/BTC). With an imm8 index
// the encoding is 0F BA /ext ib; with a register index the opcode is
// 0F (83 + 8*ext), i.e. 0F A3/AB/B3/BB for ext = 4..7.
void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
{
    CheckFlags();
    if (dest.IsImm())
    {
        ASSERT_MSG(0, "WriteBitTest - can't test imms");
    }
    if ((index.IsImm() && index.GetImmBits() != 8))
    {
        ASSERT_MSG(0, "WriteBitTest - illegal argument");
    }
    if (bits == 16)
        Write8(0x66);
    if (index.IsImm())
    {
        dest.WriteRex(this, bits, bits);
        Write8(0x0F); Write8(0xBA);
        dest.WriteRest(this, 1, (X64Reg)ext); // /ext goes in the ModRM reg field
        Write8((u8)index.offset);
    }
    else
    {
        X64Reg operand = index.GetSimpleReg();
        dest.WriteRex(this, bits, bits, operand);
        Write8(0x0F); Write8(0x83 + 8*ext);
        dest.WriteRest(this, 1, operand);
    }
}
1031
// Bit test (BT), test-and-set (BTS), test-and-reset (BTR), test-and-complement (BTC).
void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);}
void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
1036
//shift can be either imm8 or cl
// SHRD: double-precision right shift - shifts `dest` right, filling the
// vacated top bits from `src` (which must be a simple register).
// Encodings: 0F AC (imm8 count) / 0F AD (count in CL).
void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
{
    CheckFlags();
    if (dest.IsImm())
    {
        ASSERT_MSG(0, "SHRD - can't use imms as destination");
    }
    if (!src.IsSimpleReg())
    {
        ASSERT_MSG(0, "SHRD - must use simple register as source");
    }
    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
    {
        ASSERT_MSG(0, "SHRD - illegal shift");
    }
    if (bits == 16)
        Write8(0x66);
    X64Reg operand = src.GetSimpleReg();
    dest.WriteRex(this, bits, bits, operand);
    if (shift.GetImmBits() == 8)
    {
        Write8(0x0F); Write8(0xAC);
        dest.WriteRest(this, 1, operand);
        Write8((u8)shift.offset);
    }
    else
    {
        Write8(0x0F); Write8(0xAD);
        dest.WriteRest(this, 0, operand);
    }
}
1069
// SHLD: double-precision left shift - shifts `dest` left, filling the
// vacated low bits from `src` (which must be a simple register).
// Encodings: 0F A4 (imm8 count) / 0F A5 (count in CL).
void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
{
    CheckFlags();
    if (dest.IsImm())
    {
        ASSERT_MSG(0, "SHLD - can't use imms as destination");
    }
    if (!src.IsSimpleReg())
    {
        ASSERT_MSG(0, "SHLD - must use simple register as source");
    }
    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
    {
        ASSERT_MSG(0, "SHLD - illegal shift");
    }
    if (bits == 16)
        Write8(0x66);
    X64Reg operand = src.GetSimpleReg();
    dest.WriteRex(this, bits, bits, operand);
    if (shift.GetImmBits() == 8)
    {
        Write8(0x0F); Write8(0xA4);
        dest.WriteRest(this, 1, operand);
        Write8((u8)shift.offset);
    }
    else
    {
        Write8(0x0F); Write8(0xA5);
        dest.WriteRest(this, 0, operand);
    }
}
1101
// Emits a single-opcode instruction with this OpArg as the r/m operand:
// optional 0x66 prefix, REX, the opcode byte, then ModRM/SIB/displacement.
// `_operandReg` is placed in the ModRM reg field.
void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits)
{
    if (bits == 16)
        emit->Write8(0x66);

    this->operandReg = (u8)_operandReg;
    WriteRex(emit, bits, bits);
    emit->Write8(op);
    WriteRest(emit);
}
1112
//operand can either be immediate or register
// Emits one two-operand integer ALU instruction (ADD/SUB/CMP/MOV/TEST/...)
// where *this* OpArg is the r/m operand and `operand` is either an immediate
// or a register; `toRM` selects the direction (op r/m,reg vs op reg,r/m).
// For immediates it picks the shortest encoding available: the AL/EAX
// accumulator short forms, the B0+r / B8+r mov-reg-imm forms, and the
// sign-extended imm8 form where the opcode table provides one (0xCC in
// `normalops` marks "no such form").
void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
{
    X64Reg _operandReg;
    if (IsImm())
    {
        ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
    }

    if (bits == 16)
        emit->Write8(0x66);

    int immToWrite = 0; // bit width of a trailing immediate; 0 = none

    if (operand.IsImm())
    {
        WriteRex(emit, bits, bits);

        if (!toRM)
        {
            ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
        }

        if (operand.scale == SCALE_IMM8 && bits == 8)
        {
            // op al, imm8
            if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC)
            {
                emit->Write8(normalops[op].eaximm8);
                emit->Write8((u8)operand.offset);
                return;
            }
            // mov reg, imm8
            if (!scale && op == nrmMOV)
            {
                emit->Write8(0xB0 + (offsetOrBaseReg & 7));
                emit->Write8((u8)operand.offset);
                return;
            }
            // op r/m8, imm8
            emit->Write8(normalops[op].imm8);
            immToWrite = 8;
        }
        else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
                 (operand.scale == SCALE_IMM32 && bits == 32) ||
                 (operand.scale == SCALE_IMM32 && bits == 64))
        {
            // Try to save immediate size if we can, but first check to see
            // if the instruction supports simm8.
            // op r/m, imm8
            if (normalops[op].simm8 != 0xCC &&
                ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
                 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset)))
            {
                emit->Write8(normalops[op].simm8);
                immToWrite = 8;
            }
            else
            {
                // mov reg, imm
                if (!scale && op == nrmMOV && bits != 64)
                {
                    emit->Write8(0xB8 + (offsetOrBaseReg & 7));
                    if (bits == 16)
                        emit->Write16((u16)operand.offset);
                    else
                        emit->Write32((u32)operand.offset);
                    return;
                }
                // op eax, imm
                if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC)
                {
                    emit->Write8(normalops[op].eaximm32);
                    if (bits == 16)
                        emit->Write16((u16)operand.offset);
                    else
                        emit->Write32((u32)operand.offset);
                    return;
                }
                // op r/m, imm
                emit->Write8(normalops[op].imm32);
                immToWrite = bits == 16 ? 16 : 32;
            }
        }
        else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
                 (operand.scale == SCALE_IMM8 && bits == 32) ||
                 (operand.scale == SCALE_IMM8 && bits == 64))
        {
            // op r/m, imm8 (caller already narrowed the immediate)
            emit->Write8(normalops[op].simm8);
            immToWrite = 8;
        }
        else if (operand.scale == SCALE_IMM64 && bits == 64)
        {
            if (scale)
            {
                ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
            }
            // mov reg64, imm64 (REX.W B8+r - the only op taking a full imm64)
            else if (op == nrmMOV)
            {
                emit->Write8(0xB8 + (offsetOrBaseReg & 7));
                emit->Write64((u64)operand.offset);
                return;
            }
            ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
        }
        else
        {
            ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
        }
        _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM
    }
    else
    {
        // register operand: pick the to-r/m or from-r/m opcode form
        _operandReg = (X64Reg)operand.offsetOrBaseReg;
        WriteRex(emit, bits, bits, _operandReg);
        // op r/m, reg
        if (toRM)
        {
            emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
        }
        // op reg, r/m
        else
        {
            emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
        }
    }
    WriteRest(emit, immToWrite >> 3, _operandReg);
    switch (immToWrite)
    {
    case 0:
        break;
    case 8:
        emit->Write8((u8)operand.offset);
        break;
    case 16:
        emit->Write16((u16)operand.offset);
        break;
    case 32:
        emit->Write32((u32)operand.offset);
        break;
    default:
        ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
    }
}
1259
1260void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
1261{
1262 if (a1.IsImm())
1263 {
1264 //Booh! Can't write to an imm
1265 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1266 return;
1267 }
1268 if (a2.IsImm())
1269 {
1270 a1.WriteNormalOp(emit, true, op, a2, bits);
1271 }
1272 else
1273 {
1274 if (a1.IsSimpleReg())
1275 {
1276 a2.WriteNormalOp(emit, false, op, a1, bits);
1277 }
1278 else
1279 {
1280 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory");
1281 a1.WriteNormalOp(emit, true, op, a2, bits);
1282 }
1283 }
1284}
1285
// Two-operand integer ALU instructions; all but MOV and XCHG clobber flags.
void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
void XEmitter::OR  (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
{
    // A reg-to-itself MOV is almost certainly a JIT bug; warn but still emit it.
    if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
        LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
    WriteNormalOp(this, bits, nrmMOV, a1, a2);
}
void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
1302
// Three-operand IMUL: regOp = a1 * a2, where a2 must be an immediate and a1
// must not be. Uses the sign-extended imm8 form (6B) when the immediate fits
// in a signed byte, otherwise the full imm16/imm32 form (69).
// No 8-bit three-operand IMUL exists.
void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
{
    CheckFlags();
    if (bits == 8)
    {
        ASSERT_MSG(0, "IMUL - illegal bit size!");
        return;
    }

    if (a1.IsImm())
    {
        ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
        return;
    }

    if (!a2.IsImm())
    {
        ASSERT_MSG(0, "IMUL - third arg must be imm!");
        return;
    }

    if (bits == 16)
        Write8(0x66);
    a1.WriteRex(this, bits, bits, regOp);

    // imm fits in a sign-extended byte -> short form
    if (a2.GetImmBits() == 8 ||
        (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
        (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset))
    {
        Write8(0x6B);
        a1.WriteRest(this, 1, regOp);
        Write8((u8)a2.offset);
    }
    else
    {
        Write8(0x69);
        if (a2.GetImmBits() == 16 && bits == 16)
        {
            a1.WriteRest(this, 2, regOp);
            Write16((u16)a2.offset);
        }
        else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64))
        {
            a1.WriteRest(this, 4, regOp);
            Write32((u32)a2.offset);
        }
        else
        {
            ASSERT_MSG(0, "IMUL - unhandled case!");
        }
    }
}
1355
// Two-operand IMUL: regOp *= a (encoding 0F AF). An immediate operand is
// redirected to the three-operand form regOp = regOp * imm.
// No 8-bit two-operand IMUL exists.
void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
{
    CheckFlags();
    if (bits == 8)
    {
        ASSERT_MSG(0, "IMUL - illegal bit size!");
        return;
    }

    if (a.IsImm())
    {
        IMUL(bits, regOp, R(regOp), a) ;
        return;
    }

    if (bits == 16)
        Write8(0x66);
    a.WriteRex(this, bits, bits, regOp);
    Write8(0x0F);
    Write8(0xAF);
    a.WriteRest(this, 0, regOp);
}
1378
1379
// Generic SSE emitter: optional mandatory prefix (66/F2/F3), REX, 0F escape,
// optional second opcode byte (when op > 0xFF, e.g. 0x38xx/0x3Axx maps),
// then ModRM/SIB/disp. `extrabytes` reserves room after ModRM for a trailing
// immediate the caller writes itself.
void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
{
    if (opPrefix)
        Write8(opPrefix);
    arg.operandReg = regOp;
    arg.WriteRex(this, 0, 0);
    Write8(0x0F);
    if (op > 0xFF)
        Write8((op >> 8) & 0xFF);
    Write8(op & 0xFF);
    arg.WriteRest(this, extrabytes);
}
1392
// Two-operand AVX form: forwards with no second source register (VEX.vvvv unused).
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
{
    WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
}
1397
1398static int GetVEXmmmmm(u16 op)
1399{
1400 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1401 if ((op >> 8) == 0x3A)
1402 return 3;
1403 else if ((op >> 8) == 0x38)
1404 return 2;
1405 else
1406 return 1;
1407}
1408
1409static int GetVEXpp(u8 opPrefix)
1410{
1411 if (opPrefix == 0x66)
1412 return 1;
1413 else if (opPrefix == 0xF3)
1414 return 2;
1415 else if (opPrefix == 0xF2)
1416 return 3;
1417 else
1418 return 0;
1419}
1420
// Three-operand AVX emitter: builds the VEX prefix (pp from the legacy
// mandatory prefix, mmmmm from the opcode map), then the low opcode byte and
// ModRM. Asserts AVX support at emit time.
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
    if (!Common::cpu_info.bAVX)
        ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
    int mmmmm = GetVEXmmmmm(op);
    int pp = GetVEXpp(opPrefix);
    // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here
    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
    Write8(op & 0xFF);
    arg.WriteRest(this, extrabytes, regOp1);
}
1432
// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
// `size` selects VEX.W (64-bit = W1); only 32/64-bit operand sizes are legal.
void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
    if (size != 32 && size != 64)
        ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
    int mmmmm = GetVEXmmmmm(op);
    int pp = GetVEXpp(opPrefix);
    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
    Write8(op & 0xFF);
    arg.WriteRest(this, extrabytes, regOp1);
}
1444
// BMI1/BMI2 wrappers around WriteVEXOp: assert the corresponding CPU feature
// bit before emitting. Both instruction sets write flags, hence CheckFlags().
void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
    CheckFlags();
    if (!Common::cpu_info.bBMI1)
        ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}

void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
    CheckFlags();
    if (!Common::cpu_info.bBMI2)
        ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
1460
// MOVD: move 32 bits between an XMM register and a GPR/memory (66 0F 6E load, 66 0F 7E store).
void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);}
void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);}
1463
// MOVQ load: move 64 bits into an XMM register.
// On x86-64 this uses the REX.W MOVD encoding (66 REX.W 0F 6E); on 32-bit
// the F3 0F 7E xmm-load form is used instead.
void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg)
{
#ifdef _M_X86_64
    // Alternate encoding
    // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
    arg.operandReg = dest;
    Write8(0x66);
    arg.WriteRex(this, 64, 0);
    Write8(0x0f);
    Write8(0x6E);
    arg.WriteRest(this, 0);
#else
    arg.operandReg = dest;
    Write8(0xF3);
    Write8(0x0f);
    Write8(0x7E);
    arg.WriteRest(this, 0);
#endif
}
1483
1484void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
1485{
1486 if (src > 7 || arg.IsSimpleReg())
1487 {
1488 // Alternate encoding
1489 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1490 arg.operandReg = src;
1491 Write8(0x66);
1492 arg.WriteRex(this, 64, 0);
1493 Write8(0x0f);
1494 Write8(0x7E);
1495 arg.WriteRest(this, 0);
1496 }
1497 else
1498 {
1499 arg.operandReg = src;
1500 arg.WriteRex(this, 0, 0);
1501 Write8(0x66);
1502 Write8(0x0f);
1503 Write8(0xD6);
1504 arg.WriteRest(this, 0);
1505 }
1506}
1507
// Shared emitter for LDMXCSR/STMXCSR (0F AE /ext). The operand must be
// memory - neither an immediate nor a plain register form exists.
void XEmitter::WriteMXCSR(OpArg arg, int ext)
{
    if (arg.IsImm() || arg.IsSimpleReg())
        ASSERT_MSG(0, "MXCSR - invalid operand");

    arg.operandReg = ext; // /ext selects load (2) vs store (3)
    arg.WriteRex(this, 0, 0);
    Write8(0x0F);
    Write8(0xAE);
    arg.WriteRest(this);
}
1519
// Store/load the SSE control-status register to/from memory.
void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}

// Non-temporal (cache-bypassing) vector stores.
void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
1526
// Scalar floating-point arithmetic. Prefix selects precision:
// F3 = single (SS), F2 = double (SD).
void XEmitter::ADDSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseADD, regOp, arg);}
void XEmitter::ADDSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseADD, regOp, arg);}
void XEmitter::SUBSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
void XEmitter::SUBSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
void XEmitter::MULSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
void XEmitter::MULSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
void XEmitter::DIVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
void XEmitter::DIVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
void XEmitter::MINSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
void XEmitter::MINSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
void XEmitter::MAXSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
void XEmitter::MAXSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
void XEmitter::SQRTSS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
void XEmitter::SQRTSD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
1544
// Packed floating-point arithmetic/logic. Prefix selects precision:
// none = packed single (PS), 66 = packed double (PD).
void XEmitter::ADDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseADD, regOp, arg);}
void XEmitter::ADDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseADD, regOp, arg);}
void XEmitter::SUBPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseSUB, regOp, arg);}
void XEmitter::SUBPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseSUB, regOp, arg);}
void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
void XEmitter::ANDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseAND, regOp, arg);}
void XEmitter::ANDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseAND, regOp, arg);}
void XEmitter::ANDNPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseANDN, regOp, arg);}
void XEmitter::ANDNPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseANDN, regOp, arg);}
void XEmitter::ORPS(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x00, sseOR, regOp, arg);}
void XEmitter::ORPD(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x66, sseOR, regOp, arg);}
void XEmitter::XORPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseXOR, regOp, arg);}
void XEmitter::XORPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseXOR, regOp, arg);}
void XEmitter::MULPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMUL, regOp, arg);}
void XEmitter::MULPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMUL, regOp, arg);}
void XEmitter::DIVPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseDIV, regOp, arg);}
void XEmitter::DIVPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseDIV, regOp, arg);}
void XEmitter::MINPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMIN, regOp, arg);}
void XEmitter::MINPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMIN, regOp, arg);}
void XEmitter::MAXPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMAX, regOp, arg);}
void XEmitter::MAXPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMAX, regOp, arg);}
void XEmitter::SQRTPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
void XEmitter::SQRTPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
void XEmitter::RCPPS(X64Reg regOp, OpArg arg)   { WriteSSEOp(0x00, sseRCP, regOp, arg); }
void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
1573
// Horizontal add (SSE3) and scalar ordered/unordered compares (set EFLAGS).
void XEmitter::HADDPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF2, sseHADD, regOp, arg);}

void XEmitter::COMISS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
void XEmitter::COMISD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
1580
// SSE data movement. Each pair of overloads covers load (xmm <- r/m) and
// store (r/m <- xmm). MOVAP* requires 16-byte alignment; MOVUP* does not.
void XEmitter::MOVAPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
void XEmitter::MOVAPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
void XEmitter::MOVAPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
void XEmitter::MOVAPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}

void XEmitter::MOVUPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
void XEmitter::MOVUPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
void XEmitter::MOVUPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
void XEmitter::MOVUPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}

// Integer vector moves: DQA = aligned, DQU = unaligned.
void XEmitter::MOVDQA(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
void XEmitter::MOVDQA(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
void XEmitter::MOVDQU(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
void XEmitter::MOVDQU(OpArg arg, X64Reg regOp)  {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}

// Scalar moves (low 32/64 bits).
void XEmitter::MOVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
void XEmitter::MOVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
void XEmitter::MOVSS(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
void XEmitter::MOVSD(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}

// Low/high 64-bit half moves.
void XEmitter::MOVLPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
void XEmitter::MOVLPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
void XEmitter::MOVLPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
void XEmitter::MOVLPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }

void XEmitter::MOVHPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
void XEmitter::MOVHPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
void XEmitter::MOVHPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
void XEmitter::MOVHPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }

// Cross-half register moves (high->low and low->high).
void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
1613
// Floating-point and integer conversions. The CVTT* variants truncate toward
// zero; the CVT* variants round according to the current MXCSR rounding mode.
void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}

void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}

void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}

void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
1633
// MASKMOVDQU: byte-masked store of `dest` to [RDI], mask in `src`.
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}

// Extract the sign bits of each packed element into a GPR.
void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}

void XEmitter::LDDQU(X64Reg dest, OpArg arg)    {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
1640
// THESE TWO ARE UNTESTED.
// Interleave the low (UNPCKL*) or high (UNPCKH*) elements of dest and arg.
void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}

void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
1647
// MOVDDUP: duplicate the low double of `arg` into both halves of `regOp`.
// Uses the native SSE3 encoding when available; otherwise emulates it with
// MOVSD (skipped when source and destination are the same register) followed
// by UNPCKLPD with itself.
void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
{
    if (Common::cpu_info.bSSE3)
    {
        WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup
    }
    else
    {
        // Simulate this instruction with SSE2 instructions
        if (!arg.IsSimpleReg(regOp))
            MOVSD(regOp, arg);
        UNPCKLPD(regOp, R(regOp));
    }
}
1662
//There are a few more left

// Also some integer instructions are missing
// Saturating pack (narrow) and low-half interleave of packed integers.
void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}

void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);}
void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);}
1674
// Packed shifts by immediate. These use the group encodings 66 0F 71/72/73
// where the "register" passed as the ModRM reg field is really the /ext
// opcode extension selecting the operation (e.g. /2 = PSRL, /4 = PSRA,
// /6 = PSLL), and the target register is the r/m operand; the shift count
// follows as an imm8.
void XEmitter::PSRLW(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
    Write8(shift);
}

void XEmitter::PSRLD(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
    Write8(shift);
}

void XEmitter::PSRLQ(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
    Write8(shift);
}

// Variable-count form: shift count comes from the low quadword of `arg`.
void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
{
    WriteSSEOp(0x66, 0xd3, reg, arg);
}

// Whole-register byte shift (count is in bytes, not bits).
void XEmitter::PSRLDQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
    Write8(shift);
}

void XEmitter::PSLLW(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
    Write8(shift);
}

void XEmitter::PSLLD(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
    Write8(shift);
}

void XEmitter::PSLLQ(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
    Write8(shift);
}

// Whole-register byte shift (count is in bytes, not bits).
void XEmitter::PSLLDQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
    Write8(shift);
}

// Arithmetic (sign-preserving) right shifts; no quadword form exists in SSE2.
void XEmitter::PSRAW(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
    Write8(shift);
}

void XEmitter::PSRAD(X64Reg reg, int shift)
{
    WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
    Write8(shift);
}
1737
// Feature-gated wrappers around WriteSSEOp: assert the corresponding CPUID
// feature bit before emitting an SSSE3 or SSE4.1 instruction.
void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
{
    if (!Common::cpu_info.bSSSE3)
        ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}

void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
{
    if (!Common::cpu_info.bSSE4_1)
        ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
1751
// SSSE3 / SSE4.1 instructions. The u16 op values 0x38xx / 0x3Axx select the
// three-byte opcode maps; instructions taking an immediate pass extrabytes=1
// so the displacement emitter accounts for the trailing Write8.
void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}

// SSE4.1 packed integer min/max (byte/dword signed, word/dword unsigned).
void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}

// SSE4.1 packed integer widening moves (sign-extending 0x3820-0x3825,
// zero-extending 0x3830-0x3835).
void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}

// SSE4.1 blends: the *V* forms select lanes with the implicit XMM0 mask,
// the immediate forms take an explicit selector byte.
void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }

// SSE4.1 rounding; mode is the rounding-control immediate (see FloatRound in
// the header for the values passed here).
void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
1789
// SSE2 packed-integer instructions (two-byte opcode map, 0x66 prefix).

// Bitwise logic across the whole 128-bit register.
void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}

// Packed wrap-around addition (byte/word/dword/qword lanes).
void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}

// Packed saturating addition (signed and unsigned variants).
void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}

// Packed wrap-around subtraction.
void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}

// Packed saturating subtraction.
void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}

// Packed unsigned rounding average.
void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}

// Packed compares; each lane becomes all-ones on match, zero otherwise.
void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);}

void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);}

// Word extract/insert; subreg selects which 16-bit lane (imm8 follows).
void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}

// Multiply-add of word pairs / sum of absolute byte differences.
void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}

// SSE2 min/max (signed word, unsigned byte variants).
void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }

// Byte sign-mask extraction and dword/word shuffles (shuffle imm8 follows).
void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
1841
// VEX
// Three-operand AVX forms: regOp1 = destination, regOp2 = first source
// (encoded in VEX.vvvv), arg = second source.
void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}

// VEX-encoded packed bitwise logic (PS = no prefix, PD = 0x66 prefix).
void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }

// VEX-encoded integer bitwise logic.
void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }

// FMA3 fused multiply-add family (132/213/231 operand orders).
// NOTE(review): the trailing "1" on the PD/SD variants binds to the
// "extrabytes" parameter of the WriteAVXOp overload declared in the header,
// yet PD/SD differ from PS/SS by the VEX.W bit, not by an extra immediate
// byte. If the WriteAVXOp implementation does not interpret this argument as
// W, the double-precision forms are mis-encoded — verify against the
// WriteAVXOp definition (upstream Dolphin later added an explicit W
// parameter for exactly this reason).
void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
1930
// BMI1/BMI2 VEX-encoded scalar instructions. "bits" selects 32- vs 64-bit
// operand size. BLSR/BLSMSK/BLSI put an opcode extension in the reg field
// (the (X64Reg)0xN casts) and the actual destination in VEX.vvvv.
void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} // rotate count as trailing imm8
void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} // note: operands swapped to match encoding order
void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
1944
// Prefixes

// Single-byte legacy prefixes, emitted before the instruction they modify.
void XEmitter::LOCK() { Write8(0xF0); }
void XEmitter::REP() { Write8(0xF3); }
void XEmitter::REPNE() { Write8(0xF2); }
void XEmitter::FSOverride() { Write8(0x64); } // FS segment override
void XEmitter::GSOverride() { Write8(0x65); } // GS segment override

// x87 wait instruction (checks for pending FP exceptions).
void XEmitter::FWAIT()
{
    Write8(0x9B);
}
1957
// TODO: make this more generic
// Shared emitter for the x87 memory load/store instructions (FLD/FST/FSTP).
// "bits" selects the memory operand size (32/64/80); "op" is the normal
// reg-field opcode and "op_80b" the variant used for 80-bit operands
// (floatINVALID when the instruction has no 80-bit form).
void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
{
    int mf = 0;
    ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
    // mf is OR-ed into the 0xD9 base opcode to pick the operand size:
    // 0xD9 = m32, 0xDD = m64, 0xDB = m80.
    switch (bits)
    {
    case 32: mf = 0; break;
    case 64: mf = 4; break;
    case 80: mf = 2; break;
    default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
    }
    Write8(0xd9 | mf);
    // x87 instructions use the reg field of the ModR/M byte as opcode:
    if (bits == 80)
        op = op_80b;
    arg.WriteRest(this, 0, (X64Reg) op);
}
1976
// x87 load / store / store-and-pop for a 32/64/80-bit memory operand.
// FST has no 80-bit form, hence floatINVALID.
void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
// Store the x87 status word into AX (DF E0).
void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }

// Read the time-stamp counter into EDX:EAX (0F 31).
void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
1983
// Fills the whole code region with INT3 so stale JIT code traps immediately
// if executed after being freed/reset.
void XCodeBlock::PoisonMemory() {
    // x86/64: 0xCC = breakpoint
    memset(region, 0xCC, region_size);
}
1988
1989}
diff --git a/src/common/x64_emitter.h b/src/common/x64_emitter.h
new file mode 100644
index 000000000..369bfaa08
--- /dev/null
+++ b/src/common/x64_emitter.h
@@ -0,0 +1,1067 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include "assert.h"
21#include "common_types.h"
22#include "code_block.h"
23
24#if defined(_M_X86_64) && !defined(_ARCH_64)
25#define _ARCH_64
26#endif
27
28#ifdef _ARCH_64
29#define PTRBITS 64
30#else
31#define PTRBITS 32
32#endif
33
34namespace Gen
35{
36
// Machine register numbers as used in ModRM/SIB/REX encoding. The 8/16/32/64
// bit GPR names alias the same encoding values; which width applies is
// decided by the instruction being emitted, not the enum value.
enum X64Reg
{
    EAX = 0, EBX = 3, ECX = 1, EDX = 2,
    ESI = 6, EDI = 7, EBP = 5, ESP = 4,

    RAX = 0, RBX = 3, RCX = 1, RDX = 2,
    RSI = 6, RDI = 7, RBP = 5, RSP = 4,
    R8 = 8, R9 = 9, R10 = 10,R11 = 11,
    R12 = 12,R13 = 13,R14 = 14,R15 = 15,

    AL = 0, BL = 3, CL = 1, DL = 2,
    SIL = 6, DIL = 7, BPL = 5, SPL = 4,
    // High-byte registers carry the 0x100 flag to distinguish them from the
    // low-byte registers that share the same 3-bit encoding.
    AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106,

    AX = 0, BX = 3, CX = 1, DX = 2,
    SI = 6, DI = 7, BP = 5, SP = 4,

    // XMM/YMM share numbering with the GPRs; context selects the register file.
    XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,

    YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,

    INVALID_REG = 0xFFFFFFFF
};
62
// Condition-code values for Jcc/SETcc/CMOVcc; each value is the low nibble
// of the corresponding conditional opcode. Aliases (e.g. CC_B/CC_C/CC_NAE)
// name the same encoding.
enum CCFlags
{
    CC_O = 0,
    CC_NO = 1,
    CC_B = 2, CC_C = 2, CC_NAE = 2,
    CC_NB = 3, CC_NC = 3, CC_AE = 3,
    CC_Z = 4, CC_E = 4,
    CC_NZ = 5, CC_NE = 5,
    CC_BE = 6, CC_NA = 6,
    CC_NBE = 7, CC_A = 7,
    CC_S = 8,
    CC_NS = 9,
    CC_P = 0xA, CC_PE = 0xA,
    CC_NP = 0xB, CC_PO = 0xB,
    CC_L = 0xC, CC_NGE = 0xC,
    CC_NL = 0xD, CC_GE = 0xD,
    CC_LE = 0xE, CC_NG = 0xE,
    CC_NLE = 0xF, CC_G = 0xF
};
82
// Register-file sizes on x86-64.
enum
{
    NUMGPRs = 16,
    NUMXMMs = 16,
};

// OpArg "scale" values. 1/2/4/8 are real SIB scales; the remaining values
// are tags describing what kind of operand the OpArg holds.
enum
{
    SCALE_NONE = 0,    // plain register operand
    SCALE_1 = 1,
    SCALE_2 = 2,
    SCALE_4 = 4,
    SCALE_8 = 8,
    SCALE_ATREG = 16,  // memory at [reg] (+ optional displacement)
    //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
    // NOBASE values are (scale | 0x20): scaled index with no base register
    // (see MScaled below, which sets the 0x20 flag).
    SCALE_NOBASE_2 = 34,
    SCALE_NOBASE_4 = 36,
    SCALE_NOBASE_8 = 40,
    SCALE_RIP = 0xFF,  // RIP-relative addressing
    // Immediate operands of the given width.
    SCALE_IMM8 = 0xF0,
    SCALE_IMM16 = 0xF1,
    SCALE_IMM32 = 0xF2,
    SCALE_IMM64 = 0xF3,
};
107
// Selector for the classic two-operand ALU/data instructions handled by
// OpArg::WriteNormalOp.
enum NormalOp {
    nrmADD,
    nrmADC,
    nrmSUB,
    nrmSBB,
    nrmAND,
    nrmOR ,
    nrmXOR,
    nrmMOV,
    nrmTEST,
    nrmCMP,
    nrmXCHG,
};

// Immediate predicate values for the CMPSS/CMPSD/CMPPS/CMPPD family.
enum {
    CMP_EQ = 0,
    CMP_LT = 1,
    CMP_LE = 2,
    CMP_UNORD = 3,
    CMP_NEQ = 4,
    CMP_NLT = 5,
    CMP_NLE = 6,
    CMP_ORD = 7,
};

// ModRM reg-field opcodes for the x87 memory load/store group (used by
// WriteFloatLoadStore); the *80 values are the 80-bit operand variants.
enum FloatOp {
    floatLD = 0,
    floatST = 2,
    floatSTP = 3,
    floatLD80 = 5,
    floatSTP80 = 7,

    floatINVALID = -1, // instruction has no form for this operand size
};

// Rounding-control immediates for ROUNDSS/ROUNDSD/ROUNDPS/ROUNDPD.
// FROUND_MXCSR defers to the current MXCSR rounding mode; the PRECISION
// values are the precision-exception suppression bit.
enum FloatRound {
    FROUND_NEAREST = 0,
    FROUND_FLOOR = 1,
    FROUND_CEIL = 2,
    FROUND_ZERO = 3,
    FROUND_MXCSR = 4,

    FROUND_RAISE_PRECISION = 0,
    FROUND_IGNORE_PRECISION = 8,
};

class XEmitter;
155
// RIP addressing does not benefit from micro op fusion on Core arch
// Describes a single instruction operand: a register, an immediate, or a
// memory reference (base + scaled index + displacement, or RIP-relative).
// The private "scale" field is the SCALE_* tag that says which of these the
// OpArg currently represents; construct instances through the inline
// factory helpers below (M, R, MatR, MDisp, Imm8, ...) rather than directly.
struct OpArg
{
    OpArg() {} // dummy op arg, used for storage
    OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
    {
        operandReg = 0;
        scale = (u8)_scale;
        offsetOrBaseReg = (u16)rmReg;
        indexReg = (u16)scaledReg;
        //if scale == 0 never mind offsetting
        offset = _offset;
    }
    bool operator==(const OpArg &b) const
    {
        return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg &&
               indexReg == b.indexReg && offset == b.offset;
    }
    // Emission helpers (implemented in x64_emitter.cpp): write the REX/VEX
    // prefix and the ModRM/SIB/displacement bytes for this operand.
    void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
    void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
    void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
    void WriteFloatModRM(XEmitter *emit, FloatOp op);
    void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
    // This one is public - must be written to
    u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
    u16 operandReg;

    void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
    // Classification helpers over the scale tag.
    bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;}
    bool IsSimpleReg() const {return scale == SCALE_NONE;}
    bool IsSimpleReg(X64Reg reg) const
    {
        if (!IsSimpleReg())
            return false;
        return GetSimpleReg() == reg;
    }

    // True when a two-operand instruction can combine this operand with
    // "other" (at most one of the pair may be a memory reference).
    bool CanDoOpWith(const OpArg &other) const
    {
        if (IsSimpleReg()) return true;
        if (!IsSimpleReg() && !other.IsSimpleReg() && !other.IsImm()) return false;
        return true;
    }

    // Width of the immediate in bits, or -1 if this is not an immediate.
    int GetImmBits() const
    {
        switch (scale)
        {
        case SCALE_IMM8: return 8;
        case SCALE_IMM16: return 16;
        case SCALE_IMM32: return 32;
        case SCALE_IMM64: return 64;
        default: return -1;
        }
    }

    // Retags an immediate operand with a new width (no-op for other widths).
    void SetImmBits(int bits) {
        switch (bits)
        {
        case 8: scale = SCALE_IMM8; break;
        case 16: scale = SCALE_IMM16; break;
        case 32: scale = SCALE_IMM32; break;
        case 64: scale = SCALE_IMM64; break;
        }
    }

    // The register for a plain-register operand, INVALID_REG otherwise.
    X64Reg GetSimpleReg() const
    {
        if (scale == SCALE_NONE)
            return (X64Reg)offsetOrBaseReg;
        else
            return INVALID_REG;
    }

    // Low 32 bits of the stored immediate/offset value.
    u32 GetImmValue() const {
        return (u32)offset;
    }

    // For loops.
    void IncreaseOffset(int sz) {
        offset += sz;
    }

private:
    u8 scale;           // SCALE_* tag describing the operand kind
    u16 offsetOrBaseReg; // base register, or the register for SCALE_NONE
    u16 indexReg;        // scaled index register (SIB forms)
};
244
// OpArg factory helpers — the intended way to build operands.

// Memory at an absolute address, encoded RIP-relative.
inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
template <typename T>
inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);}
// Plain register operand.
inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
// Memory at [reg].
inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}

// Memory at [reg + offset].
// NOTE(review): the (u32) cast zero-extends a negative int into the u64
// offset field — presumably WriteRest truncates it back to a signed 32-bit
// displacement; verify before relying on negative displacements here.
inline OpArg MDisp(X64Reg value, int offset)
{
    return OpArg((u32)offset, SCALE_ATREG, value);
}

// Memory at [base + scaled*scale + offset].
inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
{
    return OpArg(offset, scale, base, scaled);
}

// Memory at [scaled*scale + offset] with no base register; the 0x20 flag
// produces the SCALE_NOBASE_* tags declared above.
inline OpArg MScaled(X64Reg scaled, int scale, int offset)
{
    if (scale == SCALE_1)
        return OpArg(offset, SCALE_ATREG, scaled);
    else
        return OpArg(offset, scale | 0x20, RAX, scaled);
}

// Memory at [base + offset_reg].
inline OpArg MRegSum(X64Reg base, X64Reg offset)
{
    return MComplex(base, offset, 1, 0);
}

// Immediate operands of fixed width.
inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);}
inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
// Pick the smallest encodable immediate width automatically (values that
// fit a sign-extended imm8 use SCALE_IMM8, everything else SCALE_IMM32).
inline OpArg UImmAuto(u32 imm) {
    return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
}
inline OpArg SImmAuto(s32 imm) {
    return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8);
}
284
// Pointer-sized immediate: 64-bit on 64-bit hosts, 32-bit otherwise.
#ifdef _ARCH_64
inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
#else
inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
#endif
290
291inline u32 PtrOffset(const void* ptr, const void* base)
292{
293#ifdef _ARCH_64
294 s64 distance = (s64)ptr-(s64)base;
295 if (distance >= 0x80000000LL ||
296 distance < -0x80000000LL)
297 {
298 ASSERT_MSG(0, "pointer offset out of range");
299 return 0;
300 }
301
302 return (u32)distance;
303#else
304 return (u32)ptr-(u32)base;
305#endif
306}
307
// Byte-offset helpers for addressing array elements / struct members
// relative to a base pointer held in a register.
//usage: int a[]; ARRAY_OFFSET(a,10)
#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0]))
//usage: struct {int e;} s; STRUCT_OFFSET(s,e)
#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))

// A forward branch whose displacement is patched later via SetJumpTarget;
// ptr points just past the emitted displacement field.
struct FixupBranch
{
    u8 *ptr;
    int type; //0 = 8bit 1 = 32bit
};

// Immediate predicate values for the SSE compare instructions
// (same encoding as the CMP_* enum above).
enum SSECompare
{
    EQ = 0,
    LT,
    LE,
    UNORD,
    NEQ,
    NLT,
    NLE,
    ORD,
};

// An address inside already-emitted code.
typedef const u8* JumpTarget;
332
333class XEmitter
334{
335 friend struct OpArg; // for Write8 etc
336private:
337 u8 *code;
338 bool flags_locked;
339
340 void CheckFlags();
341
342 void Rex(int w, int r, int x, int b);
343 void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
344 void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
345 void WriteMulDivType(int bits, OpArg src, int ext);
346 void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
347 void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
348 void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
349 void WriteMXCSR(OpArg arg, int ext);
350 void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
351 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
352 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
353 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
354 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
355 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
356 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
357 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
358 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
359 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
360
361 void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
362
363protected:
364 inline void Write8(u8 value) {*code++ = value;}
365 inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
366 inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
367 inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
368
369public:
370 XEmitter() { code = nullptr; flags_locked = false; }
371 XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; }
372 virtual ~XEmitter() {}
373
374 void WriteModRM(int mod, int rm, int reg);
375 void WriteSIB(int scale, int index, int base);
376
377 void SetCodePtr(u8 *ptr);
378 void ReserveCodeSpace(int bytes);
379 const u8 *AlignCode4();
380 const u8 *AlignCode16();
381 const u8 *AlignCodePage();
382 const u8 *GetCodePtr() const;
383 u8 *GetWritableCodePtr();
384
385 void LockFlags() { flags_locked = true; }
386 void UnlockFlags() { flags_locked = false; }
387
    // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPUs:
    // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.
    // INC and DEC are slow on Intel Core, but not on AMD. They create a
    // false flag dependency because they only update a subset of the flags.
    // XCHG is SLOW and should be avoided.

    // Debug breakpoint
    void INT3();

    // Do nothing
    void NOP(size_t count = 1);

    // Save energy in wait-loops on P4 only. Probably not too useful.
    void PAUSE();

    // Flag control
    void STC();
    void CLC();
    void CMC();

    // These two can not be executed in 64-bit mode on early Intel 64-bit CPUs, only on Core2 and AMD!
    void LAHF(); // 3 cycle vector path
    void SAHF(); // direct path fast


    // Stack control
    void PUSH(X64Reg reg);
    void POP(X64Reg reg);
    void PUSH(int bits, const OpArg &reg);
    void POP(int bits, const OpArg &reg);
    void PUSHF();
    void POPF();

    // Flow control
    void RET();
    void RET_FAST();
    void UD2();
    // Emits a jump with a placeholder displacement; patch it to the current
    // position later with SetJumpTarget(). force5bytes requests the rel32 form.
    FixupBranch J(bool force5bytes = false);

    void JMP(const u8 * addr, bool force5Bytes = false);
    void JMP(OpArg arg);
    void JMPptr(const OpArg &arg);
    void JMPself(); //infinite loop!
#ifdef CALL
#undef CALL
#endif
    void CALL(const void *fnptr);
    void CALLptr(OpArg arg);

    FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
    //void J_CC(CCFlags conditionCode, JumpTarget target);
    void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);

    // Backpatches a branch from J()/J_CC() to land at the current code position.
    void SetJumpTarget(const FixupBranch &branch);

    void SETcc(CCFlags flag, OpArg dest);
    // Note: CMOV brings small if any benefit on current cpus.
    void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);

    // Fences
    void LFENCE();
    void MFENCE();
    void SFENCE();

    // Bit scan
    void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
    void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit

    // Cache control
    enum PrefetchLevel
    {
        PF_NTA, //Non-temporal (data used once and only once)
        PF_T0,  //All cache levels
        PF_T1,  //Levels 2+ (aliased to T0 on AMD)
        PF_T2,  //Levels 3+ (aliased to T0 on AMD)
    };
    void PREFETCH(PrefetchLevel level, OpArg arg);
    void MOVNTI(int bits, OpArg dest, X64Reg src);
    void MOVNTDQ(OpArg arg, X64Reg regOp);
    void MOVNTPS(OpArg arg, X64Reg regOp);
    void MOVNTPD(OpArg arg, X64Reg regOp);

    // Multiplication / division
    void MUL(int bits, OpArg src); //UNSIGNED
    void IMUL(int bits, OpArg src); //SIGNED
    void IMUL(int bits, X64Reg regOp, OpArg src);
    void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
    void DIV(int bits, OpArg src);
    void IDIV(int bits, OpArg src);

    // Shift
    void ROL(int bits, OpArg dest, OpArg shift);
    void ROR(int bits, OpArg dest, OpArg shift);
    void RCL(int bits, OpArg dest, OpArg shift);
    void RCR(int bits, OpArg dest, OpArg shift);
    void SHL(int bits, OpArg dest, OpArg shift);
    void SHR(int bits, OpArg dest, OpArg shift);
    void SAR(int bits, OpArg dest, OpArg shift);

    // Bit Test
    void BT(int bits, OpArg dest, OpArg index);
    void BTS(int bits, OpArg dest, OpArg index);
    void BTR(int bits, OpArg dest, OpArg index);
    void BTC(int bits, OpArg dest, OpArg index);

    // Double-Precision Shift
    void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
    void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);

    // Extend EAX into EDX in various ways
    void CWD(int bits = 16);
    inline void CDQ() {CWD(32);}
    inline void CQO() {CWD(64);}
    void CBW(int bits = 8);
    inline void CWDE() {CBW(16);}
    inline void CDQE() {CBW(32);}

    // Load effective address
    void LEA(int bits, X64Reg dest, OpArg src);
507
    // Integer arithmetic
    void NEG (int bits, OpArg src);
    void ADD (int bits, const OpArg &a1, const OpArg &a2);
    void ADC (int bits, const OpArg &a1, const OpArg &a2);
    void SUB (int bits, const OpArg &a1, const OpArg &a2);
    void SBB (int bits, const OpArg &a1, const OpArg &a2);
    void AND (int bits, const OpArg &a1, const OpArg &a2);
    void CMP (int bits, const OpArg &a1, const OpArg &a2);

    // Bit operations
    void NOT (int bits, OpArg src);
    void OR (int bits, const OpArg &a1, const OpArg &a2);
    void XOR (int bits, const OpArg &a1, const OpArg &a2);
    void MOV (int bits, const OpArg &a1, const OpArg &a2);
    void TEST(int bits, const OpArg &a1, const OpArg &a2);

    // Are these useful at all? Consider removing.
    void XCHG(int bits, const OpArg &a1, const OpArg &a2);
    void XCHG_AHAL();

    // Byte swapping (32 and 64-bit only).
    void BSWAP(int bits, X64Reg reg);

    // Sign/zero extension
    void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
    void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);

    // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
    void MOVBE(int dbits, const OpArg& dest, const OpArg& src);

    // Available only on AMD >= Phenom or Intel >= Haswell
    void LZCNT(int bits, X64Reg dest, OpArg src);
    // Note: this one is actually part of BMI1
    void TZCNT(int bits, X64Reg dest, OpArg src);

    // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
    void STMXCSR(OpArg memloc);
    void LDMXCSR(OpArg memloc);

    // Prefixes
    void LOCK();
    void REP();
    void REPNE();
    void FSOverride();
    void GSOverride();

    // x87: bit positions within the FPU status word (as read by FNSTSW_AX).
    enum x87StatusWordBits {
        x87_InvalidOperation = 0x1,
        x87_DenormalizedOperand = 0x2,
        x87_DivisionByZero = 0x4,
        x87_Overflow = 0x8,
        x87_Underflow = 0x10,
        x87_Precision = 0x20,
        x87_StackFault = 0x40,
        x87_ErrorSummary = 0x80,
        x87_C0 = 0x100,
        x87_C1 = 0x200,
        x87_C2 = 0x400,
        x87_TopOfStack = 0x2000 | 0x1000 | 0x800, // 3-bit TOP field (bits 11-13)
        x87_C3 = 0x4000,
        x87_FPUBusy = 0x8000,
    };

    void FLD(int bits, OpArg src);
    void FST(int bits, OpArg dest);
    void FSTP(int bits, OpArg dest);
    void FNSTSW_AX();
    void FWAIT();
577
    // SSE/SSE2: Floating point arithmetic
    void ADDSS(X64Reg regOp, OpArg arg);
    void ADDSD(X64Reg regOp, OpArg arg);
    void SUBSS(X64Reg regOp, OpArg arg);
    void SUBSD(X64Reg regOp, OpArg arg);
    void MULSS(X64Reg regOp, OpArg arg);
    void MULSD(X64Reg regOp, OpArg arg);
    void DIVSS(X64Reg regOp, OpArg arg);
    void DIVSD(X64Reg regOp, OpArg arg);
    void MINSS(X64Reg regOp, OpArg arg);
    void MINSD(X64Reg regOp, OpArg arg);
    void MAXSS(X64Reg regOp, OpArg arg);
    void MAXSD(X64Reg regOp, OpArg arg);
    void SQRTSS(X64Reg regOp, OpArg arg);
    void SQRTSD(X64Reg regOp, OpArg arg);
    void RSQRTSS(X64Reg regOp, OpArg arg);

    // SSE/SSE2: Floating point compares that produce an all-ones/all-zeros
    // bit mask in the destination ("bitwise" results, yes).
    void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
    void CMPSD(X64Reg regOp, OpArg arg, u8 compare);

    inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
    inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
    inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
    inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
    inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
    inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
    inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }

    // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
    void ADDPS(X64Reg regOp, OpArg arg);
    void ADDPD(X64Reg regOp, OpArg arg);
    void SUBPS(X64Reg regOp, OpArg arg);
    void SUBPD(X64Reg regOp, OpArg arg);
    void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
    void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
    void MULPS(X64Reg regOp, OpArg arg);
    void MULPD(X64Reg regOp, OpArg arg);
    void DIVPS(X64Reg regOp, OpArg arg);
    void DIVPD(X64Reg regOp, OpArg arg);
    void MINPS(X64Reg regOp, OpArg arg);
    void MINPD(X64Reg regOp, OpArg arg);
    void MAXPS(X64Reg regOp, OpArg arg);
    void MAXPD(X64Reg regOp, OpArg arg);
    void SQRTPS(X64Reg regOp, OpArg arg);
    void SQRTPD(X64Reg regOp, OpArg arg);
    void RCPPS(X64Reg regOp, OpArg arg);
    void RSQRTPS(X64Reg regOp, OpArg arg);

    // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
    void ANDPS(X64Reg regOp, OpArg arg);
    void ANDPD(X64Reg regOp, OpArg arg);
    void ANDNPS(X64Reg regOp, OpArg arg);
    void ANDNPD(X64Reg regOp, OpArg arg);
    void ORPS(X64Reg regOp, OpArg arg);
    void ORPD(X64Reg regOp, OpArg arg);
    void XORPS(X64Reg regOp, OpArg arg);
    void XORPD(X64Reg regOp, OpArg arg);

    // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
    void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
    void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);

    // SSE/SSE2: Useful alternative to shuffle in some cases.
    void MOVDDUP(X64Reg regOp, OpArg arg);

    // TODO: Actually implement
#if 0
    // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
    void ADDSUBPS(X64Reg dest, OpArg src);
    void ADDSUBPD(X64Reg dest, OpArg src);
    void HADDPD(X64Reg dest, OpArg src);
    void HSUBPS(X64Reg dest, OpArg src);
    void HSUBPD(X64Reg dest, OpArg src);

    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
    void DPPD(X64Reg dest, OpArg src, u8 arg);

    // These are probably useful for VFPU emulation.
    void INSERTPS(X64Reg dest, OpArg src, u8 arg);
    void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
#endif

    // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
    void HADDPS(X64Reg dest, OpArg src);

    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
    void DPPS(X64Reg dest, OpArg src, u8 arg);

    void UNPCKLPS(X64Reg dest, OpArg src);
    void UNPCKHPS(X64Reg dest, OpArg src);
    void UNPCKLPD(X64Reg dest, OpArg src);
    void UNPCKHPD(X64Reg dest, OpArg src);

    // SSE/SSE2: Compares (set EFLAGS rather than a mask register).
    void COMISS(X64Reg regOp, OpArg arg);
    void COMISD(X64Reg regOp, OpArg arg);
    void UCOMISS(X64Reg regOp, OpArg arg);
    void UCOMISD(X64Reg regOp, OpArg arg);

    // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
    void MOVAPS(X64Reg regOp, OpArg arg);
    void MOVAPD(X64Reg regOp, OpArg arg);
    void MOVAPS(OpArg arg, X64Reg regOp);
    void MOVAPD(OpArg arg, X64Reg regOp);

    void MOVUPS(X64Reg regOp, OpArg arg);
    void MOVUPD(X64Reg regOp, OpArg arg);
    void MOVUPS(OpArg arg, X64Reg regOp);
    void MOVUPD(OpArg arg, X64Reg regOp);

    void MOVDQA(X64Reg regOp, OpArg arg);
    void MOVDQA(OpArg arg, X64Reg regOp);
    void MOVDQU(X64Reg regOp, OpArg arg);
    void MOVDQU(OpArg arg, X64Reg regOp);

    void MOVSS(X64Reg regOp, OpArg arg);
    void MOVSD(X64Reg regOp, OpArg arg);
    void MOVSS(OpArg arg, X64Reg regOp);
    void MOVSD(OpArg arg, X64Reg regOp);

    void MOVLPS(X64Reg regOp, OpArg arg);
    void MOVLPD(X64Reg regOp, OpArg arg);
    void MOVLPS(OpArg arg, X64Reg regOp);
    void MOVLPD(OpArg arg, X64Reg regOp);

    void MOVHPS(X64Reg regOp, OpArg arg);
    void MOVHPD(X64Reg regOp, OpArg arg);
    void MOVHPS(OpArg arg, X64Reg regOp);
    void MOVHPD(OpArg arg, X64Reg regOp);

    void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
    void MOVLHPS(X64Reg regOp1, X64Reg regOp2);

    void MOVD_xmm(X64Reg dest, const OpArg &arg);
    void MOVQ_xmm(X64Reg dest, OpArg arg);
    void MOVD_xmm(const OpArg &arg, X64Reg src);
    void MOVQ_xmm(OpArg arg, X64Reg src);

    // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
    void MOVMSKPS(X64Reg dest, OpArg arg);
    void MOVMSKPD(X64Reg dest, OpArg arg);

    // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
    void MASKMOVDQU(X64Reg dest, X64Reg src);
    void LDDQU(X64Reg dest, OpArg src);

    // SSE/SSE2: Data type conversions.
    void CVTPS2PD(X64Reg dest, OpArg src);
    void CVTPD2PS(X64Reg dest, OpArg src);
    void CVTSS2SD(X64Reg dest, OpArg src);
    void CVTSI2SS(X64Reg dest, OpArg src);
    void CVTSD2SS(X64Reg dest, OpArg src);
    void CVTSI2SD(X64Reg dest, OpArg src);
    void CVTDQ2PD(X64Reg regOp, OpArg arg);
    void CVTPD2DQ(X64Reg regOp, OpArg arg);
    void CVTDQ2PS(X64Reg regOp, OpArg arg);
    void CVTPS2DQ(X64Reg regOp, OpArg arg);

    // Truncating (round-towards-zero) variants.
    void CVTTPS2DQ(X64Reg regOp, OpArg arg);
    void CVTTPD2DQ(X64Reg regOp, OpArg arg);

    // Destinations are X64 regs (rax, rbx, ...) for these instructions.
    void CVTSS2SI(X64Reg xregdest, OpArg src);
    void CVTSD2SI(X64Reg xregdest, OpArg src);
    void CVTTSS2SI(X64Reg xregdest, OpArg arg);
    void CVTTSD2SI(X64Reg xregdest, OpArg arg);
745
    // SSE2: Packed integer instructions
    void PACKSSDW(X64Reg dest, OpArg arg);
    void PACKSSWB(X64Reg dest, OpArg arg);
    void PACKUSDW(X64Reg dest, OpArg arg);
    void PACKUSWB(X64Reg dest, OpArg arg);

    void PUNPCKLBW(X64Reg dest, const OpArg &arg);
    void PUNPCKLWD(X64Reg dest, const OpArg &arg);
    void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
    void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);

    void PTEST(X64Reg dest, OpArg arg);
    void PAND(X64Reg dest, OpArg arg);
    void PANDN(X64Reg dest, OpArg arg);
    void PXOR(X64Reg dest, OpArg arg);
    void POR(X64Reg dest, OpArg arg);

    void PADDB(X64Reg dest, OpArg arg);
    void PADDW(X64Reg dest, OpArg arg);
    void PADDD(X64Reg dest, OpArg arg);
    void PADDQ(X64Reg dest, OpArg arg);

    // Saturating adds (signed S / unsigned US variants).
    void PADDSB(X64Reg dest, OpArg arg);
    void PADDSW(X64Reg dest, OpArg arg);
    void PADDUSB(X64Reg dest, OpArg arg);
    void PADDUSW(X64Reg dest, OpArg arg);

    void PSUBB(X64Reg dest, OpArg arg);
    void PSUBW(X64Reg dest, OpArg arg);
    void PSUBD(X64Reg dest, OpArg arg);
    void PSUBQ(X64Reg dest, OpArg arg);

    // Saturating subtracts (signed S / unsigned US variants).
    void PSUBSB(X64Reg dest, OpArg arg);
    void PSUBSW(X64Reg dest, OpArg arg);
    void PSUBUSB(X64Reg dest, OpArg arg);
    void PSUBUSW(X64Reg dest, OpArg arg);

    void PAVGB(X64Reg dest, OpArg arg);
    void PAVGW(X64Reg dest, OpArg arg);

    void PCMPEQB(X64Reg dest, OpArg arg);
    void PCMPEQW(X64Reg dest, OpArg arg);
    void PCMPEQD(X64Reg dest, OpArg arg);

    void PCMPGTB(X64Reg dest, OpArg arg);
    void PCMPGTW(X64Reg dest, OpArg arg);
    void PCMPGTD(X64Reg dest, OpArg arg);

    void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
    void PINSRW(X64Reg dest, OpArg arg, u8 subreg);

    void PMADDWD(X64Reg dest, OpArg arg);
    void PSADBW(X64Reg dest, OpArg arg);

    void PMAXSW(X64Reg dest, OpArg arg);
    void PMAXUB(X64Reg dest, OpArg arg);
    void PMINSW(X64Reg dest, OpArg arg);
    void PMINUB(X64Reg dest, OpArg arg);
    // SSE4: More MAX/MIN instructions.
    void PMINSB(X64Reg dest, OpArg arg);
    void PMINSD(X64Reg dest, OpArg arg);
    void PMINUW(X64Reg dest, OpArg arg);
    void PMINUD(X64Reg dest, OpArg arg);
    void PMAXSB(X64Reg dest, OpArg arg);
    void PMAXSD(X64Reg dest, OpArg arg);
    void PMAXUW(X64Reg dest, OpArg arg);
    void PMAXUD(X64Reg dest, OpArg arg);

    void PMOVMSKB(X64Reg dest, OpArg arg);
    void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
    void PSHUFB(X64Reg dest, OpArg arg);

    void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
    void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);

    // Packed shifts by an immediate count (PSRLQ also has a register form).
    void PSRLW(X64Reg reg, int shift);
    void PSRLD(X64Reg reg, int shift);
    void PSRLQ(X64Reg reg, int shift);
    void PSRLQ(X64Reg reg, OpArg arg);
    void PSRLDQ(X64Reg reg, int shift);

    void PSLLW(X64Reg reg, int shift);
    void PSLLD(X64Reg reg, int shift);
    void PSLLQ(X64Reg reg, int shift);
    void PSLLDQ(X64Reg reg, int shift);

    void PSRAW(X64Reg reg, int shift);
    void PSRAD(X64Reg reg, int shift);

    // SSE4: data type conversions
    void PMOVSXBW(X64Reg dest, OpArg arg);
    void PMOVSXBD(X64Reg dest, OpArg arg);
    void PMOVSXBQ(X64Reg dest, OpArg arg);
    void PMOVSXWD(X64Reg dest, OpArg arg);
    void PMOVSXWQ(X64Reg dest, OpArg arg);
    void PMOVSXDQ(X64Reg dest, OpArg arg);
    void PMOVZXBW(X64Reg dest, OpArg arg);
    void PMOVZXBD(X64Reg dest, OpArg arg);
    void PMOVZXBQ(X64Reg dest, OpArg arg);
    void PMOVZXWD(X64Reg dest, OpArg arg);
    void PMOVZXWQ(X64Reg dest, OpArg arg);
    void PMOVZXDQ(X64Reg dest, OpArg arg);

    // SSE4: variable blend instructions (xmm0 implicit argument)
    void PBLENDVB(X64Reg dest, OpArg arg);
    void BLENDVPS(X64Reg dest, OpArg arg);
    void BLENDVPD(X64Reg dest, OpArg arg);
    void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
    void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);

    // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
    void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
    void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
    void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
    void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);

    inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
    inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
    inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
    inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }

    inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
    inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
    inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
    inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }

    inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
    inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
    inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
    inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }

    inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
    inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
    inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
    inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
881
    // AVX: three-operand forms (regOp1 = regOp2 OP arg); CPU support must be
    // checked by the caller (cpu_info).
    void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);

    void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);

    void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);

    // FMA3: fused multiply-add family. The 132/213/231 suffix selects which
    // of the three operands are multiplied and which is added (see Intel SDM).
    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);

    // VEX GPR instructions (BMI1/BMI2 and VEX-encoded shifts)
    void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
    void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
    void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
    void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
    void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
    void BLSR(int bits, X64Reg regOp, OpArg arg);
    void BLSMSK(int bits, X64Reg regOp, OpArg arg);
    void BLSI(int bits, X64Reg regOp, OpArg arg);
    void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
986
    void RDTSC();

    // Utility functions
    // The difference between this and CALL is that this aligns the stack
    // where appropriate.
    void ABI_CallFunction(const void *func);
    template <typename T>
    void ABI_CallFunction(T (*func)()) {
        ABI_CallFunction((const void *)func);
    }

    void ABI_CallFunction(const u8 *func) {
        ABI_CallFunction((const void *)func);
    }
    void ABI_CallFunctionC16(const void *func, u16 param1);
    void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);


    // Naming scheme: each suffix letter describes one argument, in order —
    // C = u32 constant, P = pointer, A = OpArg, R = register.
    // These only support u32 parameters, but that's enough for a lot of uses.
    // These will destroy the first 1 or 2 "parameter regs".
    void ABI_CallFunctionC(const void *func, u32 param1);
    void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
    void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
    void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
    void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
    void ABI_CallFunctionP(const void *func, void *param1);
    void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
    void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
    void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
    void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
    void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
    void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
    void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);

    // Pass a register as a parameter.
    void ABI_CallFunctionR(const void *func, X64Reg reg1);
    void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);

    // Typed convenience wrapper; forwards to the untyped overload above.
    template <typename Tr, typename T1>
    void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
        ABI_CallFunctionC((const void *)func, param1);
    }

    // A function that doesn't have any control over what it will do to regs,
    // such as the dispatcher, should be surrounded by these.
    void ABI_PushAllCalleeSavedRegsAndAdjustStack();
    void ABI_PopAllCalleeSavedRegsAndAdjustStack();

    // A function that doesn't know anything about its surroundings should
    // be surrounded by these to establish a safe environment, where it can roam free.
    // An example is a backpatch injected function.
    void ABI_PushAllCallerSavedRegsAndAdjustStack();
    void ABI_PopAllCallerSavedRegsAndAdjustStack();

    unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
    void ABI_AlignStack(unsigned int frameSize);
    void ABI_RestoreStack(unsigned int frameSize);

    // Sets up a __cdecl function.
    // Only x64 really needs the parameter count.
    void ABI_EmitPrologue(int maxCallParams);
    void ABI_EmitEpilogue(int maxCallParams);

    #ifdef _M_IX86
    inline int ABI_GetNumXMMRegs() { return 8; }
    #else
    inline int ABI_GetNumXMMRegs() { return 16; }
    #endif
1055}; // class XEmitter
1056
1057
1058// Everything that needs to generate X86 code should inherit from this.
1059// You get memory management for free, plus, you can use all the MOV etc functions without
1060// having to prefix them with gen-> or something similar.
1061
class XCodeBlock : public CodeBlock<XEmitter> {
public:
    // Overwrites the block's contents with a poison pattern so that stale
    // jumps into freed/reset JIT code fail loudly instead of executing
    // leftover bytes. (Pattern is defined in the .cpp — presumably INT3/0xCC;
    // confirm against x64_emitter.cpp.)
    void PoisonMemory() override;
};
1066
1067} // namespace