summaryrefslogtreecommitdiff
path: root/src/common/x64/emitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64/emitter.cpp')
-rw-r--r--src/common/x64/emitter.cpp2583
1 files changed, 0 insertions, 2583 deletions
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
deleted file mode 100644
index f5930abec..000000000
--- a/src/common/x64/emitter.cpp
+++ /dev/null
@@ -1,2583 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cinttypes>
19#include <cstring>
20#include "abi.h"
21#include "common/assert.h"
22#include "common/logging/log.h"
23#include "common/memory_util.h"
24#include "cpu_detect.h"
25#include "emitter.h"
26
27namespace Gen {
28
29struct NormalOpDef {
30 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
31};
32
33// 0xCC is code for invalid combination of immediates
34static const NormalOpDef normalops[11] = {
35 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD
36 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC
37
38 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB
39 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB
40
41 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND
42 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR
43
44 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR
45 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV
46
47 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from)
48 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP
49
50 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG
51};
52
// Opcode bytes for the SSE operations emitted by this file.
// Several enumerators intentionally share a value (e.g. MOVLP-from-RM and MOVHLPS).
enum NormalSSEOps {
    // Compare
    sseCMP = 0xC2,

    // Arithmetic and logic
    sseADD = 0x58,
    sseSUB = 0x5C,
    sseAND = 0x54,
    sseANDN = 0x55,
    sseOR = 0x56,
    sseXOR = 0x57,
    sseMUL = 0x59,
    sseDIV = 0x5E,
    sseMIN = 0x5D,
    sseMAX = 0x5F,
    sseCOMIS = 0x2F,
    sseUCOMIS = 0x2E,
    sseSQRT = 0x51,
    sseRSQRT = 0x52, // no double-precision variant exists
    sseRCP = 0x53,

    // Moves ("fromRM" loads from the r/m operand, "toRM" stores to it)
    sseMOVAPfromRM = 0x28,
    sseMOVAPtoRM = 0x29,
    sseMOVUPfromRM = 0x10,
    sseMOVUPtoRM = 0x11,
    sseMOVLPfromRM = 0x12,
    sseMOVLPtoRM = 0x13,
    sseMOVHPfromRM = 0x16,
    sseMOVHPtoRM = 0x17,
    sseMOVHLPS = 0x12,
    sseMOVLHPS = 0x16,
    sseMOVDQfromRM = 0x6F,
    sseMOVDQtoRM = 0x7F,
    sseMASKMOVDQU = 0xF7,
    sseLDDQU = 0xF0,

    // Shuffles, non-temporal stores, horizontal add
    sseSHUF = 0xC6,
    sseMOVNTDQ = 0xE7,
    sseMOVNTP = 0x2B,
    sseHADD = 0x7C,
};
89
90void XEmitter::SetCodePtr(u8* ptr) {
91 code = ptr;
92}
93
94const u8* XEmitter::GetCodePtr() const {
95 return code;
96}
97
98u8* XEmitter::GetWritableCodePtr() {
99 return code;
100}
101
102void XEmitter::Write8(u8 value) {
103 *code++ = value;
104}
105
106void XEmitter::Write16(u16 value) {
107 std::memcpy(code, &value, sizeof(u16));
108 code += sizeof(u16);
109}
110
111void XEmitter::Write32(u32 value) {
112 std::memcpy(code, &value, sizeof(u32));
113 code += sizeof(u32);
114}
115
116void XEmitter::Write64(u64 value) {
117 std::memcpy(code, &value, sizeof(u64));
118 code += sizeof(u64);
119}
120
121void XEmitter::ReserveCodeSpace(int bytes) {
122 for (int i = 0; i < bytes; i++)
123 *code++ = 0xCC;
124}
125
126const u8* XEmitter::AlignCode4() {
127 int c = int((u64)code & 3);
128 if (c)
129 ReserveCodeSpace(4 - c);
130 return code;
131}
132
133const u8* XEmitter::AlignCode16() {
134 int c = int((u64)code & 15);
135 if (c)
136 ReserveCodeSpace(16 - c);
137 return code;
138}
139
140const u8* XEmitter::AlignCodePage() {
141 int c = int((u64)code & 4095);
142 if (c)
143 ReserveCodeSpace(4096 - c);
144 return code;
145}
146
147// This operation modifies flags; check to see the flags are locked.
148// If the flags are locked, we should immediately and loudly fail before
149// causing a subtle JIT bug.
150void XEmitter::CheckFlags() {
151 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
152}
153
154void XEmitter::WriteModRM(int mod, int reg, int rm) {
155 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
156}
157
158void XEmitter::WriteSIB(int scale, int index, int base) {
159 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
160}
161
162void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const {
163 if (customOp == -1)
164 customOp = operandReg;
165#ifdef ARCHITECTURE_x86_64
166 u8 op = 0x40;
167 // REX.W (whether operation is a 64-bit operation)
168 if (opBits == 64)
169 op |= 8;
170 // REX.R (whether ModR/M reg field refers to R8-R15.
171 if (customOp & 8)
172 op |= 4;
173 // REX.X (whether ModR/M SIB index field refers to R8-R15)
174 if (indexReg & 8)
175 op |= 2;
176 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
177 if (offsetOrBaseReg & 8)
178 op |= 1;
179 // Write REX if wr have REX bits to write, or if the operation accesses
180 // SIL, DIL, BPL, or SPL.
181 if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
182 (opBits == 8 && (customOp & 0x10c) == 4)) {
183 emit->Write8(op);
184 // Check the operation doesn't access AH, BH, CH, or DH.
185 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
186 DEBUG_ASSERT((customOp & 0x100) == 0);
187 }
188#else
189 DEBUG_ASSERT(opBits != 64);
190 DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
191 DEBUG_ASSERT((indexReg & 8) == 0);
192 DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
193 DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
194 DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
195#endif
196}
197
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
199 int W) const {
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1) {
208 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp;
209 emit->Write8(0xC5);
210 emit->Write8(RvvvvLpp);
211 } else {
212 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
213 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp;
214 emit->Write8(0xC4);
215 emit->Write8(RXBmmmmm);
216 emit->Write8(WvvvvLpp);
217 }
218}
219
220void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg,
221 bool warn_64bit_offset) const {
222 if (_operandReg == INVALID_REG)
223 _operandReg = (X64Reg)this->operandReg;
224 int mod = 0;
225 int ireg = indexReg;
226 bool SIB = false;
227 int _offsetOrBaseReg = this->offsetOrBaseReg;
228
229 if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address
230 {
231 // Oh, RIP addressing.
232 _offsetOrBaseReg = 5;
233 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
234// TODO : add some checks
235#ifdef ARCHITECTURE_x86_64
236 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
237 s64 distance = (s64)offset - (s64)ripAddr;
238 ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset,
239 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr,
240 offset);
241 s32 offs = (s32)distance;
242 emit->Write32((u32)offs);
243#else
244 emit->Write32((u32)offset);
245#endif
246 return;
247 }
248
249 if (scale == 0) {
250 // Oh, no memory, Just a reg.
251 mod = 3; // 11
252 } else if (scale >= 1) {
253 // Ah good, no scaling.
254 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) {
255 // Okay, we're good. No SIB necessary.
256 int ioff = (int)offset;
257 if (ioff == 0) {
258 mod = 0;
259 } else if (ioff < -128 || ioff > 127) {
260 mod = 2; // 32-bit displacement
261 } else {
262 mod = 1; // 8-bit displacement
263 }
264 } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) {
265 SIB = true;
266 mod = 0;
267 _offsetOrBaseReg = 5;
268 } else // if (scale != SCALE_ATREG)
269 {
270 if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :(
271 {
272 // So we have to fake it with SIB encoding :(
273 SIB = true;
274 }
275
276 if (scale >= SCALE_1 && scale < SCALE_ATREG) {
277 SIB = true;
278 }
279
280 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) {
281 SIB = true;
282 ireg = _offsetOrBaseReg;
283 }
284
285 // Okay, we're fine. Just disp encoding.
286 // We need displacement. Which size?
287 int ioff = (int)(s64)offset;
288 if (ioff < -128 || ioff > 127) {
289 mod = 2; // 32-bit displacement
290 } else {
291 mod = 1; // 8-bit displacement
292 }
293 }
294 }
295
296 // Okay. Time to do the actual writing
297 // ModRM byte:
298 int oreg = _offsetOrBaseReg;
299 if (SIB)
300 oreg = 4;
301
302 // TODO(ector): WTF is this if about? I don't remember writing it :-)
303 // if (RIP)
304 // oreg = 5;
305
306 emit->WriteModRM(mod, _operandReg & 7, oreg & 7);
307
308 if (SIB) {
309 // SIB byte
310 int ss;
311 switch (scale) {
312 case SCALE_NONE:
313 _offsetOrBaseReg = 4;
314 ss = 0;
315 break; // RSP
316 case SCALE_1:
317 ss = 0;
318 break;
319 case SCALE_2:
320 ss = 1;
321 break;
322 case SCALE_4:
323 ss = 2;
324 break;
325 case SCALE_8:
326 ss = 3;
327 break;
328 case SCALE_NOBASE_2:
329 ss = 1;
330 break;
331 case SCALE_NOBASE_4:
332 ss = 2;
333 break;
334 case SCALE_NOBASE_8:
335 ss = 3;
336 break;
337 case SCALE_ATREG:
338 ss = 0;
339 break;
340 default:
341 ASSERT_MSG(0, "Invalid scale for SIB byte");
342 ss = 0;
343 break;
344 }
345 emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7)));
346 }
347
348 if (mod == 1) // 8-bit disp
349 {
350 emit->Write8((u8)(s8)(s32)offset);
351 } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp
352 {
353 emit->Write32((u32)offset);
354 }
355}
356
357// W = operand extended width (1 if 64-bit)
358// R = register# upper bit
359// X = scale amnt upper bit
360// B = base register# upper bit
361void XEmitter::Rex(int w, int r, int x, int b) {
362 w = w ? 1 : 0;
363 r = r ? 1 : 0;
364 x = x ? 1 : 0;
365 b = b ? 1 : 0;
366 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
367 if (rx != 0x40)
368 Write8(rx);
369}
370
371void XEmitter::JMP(const u8* addr, bool force5Bytes) {
372 u64 fn = (u64)addr;
373 if (!force5Bytes) {
374 s64 distance = (s64)(fn - ((u64)code + 2));
375 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
376 "Jump target too far away, needs force5Bytes = true");
377 // 8 bits will do
378 Write8(0xEB);
379 Write8((u8)(s8)distance);
380 } else {
381 s64 distance = (s64)(fn - ((u64)code + 5));
382
383 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
384 "Jump target too far away, needs indirect register");
385 Write8(0xE9);
386 Write32((u32)(s32)distance);
387 }
388}
389
390void XEmitter::JMPptr(const OpArg& arg2) {
391 OpArg arg = arg2;
392 if (arg.IsImm())
393 ASSERT_MSG(0, "JMPptr - Imm argument");
394 arg.operandReg = 4;
395 arg.WriteRex(this, 0, 0);
396 Write8(0xFF);
397 arg.WriteRest(this);
398}
399
400// Can be used to trap other processors, before overwriting their code
401// not used in dolphin
402void XEmitter::JMPself() {
403 Write8(0xEB);
404 Write8(0xFE);
405}
406
407void XEmitter::CALLptr(OpArg arg) {
408 if (arg.IsImm())
409 ASSERT_MSG(0, "CALLptr - Imm argument");
410 arg.operandReg = 2;
411 arg.WriteRex(this, 0, 0);
412 Write8(0xFF);
413 arg.WriteRest(this);
414}
415
416void XEmitter::CALL(const void* fnptr) {
417 u64 distance = u64(fnptr) - (u64(code) + 5);
418 ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL,
419 "CALL out of range (%p calls %p)", code, fnptr);
420 Write8(0xE8);
421 Write32(u32(distance));
422}
423
424FixupBranch XEmitter::CALL() {
425 FixupBranch branch;
426 branch.type = 1;
427 branch.ptr = code + 5;
428
429 Write8(0xE8);
430 Write32(0);
431
432 return branch;
433}
434
435FixupBranch XEmitter::J(bool force5bytes) {
436 FixupBranch branch;
437 branch.type = force5bytes ? 1 : 0;
438 branch.ptr = code + (force5bytes ? 5 : 2);
439 if (!force5bytes) {
440 // 8 bits will do
441 Write8(0xEB);
442 Write8(0);
443 } else {
444 Write8(0xE9);
445 Write32(0);
446 }
447 return branch;
448}
449
450FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) {
451 FixupBranch branch;
452 branch.type = force5bytes ? 1 : 0;
453 branch.ptr = code + (force5bytes ? 6 : 2);
454 if (!force5bytes) {
455 // 8 bits will do
456 Write8(0x70 + conditionCode);
457 Write8(0);
458 } else {
459 Write8(0x0F);
460 Write8(0x80 + conditionCode);
461 Write32(0);
462 }
463 return branch;
464}
465
466void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) {
467 u64 fn = (u64)addr;
468 s64 distance = (s64)(fn - ((u64)code + 2));
469 if (distance < -0x80 || distance >= 0x80 || force5bytes) {
470 distance = (s64)(fn - ((u64)code + 6));
471 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
472 "Jump target too far away, needs indirect register");
473 Write8(0x0F);
474 Write8(0x80 + conditionCode);
475 Write32((u32)(s32)distance);
476 } else {
477 Write8(0x70 + conditionCode);
478 Write8((u8)(s8)distance);
479 }
480}
481
482void XEmitter::SetJumpTarget(const FixupBranch& branch) {
483 if (branch.type == 0) {
484 s64 distance = (s64)(code - branch.ptr);
485 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
486 "Jump target too far away, needs force5Bytes = true");
487 branch.ptr[-1] = (u8)(s8)distance;
488 } else if (branch.type == 1) {
489 s64 distance = (s64)(code - branch.ptr);
490 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
491 "Jump target too far away, needs indirect register");
492 ((s32*)branch.ptr)[-1] = (s32)distance;
493 }
494}
495
496void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) {
497 if (branch.type == 0) {
498 s64 distance = (s64)(target - branch.ptr);
499 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
500 "Jump target too far away, needs force5Bytes = true");
501 branch.ptr[-1] = (u8)(s8)distance;
502 } else if (branch.type == 1) {
503 s64 distance = (s64)(target - branch.ptr);
504 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
505 "Jump target too far away, needs indirect register");
506 ((s32*)branch.ptr)[-1] = (s32)distance;
507 }
508}
509
510// Single byte opcodes
511// There is no PUSHAD/POPAD in 64-bit mode.
512void XEmitter::INT3() {
513 Write8(0xCC);
514}
515void XEmitter::RET() {
516 Write8(0xC3);
517}
518void XEmitter::RET_FAST() {
519 Write8(0xF3);
520 Write8(0xC3);
521} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a
522 // ret
523
524// The first sign of decadence: optimized NOPs.
525void XEmitter::NOP(size_t size) {
526 DEBUG_ASSERT((int)size > 0);
527 while (true) {
528 switch (size) {
529 case 0:
530 return;
531 case 1:
532 Write8(0x90);
533 return;
534 case 2:
535 Write8(0x66);
536 Write8(0x90);
537 return;
538 case 3:
539 Write8(0x0F);
540 Write8(0x1F);
541 Write8(0x00);
542 return;
543 case 4:
544 Write8(0x0F);
545 Write8(0x1F);
546 Write8(0x40);
547 Write8(0x00);
548 return;
549 case 5:
550 Write8(0x0F);
551 Write8(0x1F);
552 Write8(0x44);
553 Write8(0x00);
554 Write8(0x00);
555 return;
556 case 6:
557 Write8(0x66);
558 Write8(0x0F);
559 Write8(0x1F);
560 Write8(0x44);
561 Write8(0x00);
562 Write8(0x00);
563 return;
564 case 7:
565 Write8(0x0F);
566 Write8(0x1F);
567 Write8(0x80);
568 Write8(0x00);
569 Write8(0x00);
570 Write8(0x00);
571 Write8(0x00);
572 return;
573 case 8:
574 Write8(0x0F);
575 Write8(0x1F);
576 Write8(0x84);
577 Write8(0x00);
578 Write8(0x00);
579 Write8(0x00);
580 Write8(0x00);
581 Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66);
585 Write8(0x0F);
586 Write8(0x1F);
587 Write8(0x84);
588 Write8(0x00);
589 Write8(0x00);
590 Write8(0x00);
591 Write8(0x00);
592 Write8(0x00);
593 return;
594 case 10:
595 Write8(0x66);
596 Write8(0x66);
597 Write8(0x0F);
598 Write8(0x1F);
599 Write8(0x84);
600 Write8(0x00);
601 Write8(0x00);
602 Write8(0x00);
603 Write8(0x00);
604 Write8(0x00);
605 return;
606 default:
607 // Even though x86 instructions are allowed to be up to 15 bytes long,
608 // AMD advises against using NOPs longer than 11 bytes because they
609 // carry a performance penalty on CPUs older than AMD family 16h.
610 Write8(0x66);
611 Write8(0x66);
612 Write8(0x66);
613 Write8(0x0F);
614 Write8(0x1F);
615 Write8(0x84);
616 Write8(0x00);
617 Write8(0x00);
618 Write8(0x00);
619 Write8(0x00);
620 Write8(0x00);
621 size -= 11;
622 continue;
623 }
624 }
625}
626
627void XEmitter::PAUSE() {
628 Write8(0xF3);
629 NOP();
630} // use in tight spinloops for energy saving on some cpu
631void XEmitter::CLC() {
632 CheckFlags();
633 Write8(0xF8);
634} // clear carry
635void XEmitter::CMC() {
636 CheckFlags();
637 Write8(0xF5);
638} // flip carry
639void XEmitter::STC() {
640 CheckFlags();
641 Write8(0xF9);
642} // set carry
643
644// TODO: xchg ah, al ???
645void XEmitter::XCHG_AHAL() {
646 Write8(0x86);
647 Write8(0xe0);
648 // alt. 86 c4
649}
650
651// These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
652void XEmitter::LAHF() {
653 Write8(0x9F);
654}
655void XEmitter::SAHF() {
656 CheckFlags();
657 Write8(0x9E);
658}
659
660void XEmitter::PUSHF() {
661 Write8(0x9C);
662}
663void XEmitter::POPF() {
664 CheckFlags();
665 Write8(0x9D);
666}
667
668void XEmitter::LFENCE() {
669 Write8(0x0F);
670 Write8(0xAE);
671 Write8(0xE8);
672}
673void XEmitter::MFENCE() {
674 Write8(0x0F);
675 Write8(0xAE);
676 Write8(0xF0);
677}
678void XEmitter::SFENCE() {
679 Write8(0x0F);
680 Write8(0xAE);
681 Write8(0xF8);
682}
683
684void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) {
685 if (bits == 16)
686 Write8(0x66);
687 Rex(bits == 64, 0, 0, (int)reg >> 3);
688 Write8(byte + ((int)reg & 7));
689}
690
691void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) {
692 if (bits == 16)
693 Write8(0x66);
694 Rex(bits == 64, 0, 0, (int)reg >> 3);
695 Write8(byte1);
696 Write8(byte2 + ((int)reg & 7));
697}
698
699void XEmitter::CWD(int bits) {
700 if (bits == 16)
701 Write8(0x66);
702 Rex(bits == 64, 0, 0, 0);
703 Write8(0x99);
704}
705
706void XEmitter::CBW(int bits) {
707 if (bits == 8)
708 Write8(0x66);
709 Rex(bits == 32, 0, 0, 0);
710 Write8(0x98);
711}
712
713// Simple opcodes
714
715// push/pop do not need wide to be 64-bit
716void XEmitter::PUSH(X64Reg reg) {
717 WriteSimple1Byte(32, 0x50, reg);
718}
719void XEmitter::POP(X64Reg reg) {
720 WriteSimple1Byte(32, 0x58, reg);
721}
722
723void XEmitter::PUSH(int bits, const OpArg& reg) {
724 if (reg.IsSimpleReg())
725 PUSH(reg.GetSimpleReg());
726 else if (reg.IsImm()) {
727 switch (reg.GetImmBits()) {
728 case 8:
729 Write8(0x6A);
730 Write8((u8)(s8)reg.offset);
731 break;
732 case 16:
733 Write8(0x66);
734 Write8(0x68);
735 Write16((u16)(s16)(s32)reg.offset);
736 break;
737 case 32:
738 Write8(0x68);
739 Write32((u32)reg.offset);
740 break;
741 default:
742 ASSERT_MSG(0, "PUSH - Bad imm bits");
743 break;
744 }
745 } else {
746 if (bits == 16)
747 Write8(0x66);
748 reg.WriteRex(this, bits, bits);
749 Write8(0xFF);
750 reg.WriteRest(this, 0, (X64Reg)6);
751 }
752}
753
754void XEmitter::POP(int /*bits*/, const OpArg& reg) {
755 if (reg.IsSimpleReg())
756 POP(reg.GetSimpleReg());
757 else
758 ASSERT_MSG(0, "POP - Unsupported encoding");
759}
760
761void XEmitter::BSWAP(int bits, X64Reg reg) {
762 if (bits >= 32) {
763 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
764 } else if (bits == 16) {
765 ROL(16, R(reg), Imm8(8));
766 } else if (bits == 8) {
767 // Do nothing - can't bswap a single byte...
768 } else {
769 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
770 }
771}
772
773// Undefined opcode - reserved
774// If we ever need a way to always cause a non-breakpoint hard exception...
775void XEmitter::UD2() {
776 Write8(0x0F);
777 Write8(0x0B);
778}
779
780void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) {
781 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
782 arg.operandReg = (u8)level;
783 arg.WriteRex(this, 0, 0);
784 Write8(0x0F);
785 Write8(0x18);
786 arg.WriteRest(this);
787}
788
789void XEmitter::SETcc(CCFlags flag, OpArg dest) {
790 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
791 dest.operandReg = 0;
792 dest.WriteRex(this, 0, 8);
793 Write8(0x0F);
794 Write8(0x90 + (u8)flag);
795 dest.WriteRest(this);
796}
797
798void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) {
799 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
800 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
801 if (bits == 16)
802 Write8(0x66);
803 src.operandReg = dest;
804 src.WriteRex(this, bits, bits);
805 Write8(0x0F);
806 Write8(0x40 + (u8)flag);
807 src.WriteRest(this);
808}
809
810void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) {
811 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
812 CheckFlags();
813 src.operandReg = ext;
814 if (bits == 16)
815 Write8(0x66);
816 src.WriteRex(this, bits, bits, 0);
817 if (bits == 8) {
818 Write8(0xF6);
819 } else {
820 Write8(0xF7);
821 }
822 src.WriteRest(this);
823}
824
825void XEmitter::MUL(int bits, const OpArg& src) {
826 WriteMulDivType(bits, src, 4);
827}
828void XEmitter::DIV(int bits, const OpArg& src) {
829 WriteMulDivType(bits, src, 6);
830}
831void XEmitter::IMUL(int bits, const OpArg& src) {
832 WriteMulDivType(bits, src, 5);
833}
834void XEmitter::IDIV(int bits, const OpArg& src) {
835 WriteMulDivType(bits, src, 7);
836}
837void XEmitter::NEG(int bits, const OpArg& src) {
838 WriteMulDivType(bits, src, 3);
839}
840void XEmitter::NOT(int bits, const OpArg& src) {
841 WriteMulDivType(bits, src, 2);
842}
843
844void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) {
845 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
846 CheckFlags();
847 src.operandReg = (u8)dest;
848 if (bits == 16)
849 Write8(0x66);
850 if (rep)
851 Write8(0xF3);
852 src.WriteRex(this, bits, bits);
853 Write8(0x0F);
854 Write8(byte2);
855 src.WriteRest(this);
856}
857
858void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) {
859 if (bits <= 16)
860 ASSERT_MSG(0, "MOVNTI - bits<=16");
861 WriteBitSearchType(bits, src, dest, 0xC3);
862}
863
864void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {
865 WriteBitSearchType(bits, dest, src, 0xBC);
866} // Bottom bit to top bit
867void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {
868 WriteBitSearchType(bits, dest, src, 0xBD);
869} // Top bit to bottom bit
870
871void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) {
872 CheckFlags();
873 if (!Common::GetCPUCaps().bmi1)
874 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
875 WriteBitSearchType(bits, dest, src, 0xBC, true);
876}
877void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) {
878 CheckFlags();
879 if (!Common::GetCPUCaps().lzcnt)
880 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
881 WriteBitSearchType(bits, dest, src, 0xBD, true);
882}
883
884void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) {
885 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
886 if (dbits == sbits) {
887 MOV(dbits, R(dest), src);
888 return;
889 }
890 src.operandReg = (u8)dest;
891 if (dbits == 16)
892 Write8(0x66);
893 src.WriteRex(this, dbits, sbits);
894 if (sbits == 8) {
895 Write8(0x0F);
896 Write8(0xBE);
897 } else if (sbits == 16) {
898 Write8(0x0F);
899 Write8(0xBF);
900 } else if (sbits == 32 && dbits == 64) {
901 Write8(0x63);
902 } else {
903 Crash();
904 }
905 src.WriteRest(this);
906}
907
908void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) {
909 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
910 if (dbits == sbits) {
911 MOV(dbits, R(dest), src);
912 return;
913 }
914 src.operandReg = (u8)dest;
915 if (dbits == 16)
916 Write8(0x66);
917 // the 32bit result is automatically zero extended to 64bit
918 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
919 if (sbits == 8) {
920 Write8(0x0F);
921 Write8(0xB6);
922 } else if (sbits == 16) {
923 Write8(0x0F);
924 Write8(0xB7);
925 } else if (sbits == 32 && dbits == 64) {
926 Write8(0x8B);
927 } else {
928 ASSERT_MSG(0, "MOVZX - Invalid size");
929 }
930 src.WriteRest(this);
931}
932
933void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) {
934 ASSERT_MSG(Common::GetCPUCaps().movbe,
935 "Generating MOVBE on a system that does not support it.");
936 if (bits == 8) {
937 MOV(bits, dest, src);
938 return;
939 }
940
941 if (bits == 16)
942 Write8(0x66);
943
944 if (dest.IsSimpleReg()) {
945 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
946 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
947 Write8(0x0F);
948 Write8(0x38);
949 Write8(0xF0);
950 src.WriteRest(this, 0, dest.GetSimpleReg());
951 } else if (src.IsSimpleReg()) {
952 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
953 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
954 Write8(0x0F);
955 Write8(0x38);
956 Write8(0xF1);
957 dest.WriteRest(this, 0, src.GetSimpleReg());
958 } else {
959 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
960 }
961}
962
963void XEmitter::LEA(int bits, X64Reg dest, OpArg src) {
964 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
965 src.operandReg = (u8)dest;
966 if (bits == 16)
967 Write8(0x66); // TODO: performance warning
968 src.WriteRex(this, bits, bits);
969 Write8(0x8D);
970 src.WriteRest(this, 0, INVALID_REG, bits == 64);
971}
972
973// shift can be either imm8 or cl
974void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) {
975 CheckFlags();
976 bool writeImm = false;
977 if (dest.IsImm()) {
978 ASSERT_MSG(0, "WriteShift - can't shift imms");
979 }
980 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
981 (shift.IsImm() && shift.GetImmBits() != 8)) {
982 ASSERT_MSG(0, "WriteShift - illegal argument");
983 }
984 dest.operandReg = ext;
985 if (bits == 16)
986 Write8(0x66);
987 dest.WriteRex(this, bits, bits, 0);
988 if (shift.GetImmBits() == 8) {
989 // ok an imm
990 u8 imm = (u8)shift.offset;
991 if (imm == 1) {
992 Write8(bits == 8 ? 0xD0 : 0xD1);
993 } else {
994 writeImm = true;
995 Write8(bits == 8 ? 0xC0 : 0xC1);
996 }
997 } else {
998 Write8(bits == 8 ? 0xD2 : 0xD3);
999 }
1000 dest.WriteRest(this, writeImm ? 1 : 0);
1001 if (writeImm)
1002 Write8((u8)shift.offset);
1003}
1004
1005// large rotates and shift are slower on intel than amd
1006// intel likes to rotate by 1, and the op is smaller too
1007void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {
1008 WriteShift(bits, dest, shift, 0);
1009}
1010void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {
1011 WriteShift(bits, dest, shift, 1);
1012}
1013void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {
1014 WriteShift(bits, dest, shift, 2);
1015}
1016void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {
1017 WriteShift(bits, dest, shift, 3);
1018}
1019void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {
1020 WriteShift(bits, dest, shift, 4);
1021}
1022void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {
1023 WriteShift(bits, dest, shift, 5);
1024}
1025void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {
1026 WriteShift(bits, dest, shift, 7);
1027}
1028
1029// index can be either imm8 or register, don't use memory destination because it's slow
1030void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) {
1031 CheckFlags();
1032 if (dest.IsImm()) {
1033 ASSERT_MSG(0, "WriteBitTest - can't test imms");
1034 }
1035 if ((index.IsImm() && index.GetImmBits() != 8)) {
1036 ASSERT_MSG(0, "WriteBitTest - illegal argument");
1037 }
1038 if (bits == 16)
1039 Write8(0x66);
1040 if (index.IsImm()) {
1041 dest.WriteRex(this, bits, bits);
1042 Write8(0x0F);
1043 Write8(0xBA);
1044 dest.WriteRest(this, 1, (X64Reg)ext);
1045 Write8((u8)index.offset);
1046 } else {
1047 X64Reg operand = index.GetSimpleReg();
1048 dest.WriteRex(this, bits, bits, operand);
1049 Write8(0x0F);
1050 Write8(0x83 + 8 * ext);
1051 dest.WriteRest(this, 1, operand);
1052 }
1053}
1054
1055void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {
1056 WriteBitTest(bits, dest, index, 4);
1057}
1058void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {
1059 WriteBitTest(bits, dest, index, 5);
1060}
1061void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {
1062 WriteBitTest(bits, dest, index, 6);
1063}
1064void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {
1065 WriteBitTest(bits, dest, index, 7);
1066}
1067
1068// shift can be either imm8 or cl
1069void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1070 CheckFlags();
1071 if (dest.IsImm()) {
1072 ASSERT_MSG(0, "SHRD - can't use imms as destination");
1073 }
1074 if (!src.IsSimpleReg()) {
1075 ASSERT_MSG(0, "SHRD - must use simple register as source");
1076 }
1077 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1078 (shift.IsImm() && shift.GetImmBits() != 8)) {
1079 ASSERT_MSG(0, "SHRD - illegal shift");
1080 }
1081 if (bits == 16)
1082 Write8(0x66);
1083 X64Reg operand = src.GetSimpleReg();
1084 dest.WriteRex(this, bits, bits, operand);
1085 if (shift.GetImmBits() == 8) {
1086 Write8(0x0F);
1087 Write8(0xAC);
1088 dest.WriteRest(this, 1, operand);
1089 Write8((u8)shift.offset);
1090 } else {
1091 Write8(0x0F);
1092 Write8(0xAD);
1093 dest.WriteRest(this, 0, operand);
1094 }
1095}
1096
1097void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
1098 CheckFlags();
1099 if (dest.IsImm()) {
1100 ASSERT_MSG(0, "SHLD - can't use imms as destination");
1101 }
1102 if (!src.IsSimpleReg()) {
1103 ASSERT_MSG(0, "SHLD - must use simple register as source");
1104 }
1105 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
1106 (shift.IsImm() && shift.GetImmBits() != 8)) {
1107 ASSERT_MSG(0, "SHLD - illegal shift");
1108 }
1109 if (bits == 16)
1110 Write8(0x66);
1111 X64Reg operand = src.GetSimpleReg();
1112 dest.WriteRex(this, bits, bits, operand);
1113 if (shift.GetImmBits() == 8) {
1114 Write8(0x0F);
1115 Write8(0xA4);
1116 dest.WriteRest(this, 1, operand);
1117 Write8((u8)shift.offset);
1118 } else {
1119 Write8(0x0F);
1120 Write8(0xA5);
1121 dest.WriteRest(this, 0, operand);
1122 }
1123}
1124
1125void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) {
1126 if (bits == 16)
1127 emit->Write8(0x66);
1128
1129 this->operandReg = (u8)_operandReg;
1130 WriteRex(emit, bits, bits);
1131 emit->Write8(op);
1132 WriteRest(emit);
1133}
1134
1135// operand can either be immediate or register
1136void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
1137 int bits) const {
1138 X64Reg _operandReg;
1139 if (IsImm()) {
1140 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
1141 }
1142
1143 if (bits == 16)
1144 emit->Write8(0x66);
1145
1146 int immToWrite = 0;
1147
1148 if (operand.IsImm()) {
1149 WriteRex(emit, bits, bits);
1150
1151 if (!toRM) {
1152 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
1153 }
1154
1155 if (operand.scale == SCALE_IMM8 && bits == 8) {
1156 // op al, imm8
1157 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) {
1158 emit->Write8(normalops[op].eaximm8);
1159 emit->Write8((u8)operand.offset);
1160 return;
1161 }
1162 // mov reg, imm8
1163 if (!scale && op == nrmMOV) {
1164 emit->Write8(0xB0 + (offsetOrBaseReg & 7));
1165 emit->Write8((u8)operand.offset);
1166 return;
1167 }
1168 // op r/m8, imm8
1169 emit->Write8(normalops[op].imm8);
1170 immToWrite = 8;
1171 } else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
1172 (operand.scale == SCALE_IMM32 && bits == 32) ||
1173 (operand.scale == SCALE_IMM32 && bits == 64)) {
1174 // Try to save immediate size if we can, but first check to see
1175 // if the instruction supports simm8.
1176 // op r/m, imm8
1177 if (normalops[op].simm8 != 0xCC &&
1178 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
1179 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) {
1180 emit->Write8(normalops[op].simm8);
1181 immToWrite = 8;
1182 } else {
1183 // mov reg, imm
1184 if (!scale && op == nrmMOV && bits != 64) {
1185 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1186 if (bits == 16)
1187 emit->Write16((u16)operand.offset);
1188 else
1189 emit->Write32((u32)operand.offset);
1190 return;
1191 }
1192 // op eax, imm
1193 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) {
1194 emit->Write8(normalops[op].eaximm32);
1195 if (bits == 16)
1196 emit->Write16((u16)operand.offset);
1197 else
1198 emit->Write32((u32)operand.offset);
1199 return;
1200 }
1201 // op r/m, imm
1202 emit->Write8(normalops[op].imm32);
1203 immToWrite = bits == 16 ? 16 : 32;
1204 }
1205 } else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
1206 (operand.scale == SCALE_IMM8 && bits == 32) ||
1207 (operand.scale == SCALE_IMM8 && bits == 64)) {
1208 // op r/m, imm8
1209 emit->Write8(normalops[op].simm8);
1210 immToWrite = 8;
1211 } else if (operand.scale == SCALE_IMM64 && bits == 64) {
1212 if (scale) {
1213 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
1214 }
1215 // mov reg64, imm64
1216 else if (op == nrmMOV) {
1217 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1218 emit->Write64((u64)operand.offset);
1219 return;
1220 }
1221 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
1222 } else {
1223 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1224 }
1225 _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM
1226 } else {
1227 _operandReg = (X64Reg)operand.offsetOrBaseReg;
1228 WriteRex(emit, bits, bits, _operandReg);
1229 // op r/m, reg
1230 if (toRM) {
1231 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
1232 }
1233 // op reg, r/m
1234 else {
1235 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
1236 }
1237 }
1238 WriteRest(emit, immToWrite >> 3, _operandReg);
1239 switch (immToWrite) {
1240 case 0:
1241 break;
1242 case 8:
1243 emit->Write8((u8)operand.offset);
1244 break;
1245 case 16:
1246 emit->Write16((u16)operand.offset);
1247 break;
1248 case 32:
1249 emit->Write32((u32)operand.offset);
1250 break;
1251 default:
1252 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1253 }
1254}
1255
1256void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1,
1257 const OpArg& a2) {
1258 if (a1.IsImm()) {
1259 // Booh! Can't write to an imm
1260 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1261 return;
1262 }
1263 if (a2.IsImm()) {
1264 a1.WriteNormalOp(emit, true, op, a2, bits);
1265 } else {
1266 if (a1.IsSimpleReg()) {
1267 a2.WriteNormalOp(emit, false, op, a1, bits);
1268 } else {
1269 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(),
1270 "WriteNormalOp - a1 and a2 cannot both be memory");
1271 a1.WriteNormalOp(emit, true, op, a2, bits);
1272 }
1273 }
1274}
1275
1276void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) {
1277 CheckFlags();
1278 WriteNormalOp(this, bits, nrmADD, a1, a2);
1279}
1280void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) {
1281 CheckFlags();
1282 WriteNormalOp(this, bits, nrmADC, a1, a2);
1283}
1284void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) {
1285 CheckFlags();
1286 WriteNormalOp(this, bits, nrmSUB, a1, a2);
1287}
1288void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) {
1289 CheckFlags();
1290 WriteNormalOp(this, bits, nrmSBB, a1, a2);
1291}
1292void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) {
1293 CheckFlags();
1294 WriteNormalOp(this, bits, nrmAND, a1, a2);
1295}
1296void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) {
1297 CheckFlags();
1298 WriteNormalOp(this, bits, nrmOR, a1, a2);
1299}
1300void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) {
1301 CheckFlags();
1302 WriteNormalOp(this, bits, nrmXOR, a1, a2);
1303}
1304void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) {
1305 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1306 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1307 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1308}
1309void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {
1310 CheckFlags();
1311 WriteNormalOp(this, bits, nrmTEST, a1, a2);
1312}
1313void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) {
1314 CheckFlags();
1315 WriteNormalOp(this, bits, nrmCMP, a1, a2);
1316}
1317void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {
1318 WriteNormalOp(this, bits, nrmXCHG, a1, a2);
1319}
1320
1321void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) {
1322 CheckFlags();
1323 if (bits == 8) {
1324 ASSERT_MSG(0, "IMUL - illegal bit size!");
1325 return;
1326 }
1327
1328 if (a1.IsImm()) {
1329 ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
1330 return;
1331 }
1332
1333 if (!a2.IsImm()) {
1334 ASSERT_MSG(0, "IMUL - third arg must be imm!");
1335 return;
1336 }
1337
1338 if (bits == 16)
1339 Write8(0x66);
1340 a1.WriteRex(this, bits, bits, regOp);
1341
1342 if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
1343 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) {
1344 Write8(0x6B);
1345 a1.WriteRest(this, 1, regOp);
1346 Write8((u8)a2.offset);
1347 } else {
1348 Write8(0x69);
1349 if (a2.GetImmBits() == 16 && bits == 16) {
1350 a1.WriteRest(this, 2, regOp);
1351 Write16((u16)a2.offset);
1352 } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) {
1353 a1.WriteRest(this, 4, regOp);
1354 Write32((u32)a2.offset);
1355 } else {
1356 ASSERT_MSG(0, "IMUL - unhandled case!");
1357 }
1358 }
1359}
1360
1361void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) {
1362 CheckFlags();
1363 if (bits == 8) {
1364 ASSERT_MSG(0, "IMUL - illegal bit size!");
1365 return;
1366 }
1367
1368 if (a.IsImm()) {
1369 IMUL(bits, regOp, R(regOp), a);
1370 return;
1371 }
1372
1373 if (bits == 16)
1374 Write8(0x66);
1375 a.WriteRex(this, bits, bits, regOp);
1376 Write8(0x0F);
1377 Write8(0xAF);
1378 a.WriteRest(this, 0, regOp);
1379}
1380
1381void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) {
1382 if (opPrefix)
1383 Write8(opPrefix);
1384 arg.operandReg = regOp;
1385 arg.WriteRex(this, 0, 0);
1386 Write8(0x0F);
1387 if (op > 0xFF)
1388 Write8((op >> 8) & 0xFF);
1389 Write8(op & 0xFF);
1390 arg.WriteRest(this, extrabytes);
1391}
1392
1393void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1394 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1395}
1396
1397static int GetVEXmmmmm(u16 op) {
1398 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1399 if ((op >> 8) == 0x3A)
1400 return 3;
1401 if ((op >> 8) == 0x38)
1402 return 2;
1403
1404 return 1;
1405}
1406
1407static int GetVEXpp(u8 opPrefix) {
1408 if (opPrefix == 0x66)
1409 return 1;
1410 if (opPrefix == 0xF3)
1411 return 2;
1412 if (opPrefix == 0xF2)
1413 return 3;
1414
1415 return 0;
1416}
1417
1418void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
1419 int extrabytes) {
1420 if (!Common::GetCPUCaps().avx)
1421 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
1422 int mmmmm = GetVEXmmmmm(op);
1423 int pp = GetVEXpp(opPrefix);
1424 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size
1425 // here
1426 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
1427 Write8(op & 0xFF);
1428 arg.WriteRest(this, extrabytes, regOp1);
1429}
1430
1431// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1432void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1433 const OpArg& arg, int extrabytes) {
1434 if (size != 32 && size != 64)
1435 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
1436 int mmmmm = GetVEXmmmmm(op);
1437 int pp = GetVEXpp(opPrefix);
1438 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
1439 Write8(op & 0xFF);
1440 arg.WriteRest(this, extrabytes, regOp1);
1441}
1442
1443void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1444 const OpArg& arg, int extrabytes) {
1445 CheckFlags();
1446 if (!Common::GetCPUCaps().bmi1)
1447 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
1448 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1449}
1450
1451void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
1452 const OpArg& arg, int extrabytes) {
1453 CheckFlags();
1454 if (!Common::GetCPUCaps().bmi2)
1455 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
1456 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1457}
1458
1459void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) {
1460 WriteSSEOp(0x66, 0x6E, dest, arg, 0);
1461}
1462void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) {
1463 WriteSSEOp(0x66, 0x7E, src, arg, 0);
1464}
1465
1466void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
1467#ifdef ARCHITECTURE_x86_64
1468 // Alternate encoding
1469 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1470 arg.operandReg = dest;
1471 Write8(0x66);
1472 arg.WriteRex(this, 64, 0);
1473 Write8(0x0f);
1474 Write8(0x6E);
1475 arg.WriteRest(this, 0);
1476#else
1477 arg.operandReg = dest;
1478 Write8(0xF3);
1479 Write8(0x0f);
1480 Write8(0x7E);
1481 arg.WriteRest(this, 0);
1482#endif
1483}
1484
1485void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
1486 if (src > 7 || arg.IsSimpleReg()) {
1487 // Alternate encoding
1488 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1489 arg.operandReg = src;
1490 Write8(0x66);
1491 arg.WriteRex(this, 64, 0);
1492 Write8(0x0f);
1493 Write8(0x7E);
1494 arg.WriteRest(this, 0);
1495 } else {
1496 arg.operandReg = src;
1497 arg.WriteRex(this, 0, 0);
1498 Write8(0x66);
1499 Write8(0x0f);
1500 Write8(0xD6);
1501 arg.WriteRest(this, 0);
1502 }
1503}
1504
1505void XEmitter::WriteMXCSR(OpArg arg, int ext) {
1506 if (arg.IsImm() || arg.IsSimpleReg())
1507 ASSERT_MSG(0, "MXCSR - invalid operand");
1508
1509 arg.operandReg = ext;
1510 arg.WriteRex(this, 0, 0);
1511 Write8(0x0F);
1512 Write8(0xAE);
1513 arg.WriteRest(this);
1514}
1515
1516void XEmitter::STMXCSR(const OpArg& memloc) {
1517 WriteMXCSR(memloc, 3);
1518}
1519void XEmitter::LDMXCSR(const OpArg& memloc) {
1520 WriteMXCSR(memloc, 2);
1521}
1522
1523void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {
1524 WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);
1525}
1526void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {
1527 WriteSSEOp(0x00, sseMOVNTP, regOp, arg);
1528}
1529void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {
1530 WriteSSEOp(0x66, sseMOVNTP, regOp, arg);
1531}
1532
1533void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {
1534 WriteSSEOp(0xF3, sseADD, regOp, arg);
1535}
1536void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {
1537 WriteSSEOp(0xF2, sseADD, regOp, arg);
1538}
1539void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {
1540 WriteSSEOp(0xF3, sseSUB, regOp, arg);
1541}
1542void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {
1543 WriteSSEOp(0xF2, sseSUB, regOp, arg);
1544}
1545void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {
1546 WriteSSEOp(0xF3, sseCMP, regOp, arg, 1);
1547 Write8(compare);
1548}
1549void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {
1550 WriteSSEOp(0xF2, sseCMP, regOp, arg, 1);
1551 Write8(compare);
1552}
1553void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {
1554 WriteSSEOp(0xF3, sseMUL, regOp, arg);
1555}
1556void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {
1557 WriteSSEOp(0xF2, sseMUL, regOp, arg);
1558}
1559void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {
1560 WriteSSEOp(0xF3, sseDIV, regOp, arg);
1561}
1562void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {
1563 WriteSSEOp(0xF2, sseDIV, regOp, arg);
1564}
1565void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {
1566 WriteSSEOp(0xF3, sseMIN, regOp, arg);
1567}
1568void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {
1569 WriteSSEOp(0xF2, sseMIN, regOp, arg);
1570}
1571void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {
1572 WriteSSEOp(0xF3, sseMAX, regOp, arg);
1573}
1574void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {
1575 WriteSSEOp(0xF2, sseMAX, regOp, arg);
1576}
1577void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {
1578 WriteSSEOp(0xF3, sseSQRT, regOp, arg);
1579}
1580void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {
1581 WriteSSEOp(0xF2, sseSQRT, regOp, arg);
1582}
1583void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {
1584 WriteSSEOp(0xF3, sseRCP, regOp, arg);
1585}
1586void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {
1587 WriteSSEOp(0xF3, sseRSQRT, regOp, arg);
1588}
1589
1590void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {
1591 WriteSSEOp(0x00, sseADD, regOp, arg);
1592}
1593void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {
1594 WriteSSEOp(0x66, sseADD, regOp, arg);
1595}
1596void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {
1597 WriteSSEOp(0x00, sseSUB, regOp, arg);
1598}
1599void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {
1600 WriteSSEOp(0x66, sseSUB, regOp, arg);
1601}
1602void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {
1603 WriteSSEOp(0x00, sseCMP, regOp, arg, 1);
1604 Write8(compare);
1605}
1606void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {
1607 WriteSSEOp(0x66, sseCMP, regOp, arg, 1);
1608 Write8(compare);
1609}
1610void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {
1611 WriteSSEOp(0x00, sseAND, regOp, arg);
1612}
1613void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {
1614 WriteSSEOp(0x66, sseAND, regOp, arg);
1615}
1616void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {
1617 WriteSSEOp(0x00, sseANDN, regOp, arg);
1618}
1619void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {
1620 WriteSSEOp(0x66, sseANDN, regOp, arg);
1621}
1622void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {
1623 WriteSSEOp(0x00, sseOR, regOp, arg);
1624}
1625void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {
1626 WriteSSEOp(0x66, sseOR, regOp, arg);
1627}
1628void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {
1629 WriteSSEOp(0x00, sseXOR, regOp, arg);
1630}
1631void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {
1632 WriteSSEOp(0x66, sseXOR, regOp, arg);
1633}
1634void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {
1635 WriteSSEOp(0x00, sseMUL, regOp, arg);
1636}
1637void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {
1638 WriteSSEOp(0x66, sseMUL, regOp, arg);
1639}
1640void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {
1641 WriteSSEOp(0x00, sseDIV, regOp, arg);
1642}
1643void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {
1644 WriteSSEOp(0x66, sseDIV, regOp, arg);
1645}
1646void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {
1647 WriteSSEOp(0x00, sseMIN, regOp, arg);
1648}
1649void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {
1650 WriteSSEOp(0x66, sseMIN, regOp, arg);
1651}
1652void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {
1653 WriteSSEOp(0x00, sseMAX, regOp, arg);
1654}
1655void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {
1656 WriteSSEOp(0x66, sseMAX, regOp, arg);
1657}
1658void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {
1659 WriteSSEOp(0x00, sseSQRT, regOp, arg);
1660}
1661void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {
1662 WriteSSEOp(0x66, sseSQRT, regOp, arg);
1663}
1664void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) {
1665 WriteSSEOp(0x00, sseRCP, regOp, arg);
1666}
1667void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {
1668 WriteSSEOp(0x00, sseRSQRT, regOp, arg);
1669}
1670void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1671 WriteSSEOp(0x00, sseSHUF, regOp, arg, 1);
1672 Write8(shuffle);
1673}
1674void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
1675 WriteSSEOp(0x66, sseSHUF, regOp, arg, 1);
1676 Write8(shuffle);
1677}
1678
1679void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {
1680 WriteSSEOp(0xF2, sseHADD, regOp, arg);
1681}
1682
1683void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {
1684 WriteSSEOp(0x00, sseCOMIS, regOp, arg);
1685} // weird that these should be packed
1686void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {
1687 WriteSSEOp(0x66, sseCOMIS, regOp, arg);
1688} // ordered
1689void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {
1690 WriteSSEOp(0x00, sseUCOMIS, regOp, arg);
1691} // unordered
1692void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {
1693 WriteSSEOp(0x66, sseUCOMIS, regOp, arg);
1694}
1695
1696void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {
1697 WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);
1698}
1699void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {
1700 WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);
1701}
1702void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {
1703 WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);
1704}
1705void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {
1706 WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);
1707}
1708
1709void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {
1710 WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);
1711}
1712void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {
1713 WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);
1714}
1715void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {
1716 WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);
1717}
1718void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {
1719 WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);
1720}
1721
1722void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {
1723 WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);
1724}
1725void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {
1726 WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);
1727}
1728void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {
1729 WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);
1730}
1731void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {
1732 WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);
1733}
1734
1735void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {
1736 WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);
1737}
1738void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {
1739 WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);
1740}
1741void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {
1742 WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);
1743}
1744void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {
1745 WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);
1746}
1747
1748void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) {
1749 WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg);
1750}
1751void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) {
1752 WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg);
1753}
1754void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) {
1755 WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg);
1756}
1757void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) {
1758 WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg);
1759}
1760
1761void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) {
1762 WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg);
1763}
1764void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) {
1765 WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg);
1766}
1767void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) {
1768 WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg);
1769}
1770void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) {
1771 WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg);
1772}
1773
1774void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {
1775 WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));
1776}
1777void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {
1778 WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));
1779}
1780
1781void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {
1782 WriteSSEOp(0x00, 0x5A, regOp, arg);
1783}
1784void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {
1785 WriteSSEOp(0x66, 0x5A, regOp, arg);
1786}
1787
1788void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {
1789 WriteSSEOp(0xF2, 0x5A, regOp, arg);
1790}
1791void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {
1792 WriteSSEOp(0xF3, 0x5A, regOp, arg);
1793}
1794void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {
1795 WriteSSEOp(0xF2, 0x2D, regOp, arg);
1796}
1797void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {
1798 WriteSSEOp(0xF3, 0x2D, regOp, arg);
1799}
1800void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {
1801 WriteSSEOp(0xF2, 0x2A, regOp, arg);
1802}
1803void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {
1804 WriteSSEOp(0xF3, 0x2A, regOp, arg);
1805}
1806
1807void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {
1808 WriteSSEOp(0xF3, 0xE6, regOp, arg);
1809}
1810void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {
1811 WriteSSEOp(0x00, 0x5B, regOp, arg);
1812}
1813void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {
1814 WriteSSEOp(0xF2, 0xE6, regOp, arg);
1815}
1816void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {
1817 WriteSSEOp(0x66, 0x5B, regOp, arg);
1818}
1819
1820void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {
1821 WriteSSEOp(0xF2, 0x2C, regOp, arg);
1822}
1823void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {
1824 WriteSSEOp(0xF3, 0x2C, regOp, arg);
1825}
1826void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {
1827 WriteSSEOp(0xF3, 0x5B, regOp, arg);
1828}
1829void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {
1830 WriteSSEOp(0x66, 0xE6, regOp, arg);
1831}
1832
1833void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {
1834 WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));
1835}
1836
1837void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {
1838 WriteSSEOp(0x00, 0x50, dest, arg);
1839}
1840void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {
1841 WriteSSEOp(0x66, 0x50, dest, arg);
1842}
1843
1844void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {
1845 WriteSSEOp(0xF2, sseLDDQU, dest, arg);
1846} // For integer data only
1847
1848// THESE TWO ARE UNTESTED.
1849void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {
1850 WriteSSEOp(0x00, 0x14, dest, arg);
1851}
1852void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {
1853 WriteSSEOp(0x00, 0x15, dest, arg);
1854}
1855
1856void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {
1857 WriteSSEOp(0x66, 0x14, dest, arg);
1858}
1859void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {
1860 WriteSSEOp(0x66, 0x15, dest, arg);
1861}
1862
1863void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) {
1864 if (Common::GetCPUCaps().sse3) {
1865 WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup
1866 } else {
1867 // Simulate this instruction with SSE2 instructions
1868 if (!arg.IsSimpleReg(regOp))
1869 MOVSD(regOp, arg);
1870 UNPCKLPD(regOp, R(regOp));
1871 }
1872}
1873
1874// There are a few more left
1875
1876// Also some integer instructions are missing
1877void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {
1878 WriteSSEOp(0x66, 0x6B, dest, arg);
1879}
1880void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {
1881 WriteSSEOp(0x66, 0x63, dest, arg);
1882}
1883void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {
1884 WriteSSEOp(0x66, 0x67, dest, arg);
1885}
1886
1887void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) {
1888 WriteSSEOp(0x66, 0x60, dest, arg);
1889}
1890void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) {
1891 WriteSSEOp(0x66, 0x61, dest, arg);
1892}
1893void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) {
1894 WriteSSEOp(0x66, 0x62, dest, arg);
1895}
1896void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) {
1897 WriteSSEOp(0x66, 0x6C, dest, arg);
1898}
1899
1900void XEmitter::PSRLW(X64Reg reg, int shift) {
1901 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
1902 Write8(shift);
1903}
1904
1905void XEmitter::PSRLD(X64Reg reg, int shift) {
1906 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
1907 Write8(shift);
1908}
1909
1910void XEmitter::PSRLQ(X64Reg reg, int shift) {
1911 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
1912 Write8(shift);
1913}
1914
1915void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) {
1916 WriteSSEOp(0x66, 0xd3, reg, arg);
1917}
1918
1919void XEmitter::PSRLDQ(X64Reg reg, int shift) {
1920 WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
1921 Write8(shift);
1922}
1923
1924void XEmitter::PSLLW(X64Reg reg, int shift) {
1925 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
1926 Write8(shift);
1927}
1928
1929void XEmitter::PSLLD(X64Reg reg, int shift) {
1930 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
1931 Write8(shift);
1932}
1933
1934void XEmitter::PSLLQ(X64Reg reg, int shift) {
1935 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
1936 Write8(shift);
1937}
1938
1939void XEmitter::PSLLDQ(X64Reg reg, int shift) {
1940 WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
1941 Write8(shift);
1942}
1943
1944void XEmitter::PSRAW(X64Reg reg, int shift) {
1945 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
1946 Write8(shift);
1947}
1948
1949void XEmitter::PSRAD(X64Reg reg, int shift) {
1950 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
1951 Write8(shift);
1952}
1953
1954void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1955 if (!Common::GetCPUCaps().ssse3)
1956 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1957 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1958}
1959
1960void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
1961 if (!Common::GetCPUCaps().sse4_1)
1962 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1963 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1964}
1965
1966void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {
1967 WriteSSSE3Op(0x66, 0x3800, dest, arg);
1968}
1969void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {
1970 WriteSSE41Op(0x66, 0x3817, dest, arg);
1971}
1972void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {
1973 WriteSSE41Op(0x66, 0x382b, dest, arg);
1974}
1975void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {
1976 WriteSSE41Op(0x66, 0x3A40, dest, arg, 1);
1977 Write8(mask);
1978}
1979
1980void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {
1981 WriteSSE41Op(0x66, 0x3838, dest, arg);
1982}
1983void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {
1984 WriteSSE41Op(0x66, 0x3839, dest, arg);
1985}
1986void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {
1987 WriteSSE41Op(0x66, 0x383a, dest, arg);
1988}
1989void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {
1990 WriteSSE41Op(0x66, 0x383b, dest, arg);
1991}
1992void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {
1993 WriteSSE41Op(0x66, 0x383c, dest, arg);
1994}
1995void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {
1996 WriteSSE41Op(0x66, 0x383d, dest, arg);
1997}
1998void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {
1999 WriteSSE41Op(0x66, 0x383e, dest, arg);
2000}
2001void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {
2002 WriteSSE41Op(0x66, 0x383f, dest, arg);
2003}
2004
2005void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {
2006 WriteSSE41Op(0x66, 0x3820, dest, arg);
2007}
2008void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {
2009 WriteSSE41Op(0x66, 0x3821, dest, arg);
2010}
2011void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {
2012 WriteSSE41Op(0x66, 0x3822, dest, arg);
2013}
2014void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {
2015 WriteSSE41Op(0x66, 0x3823, dest, arg);
2016}
2017void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {
2018 WriteSSE41Op(0x66, 0x3824, dest, arg);
2019}
2020void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {
2021 WriteSSE41Op(0x66, 0x3825, dest, arg);
2022}
2023void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {
2024 WriteSSE41Op(0x66, 0x3830, dest, arg);
2025}
2026void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {
2027 WriteSSE41Op(0x66, 0x3831, dest, arg);
2028}
2029void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {
2030 WriteSSE41Op(0x66, 0x3832, dest, arg);
2031}
2032void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {
2033 WriteSSE41Op(0x66, 0x3833, dest, arg);
2034}
2035void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {
2036 WriteSSE41Op(0x66, 0x3834, dest, arg);
2037}
2038void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {
2039 WriteSSE41Op(0x66, 0x3835, dest, arg);
2040}
2041
2042void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {
2043 WriteSSE41Op(0x66, 0x3810, dest, arg);
2044}
2045void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {
2046 WriteSSE41Op(0x66, 0x3814, dest, arg);
2047}
2048void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {
2049 WriteSSE41Op(0x66, 0x3815, dest, arg);
2050}
2051void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) {
2052 WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1);
2053 Write8(blend);
2054}
2055void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) {
2056 WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1);
2057 Write8(blend);
2058}
2059
2060void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {
2061 WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1);
2062 Write8(mode);
2063}
2064void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {
2065 WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1);
2066 Write8(mode);
2067}
2068void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {
2069 WriteSSE41Op(0x66, 0x3A08, dest, arg, 1);
2070 Write8(mode);
2071}
2072void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {
2073 WriteSSE41Op(0x66, 0x3A09, dest, arg, 1);
2074 Write8(mode);
2075}
2076
2077void XEmitter::PAND(X64Reg dest, const OpArg& arg) {
2078 WriteSSEOp(0x66, 0xDB, dest, arg);
2079}
2080void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {
2081 WriteSSEOp(0x66, 0xDF, dest, arg);
2082}
2083void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {
2084 WriteSSEOp(0x66, 0xEF, dest, arg);
2085}
2086void XEmitter::POR(X64Reg dest, const OpArg& arg) {
2087 WriteSSEOp(0x66, 0xEB, dest, arg);
2088}
2089
2090void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {
2091 WriteSSEOp(0x66, 0xFC, dest, arg);
2092}
2093void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {
2094 WriteSSEOp(0x66, 0xFD, dest, arg);
2095}
2096void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {
2097 WriteSSEOp(0x66, 0xFE, dest, arg);
2098}
2099void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {
2100 WriteSSEOp(0x66, 0xD4, dest, arg);
2101}
2102
2103void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {
2104 WriteSSEOp(0x66, 0xEC, dest, arg);
2105}
2106void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {
2107 WriteSSEOp(0x66, 0xED, dest, arg);
2108}
2109void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {
2110 WriteSSEOp(0x66, 0xDC, dest, arg);
2111}
2112void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {
2113 WriteSSEOp(0x66, 0xDD, dest, arg);
2114}
2115
2116void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {
2117 WriteSSEOp(0x66, 0xF8, dest, arg);
2118}
2119void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {
2120 WriteSSEOp(0x66, 0xF9, dest, arg);
2121}
2122void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {
2123 WriteSSEOp(0x66, 0xFA, dest, arg);
2124}
2125void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {
2126 WriteSSEOp(0x66, 0xFB, dest, arg);
2127}
2128
2129void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {
2130 WriteSSEOp(0x66, 0xE8, dest, arg);
2131}
2132void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {
2133 WriteSSEOp(0x66, 0xE9, dest, arg);
2134}
2135void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {
2136 WriteSSEOp(0x66, 0xD8, dest, arg);
2137}
2138void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {
2139 WriteSSEOp(0x66, 0xD9, dest, arg);
2140}
2141
2142void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {
2143 WriteSSEOp(0x66, 0xE0, dest, arg);
2144}
2145void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {
2146 WriteSSEOp(0x66, 0xE3, dest, arg);
2147}
2148
2149void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {
2150 WriteSSEOp(0x66, 0x74, dest, arg);
2151}
2152void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {
2153 WriteSSEOp(0x66, 0x75, dest, arg);
2154}
2155void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {
2156 WriteSSEOp(0x66, 0x76, dest, arg);
2157}
2158
2159void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {
2160 WriteSSEOp(0x66, 0x64, dest, arg);
2161}
2162void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {
2163 WriteSSEOp(0x66, 0x65, dest, arg);
2164}
2165void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {
2166 WriteSSEOp(0x66, 0x66, dest, arg);
2167}
2168
2169void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2170 WriteSSEOp(0x66, 0xC5, dest, arg, 1);
2171 Write8(subreg);
2172}
2173void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2174 WriteSSEOp(0x66, 0xC4, dest, arg, 1);
2175 Write8(subreg);
2176}
2177
2178void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {
2179 WriteSSEOp(0x66, 0xF5, dest, arg);
2180}
2181void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {
2182 WriteSSEOp(0x66, 0xF6, dest, arg);
2183}
2184
2185void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {
2186 WriteSSEOp(0x66, 0xEE, dest, arg);
2187}
2188void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {
2189 WriteSSEOp(0x66, 0xDE, dest, arg);
2190}
2191void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {
2192 WriteSSEOp(0x66, 0xEA, dest, arg);
2193}
2194void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {
2195 WriteSSEOp(0x66, 0xDA, dest, arg);
2196}
2197
2198void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {
2199 WriteSSEOp(0x66, 0xD7, dest, arg);
2200}
2201void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2202 WriteSSEOp(0x66, 0x70, regOp, arg, 1);
2203 Write8(shuffle);
2204}
2205void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2206 WriteSSEOp(0xF2, 0x70, regOp, arg, 1);
2207 Write8(shuffle);
2208}
2209void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2210 WriteSSEOp(0xF3, 0x70, regOp, arg, 1);
2211 Write8(shuffle);
2212}
2213
2214// VEX
2215void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2216 WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);
2217}
2218void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2219 WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);
2220}
2221void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2222 WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);
2223}
2224void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2225 WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);
2226}
2227void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2228 WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);
2229}
2230void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2231 WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);
2232}
2233void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2234 WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);
2235}
2236void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2237 WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);
2238}
2239void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2240 WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);
2241}
2242void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {
2243 WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1);
2244 Write8(shuffle);
2245}
2246void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2247 WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);
2248}
2249void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2250 WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);
2251}
2252
2253void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2254 WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg);
2255}
2256void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2257 WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg);
2258}
2259void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2260 WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg);
2261}
2262void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2263 WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg);
2264}
2265void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2266 WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg);
2267}
2268void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2269 WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg);
2270}
2271void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2272 WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg);
2273}
2274void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2275 WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg);
2276}
2277
2278void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2279 WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg);
2280}
2281void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2282 WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg);
2283}
2284void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2285 WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg);
2286}
2287void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2288 WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
2289}
2290
2291void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2292 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg);
2293}
2294void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2295 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg);
2296}
2297void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2298 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg);
2299}
2300void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2301 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1);
2302}
2303void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2304 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1);
2305}
2306void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2307 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1);
2308}
2309void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2310 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg);
2311}
2312void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2313 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg);
2314}
2315void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2316 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg);
2317}
2318void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2319 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1);
2320}
2321void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2322 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1);
2323}
2324void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2325 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1);
2326}
2327void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2328 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg);
2329}
2330void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2331 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg);
2332}
2333void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2334 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg);
2335}
2336void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2337 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1);
2338}
2339void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2340 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1);
2341}
2342void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2343 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1);
2344}
2345void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2346 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg);
2347}
2348void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2349 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg);
2350}
2351void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2352 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg);
2353}
2354void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2355 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1);
2356}
2357void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2358 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1);
2359}
2360void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2361 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1);
2362}
2363void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2364 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg);
2365}
2366void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2367 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg);
2368}
2369void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2370 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg);
2371}
2372void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2373 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1);
2374}
2375void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2376 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1);
2377}
2378void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2379 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1);
2380}
2381void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2382 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg);
2383}
2384void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2385 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg);
2386}
2387void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2388 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg);
2389}
2390void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2391 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1);
2392}
2393void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2394 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1);
2395}
2396void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2397 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1);
2398}
2399void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2400 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg);
2401}
2402void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2403 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg);
2404}
2405void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2406 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg);
2407}
2408void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2409 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1);
2410}
2411void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2412 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1);
2413}
2414void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2415 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1);
2416}
2417void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2418 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg);
2419}
2420void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2421 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg);
2422}
2423void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2424 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg);
2425}
2426void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2427 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1);
2428}
2429void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2430 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1);
2431}
2432void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2433 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1);
2434}
2435void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2436 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg);
2437}
2438void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2439 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg);
2440}
2441void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2442 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg);
2443}
2444void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2445 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1);
2446}
2447void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2448 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1);
2449}
2450void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2451 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1);
2452}
2453void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2454 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg);
2455}
2456void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2457 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg);
2458}
2459void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2460 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg);
2461}
2462void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2463 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1);
2464}
2465void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2466 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1);
2467}
2468void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2469 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1);
2470}
2471
2472void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2473 WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);
2474}
2475void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2476 WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);
2477}
2478void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2479 WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);
2480}
2481void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {
2482 WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1);
2483 Write8(rotate);
2484}
2485void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2486 WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);
2487}
2488void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2489 WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);
2490}
2491void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2492 WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);
2493}
2494void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2495 WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);
2496}
2497void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {
2498 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);
2499}
2500void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {
2501 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);
2502}
2503void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {
2504 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);
2505}
2506void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2507 WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);
2508}
2509void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2510 WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);
2511}
2512
2513// Prefixes
2514
2515void XEmitter::LOCK() {
2516 Write8(0xF0);
2517}
2518void XEmitter::REP() {
2519 Write8(0xF3);
2520}
2521void XEmitter::REPNE() {
2522 Write8(0xF2);
2523}
2524void XEmitter::FSOverride() {
2525 Write8(0x64);
2526}
2527void XEmitter::GSOverride() {
2528 Write8(0x65);
2529}
2530
2531void XEmitter::FWAIT() {
2532 Write8(0x9B);
2533}
2534
2535// TODO: make this more generic
2536void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) {
2537 int mf = 0;
2538 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID),
2539 "WriteFloatLoadStore: 80 bits not supported for this instruction");
2540 switch (bits) {
2541 case 32:
2542 mf = 0;
2543 break;
2544 case 64:
2545 mf = 4;
2546 break;
2547 case 80:
2548 mf = 2;
2549 break;
2550 default:
2551 ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
2552 }
2553 Write8(0xd9 | mf);
2554 // x87 instructions use the reg field of the ModR/M byte as opcode:
2555 if (bits == 80)
2556 op = op_80b;
2557 arg.WriteRest(this, 0, (X64Reg)op);
2558}
2559
2560void XEmitter::FLD(int bits, const OpArg& src) {
2561 WriteFloatLoadStore(bits, floatLD, floatLD80, src);
2562}
2563void XEmitter::FST(int bits, const OpArg& dest) {
2564 WriteFloatLoadStore(bits, floatST, floatINVALID, dest);
2565}
2566void XEmitter::FSTP(int bits, const OpArg& dest) {
2567 WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);
2568}
2569void XEmitter::FNSTSW_AX() {
2570 Write8(0xDF);
2571 Write8(0xE0);
2572}
2573
2574void XEmitter::RDTSC() {
2575 Write8(0x0F);
2576 Write8(0x31);
2577}
2578
2579void XCodeBlock::PoisonMemory() {
2580 // x86/64: 0xCC = breakpoint
2581 memset(region, 0xCC, region_size);
2582}
2583}