summaryrefslogtreecommitdiff
path: root/src/common/x64/emitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64/emitter.cpp')
-rw-r--r--src/common/x64/emitter.cpp1989
1 files changed, 1989 insertions, 0 deletions
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
new file mode 100644
index 000000000..4b79acd1f
--- /dev/null
+++ b/src/common/x64/emitter.cpp
@@ -0,0 +1,1989 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cstring>
19
20#include "common/assert.h"
21#include "common/logging/log.h"
22#include "common/memory_util.h"
23
24#include "abi.h"
25#include "cpu_detect.h"
26#include "emitter.h"
27
28#define PRIx64 "llx"
29
30// Minimize the diff against Dolphin
31#define DYNA_REC JIT
32
33namespace Gen
34{
35
36struct NormalOpDef
37{
38 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
39};
40
41// 0xCC is code for invalid combination of immediates
42static const NormalOpDef normalops[11] =
43{
44 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD
45 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC
46
47 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB
48 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB
49
50 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND
51 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR
52
53 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR
54 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV
55
56 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from)
57 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP
58
59 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG
60};
61
// Opcode bytes for the SSE instructions this emitter knows about.
// Several enumerators intentionally share a value (e.g. MOVLP-from-RM and
// MOVHLPS are both 0x12).
enum NormalSSEOps
{
    // Arithmetic
    sseADD = 0x58,
    sseSUB = 0x5C,
    sseMUL = 0x59,
    sseDIV = 0x5E,
    sseMIN = 0x5D,
    sseMAX = 0x5F,
    sseSQRT = 0x51,
    sseRSQRT = 0x52, // no double-precision variant exists
    sseRCP = 0x53,
    sseHADD = 0x7C,

    // Bitwise logic
    sseAND = 0x54,
    sseANDN = 0x55,
    sseOR = 0x56,
    sseXOR = 0x57,

    // Comparison
    sseCMP = 0xC2,
    sseCOMIS = 0x2F,
    sseUCOMIS = 0x2E,

    // Aligned / unaligned packed moves
    sseMOVAPfromRM = 0x28,
    sseMOVAPtoRM = 0x29,
    sseMOVUPfromRM = 0x10,
    sseMOVUPtoRM = 0x11,

    // Low / high half moves
    sseMOVLPfromRM = 0x12,
    sseMOVLPtoRM = 0x13,
    sseMOVHPfromRM = 0x16,
    sseMOVHPtoRM = 0x17,
    sseMOVHLPS = 0x12,
    sseMOVLHPS = 0x16,

    // Integer-vector moves
    sseMOVDQfromRM = 0x6F,
    sseMOVDQtoRM = 0x7F,
    sseMASKMOVDQU = 0xF7,
    sseLDDQU = 0xF0,

    // Shuffle
    sseSHUF = 0xC6,

    // Non-temporal stores
    sseMOVNTDQ = 0xE7,
    sseMOVNTP = 0x2B,
};
99
100
101void XEmitter::SetCodePtr(u8 *ptr)
102{
103 code = ptr;
104}
105
106const u8 *XEmitter::GetCodePtr() const
107{
108 return code;
109}
110
111u8 *XEmitter::GetWritableCodePtr()
112{
113 return code;
114}
115
116void XEmitter::ReserveCodeSpace(int bytes)
117{
118 for (int i = 0; i < bytes; i++)
119 *code++ = 0xCC;
120}
121
122const u8 *XEmitter::AlignCode4()
123{
124 int c = int((u64)code & 3);
125 if (c)
126 ReserveCodeSpace(4-c);
127 return code;
128}
129
130const u8 *XEmitter::AlignCode16()
131{
132 int c = int((u64)code & 15);
133 if (c)
134 ReserveCodeSpace(16-c);
135 return code;
136}
137
138const u8 *XEmitter::AlignCodePage()
139{
140 int c = int((u64)code & 4095);
141 if (c)
142 ReserveCodeSpace(4096-c);
143 return code;
144}
145
146// This operation modifies flags; check to see the flags are locked.
147// If the flags are locked, we should immediately and loudly fail before
148// causing a subtle JIT bug.
149void XEmitter::CheckFlags()
150{
151 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
152}
153
154void XEmitter::WriteModRM(int mod, int reg, int rm)
155{
156 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
157}
158
159void XEmitter::WriteSIB(int scale, int index, int base)
160{
161 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
162}
163
164void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const
165{
166 if (customOp == -1) customOp = operandReg;
167#ifdef ARCHITECTURE_x86_64
168 u8 op = 0x40;
169 // REX.W (whether operation is a 64-bit operation)
170 if (opBits == 64) op |= 8;
171 // REX.R (whether ModR/M reg field refers to R8-R15.
172 if (customOp & 8) op |= 4;
173 // REX.X (whether ModR/M SIB index field refers to R8-R15)
174 if (indexReg & 8) op |= 2;
175 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
176 if (offsetOrBaseReg & 8) op |= 1;
177 // Write REX if wr have REX bits to write, or if the operation accesses
178 // SIL, DIL, BPL, or SPL.
179 if (op != 0x40 ||
180 (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
181 (opBits == 8 && (customOp & 0x10c) == 4))
182 {
183 emit->Write8(op);
184 // Check the operation doesn't access AH, BH, CH, or DH.
185 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
186 DEBUG_ASSERT((customOp & 0x100) == 0);
187 }
188#else
189 DEBUG_ASSERT(opBits != 64);
190 DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
191 DEBUG_ASSERT((indexReg & 8) == 0);
192 DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
193 DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
194 DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
195#endif
196}
197
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const
199{
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1)
208 {
209 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp;
210 emit->Write8(0xC5);
211 emit->Write8(RvvvvLpp);
212 }
213 else
214 {
215 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
216 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp;
217 emit->Write8(0xC4);
218 emit->Write8(RXBmmmmm);
219 emit->Write8(WvvvvLpp);
220 }
221}
222
223void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
224 bool warn_64bit_offset) const
225{
226 if (_operandReg == INVALID_REG)
227 _operandReg = (X64Reg)this->operandReg;
228 int mod = 0;
229 int ireg = indexReg;
230 bool SIB = false;
231 int _offsetOrBaseReg = this->offsetOrBaseReg;
232
233 if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address
234 {
235 // Oh, RIP addressing.
236 _offsetOrBaseReg = 5;
237 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
238 //TODO : add some checks
239#ifdef ARCHITECTURE_x86_64
240 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
241 s64 distance = (s64)offset - (s64)ripAddr;
242 ASSERT_MSG(
243 (distance < 0x80000000LL &&
244 distance >= -0x80000000LL) ||
245 !warn_64bit_offset,
246 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")",
247 ripAddr, offset);
248 s32 offs = (s32)distance;
249 emit->Write32((u32)offs);
250#else
251 emit->Write32((u32)offset);
252#endif
253 return;
254 }
255
256 if (scale == 0)
257 {
258 // Oh, no memory, Just a reg.
259 mod = 3; //11
260 }
261 else if (scale >= 1)
262 {
263 //Ah good, no scaling.
264 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5))
265 {
266 //Okay, we're good. No SIB necessary.
267 int ioff = (int)offset;
268 if (ioff == 0)
269 {
270 mod = 0;
271 }
272 else if (ioff<-128 || ioff>127)
273 {
274 mod = 2; //32-bit displacement
275 }
276 else
277 {
278 mod = 1; //8-bit displacement
279 }
280 }
281 else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)
282 {
283 SIB = true;
284 mod = 0;
285 _offsetOrBaseReg = 5;
286 }
287 else //if (scale != SCALE_ATREG)
288 {
289 if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :(
290 {
291 //So we have to fake it with SIB encoding :(
292 SIB = true;
293 }
294
295 if (scale >= SCALE_1 && scale < SCALE_ATREG)
296 {
297 SIB = true;
298 }
299
300 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4))
301 {
302 SIB = true;
303 ireg = _offsetOrBaseReg;
304 }
305
306 //Okay, we're fine. Just disp encoding.
307 //We need displacement. Which size?
308 int ioff = (int)(s64)offset;
309 if (ioff < -128 || ioff > 127)
310 {
311 mod = 2; //32-bit displacement
312 }
313 else
314 {
315 mod = 1; //8-bit displacement
316 }
317 }
318 }
319
320 // Okay. Time to do the actual writing
321 // ModRM byte:
322 int oreg = _offsetOrBaseReg;
323 if (SIB)
324 oreg = 4;
325
326 // TODO(ector): WTF is this if about? I don't remember writing it :-)
327 //if (RIP)
328 // oreg = 5;
329
330 emit->WriteModRM(mod, _operandReg&7, oreg&7);
331
332 if (SIB)
333 {
334 //SIB byte
335 int ss;
336 switch (scale)
337 {
338 case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP
339 case SCALE_1: ss = 0; break;
340 case SCALE_2: ss = 1; break;
341 case SCALE_4: ss = 2; break;
342 case SCALE_8: ss = 3; break;
343 case SCALE_NOBASE_2: ss = 1; break;
344 case SCALE_NOBASE_4: ss = 2; break;
345 case SCALE_NOBASE_8: ss = 3; break;
346 case SCALE_ATREG: ss = 0; break;
347 default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break;
348 }
349 emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7)));
350 }
351
352 if (mod == 1) //8-bit disp
353 {
354 emit->Write8((u8)(s8)(s32)offset);
355 }
356 else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp
357 {
358 emit->Write32((u32)offset);
359 }
360}
361
362// W = operand extended width (1 if 64-bit)
363// R = register# upper bit
364// X = scale amnt upper bit
365// B = base register# upper bit
366void XEmitter::Rex(int w, int r, int x, int b)
367{
368 w = w ? 1 : 0;
369 r = r ? 1 : 0;
370 x = x ? 1 : 0;
371 b = b ? 1 : 0;
372 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
373 if (rx != 0x40)
374 Write8(rx);
375}
376
377void XEmitter::JMP(const u8 *addr, bool force5Bytes)
378{
379 u64 fn = (u64)addr;
380 if (!force5Bytes)
381 {
382 s64 distance = (s64)(fn - ((u64)code + 2));
383 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
384 "Jump target too far away, needs force5Bytes = true");
385 //8 bits will do
386 Write8(0xEB);
387 Write8((u8)(s8)distance);
388 }
389 else
390 {
391 s64 distance = (s64)(fn - ((u64)code + 5));
392
393 ASSERT_MSG(
394 distance >= -0x80000000LL && distance < 0x80000000LL,
395 "Jump target too far away, needs indirect register");
396 Write8(0xE9);
397 Write32((u32)(s32)distance);
398 }
399}
400
401void XEmitter::JMPptr(const OpArg &arg2)
402{
403 OpArg arg = arg2;
404 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
405 arg.operandReg = 4;
406 arg.WriteRex(this, 0, 0);
407 Write8(0xFF);
408 arg.WriteRest(this);
409}
410
411//Can be used to trap other processors, before overwriting their code
412// not used in dolphin
413void XEmitter::JMPself()
414{
415 Write8(0xEB);
416 Write8(0xFE);
417}
418
419void XEmitter::CALLptr(OpArg arg)
420{
421 if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument");
422 arg.operandReg = 2;
423 arg.WriteRex(this, 0, 0);
424 Write8(0xFF);
425 arg.WriteRest(this);
426}
427
428void XEmitter::CALL(const void *fnptr)
429{
430 u64 distance = u64(fnptr) - (u64(code) + 5);
431 ASSERT_MSG(
432 distance < 0x0000000080000000ULL ||
433 distance >= 0xFFFFFFFF80000000ULL,
434 "CALL out of range (%p calls %p)", code, fnptr);
435 Write8(0xE8);
436 Write32(u32(distance));
437}
438
439FixupBranch XEmitter::J(bool force5bytes)
440{
441 FixupBranch branch;
442 branch.type = force5bytes ? 1 : 0;
443 branch.ptr = code + (force5bytes ? 5 : 2);
444 if (!force5bytes)
445 {
446 //8 bits will do
447 Write8(0xEB);
448 Write8(0);
449 }
450 else
451 {
452 Write8(0xE9);
453 Write32(0);
454 }
455 return branch;
456}
457
458FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)
459{
460 FixupBranch branch;
461 branch.type = force5bytes ? 1 : 0;
462 branch.ptr = code + (force5bytes ? 6 : 2);
463 if (!force5bytes)
464 {
465 //8 bits will do
466 Write8(0x70 + conditionCode);
467 Write8(0);
468 }
469 else
470 {
471 Write8(0x0F);
472 Write8(0x80 + conditionCode);
473 Write32(0);
474 }
475 return branch;
476}
477
478void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
479{
480 u64 fn = (u64)addr;
481 s64 distance = (s64)(fn - ((u64)code + 2));
482 if (distance < -0x80 || distance >= 0x80 || force5bytes)
483 {
484 distance = (s64)(fn - ((u64)code + 6));
485 ASSERT_MSG(
486 distance >= -0x80000000LL && distance < 0x80000000LL,
487 "Jump target too far away, needs indirect register");
488 Write8(0x0F);
489 Write8(0x80 + conditionCode);
490 Write32((u32)(s32)distance);
491 }
492 else
493 {
494 Write8(0x70 + conditionCode);
495 Write8((u8)(s8)distance);
496 }
497}
498
499void XEmitter::SetJumpTarget(const FixupBranch &branch)
500{
501 if (branch.type == 0)
502 {
503 s64 distance = (s64)(code - branch.ptr);
504 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
505 branch.ptr[-1] = (u8)(s8)distance;
506 }
507 else if (branch.type == 1)
508 {
509 s64 distance = (s64)(code - branch.ptr);
510 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
511 ((s32*)branch.ptr)[-1] = (s32)distance;
512 }
513}
514
515// INC/DEC considered harmful on newer CPUs due to partial flag set.
516// Use ADD, SUB instead.
517
518/*
519void XEmitter::INC(int bits, OpArg arg)
520{
521 if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
522 arg.operandReg = 0;
523 if (bits == 16) {Write8(0x66);}
524 arg.WriteRex(this, bits, bits);
525 Write8(bits == 8 ? 0xFE : 0xFF);
526 arg.WriteRest(this);
527}
528void XEmitter::DEC(int bits, OpArg arg)
529{
530 if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
531 arg.operandReg = 1;
532 if (bits == 16) {Write8(0x66);}
533 arg.WriteRex(this, bits, bits);
534 Write8(bits == 8 ? 0xFE : 0xFF);
535 arg.WriteRest(this);
536}
537*/
538
539//Single byte opcodes
540//There is no PUSHAD/POPAD in 64-bit mode.
541void XEmitter::INT3() {Write8(0xCC);}
542void XEmitter::RET() {Write8(0xC3);}
543void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
544
545// The first sign of decadence: optimized NOPs.
546void XEmitter::NOP(size_t size)
547{
548 DEBUG_ASSERT((int)size > 0);
549 while (true)
550 {
551 switch (size)
552 {
553 case 0:
554 return;
555 case 1:
556 Write8(0x90);
557 return;
558 case 2:
559 Write8(0x66); Write8(0x90);
560 return;
561 case 3:
562 Write8(0x0F); Write8(0x1F); Write8(0x00);
563 return;
564 case 4:
565 Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00);
566 return;
567 case 5:
568 Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00);
569 Write8(0x00);
570 return;
571 case 6:
572 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44);
573 Write8(0x00); Write8(0x00);
574 return;
575 case 7:
576 Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00);
577 Write8(0x00); Write8(0x00); Write8(0x00);
578 return;
579 case 8:
580 Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00);
581 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84);
585 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
586 Write8(0x00);
587 return;
588 case 10:
589 Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F);
590 Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00);
591 Write8(0x00); Write8(0x00);
592 return;
593 default:
594 // Even though x86 instructions are allowed to be up to 15 bytes long,
595 // AMD advises against using NOPs longer than 11 bytes because they
596 // carry a performance penalty on CPUs older than AMD family 16h.
597 Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F);
598 Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00);
599 Write8(0x00); Write8(0x00); Write8(0x00);
600 size -= 11;
601 continue;
602 }
603 }
604}
605
606void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu
607void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry
608void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry
609void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry
610
611//TODO: xchg ah, al ???
612void XEmitter::XCHG_AHAL()
613{
614 Write8(0x86);
615 Write8(0xe0);
616 // alt. 86 c4
617}
618
619//These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
620void XEmitter::LAHF() {Write8(0x9F);}
621void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);}
622
623void XEmitter::PUSHF() {Write8(0x9C);}
624void XEmitter::POPF() {CheckFlags(); Write8(0x9D);}
625
626void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);}
627void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);}
628void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);}
629
630void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg)
631{
632 if (bits == 16)
633 Write8(0x66);
634 Rex(bits == 64, 0, 0, (int)reg >> 3);
635 Write8(byte + ((int)reg & 7));
636}
637
638void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg)
639{
640 if (bits == 16)
641 Write8(0x66);
642 Rex(bits==64, 0, 0, (int)reg >> 3);
643 Write8(byte1);
644 Write8(byte2 + ((int)reg & 7));
645}
646
647void XEmitter::CWD(int bits)
648{
649 if (bits == 16)
650 Write8(0x66);
651 Rex(bits == 64, 0, 0, 0);
652 Write8(0x99);
653}
654
655void XEmitter::CBW(int bits)
656{
657 if (bits == 8)
658 Write8(0x66);
659 Rex(bits == 32, 0, 0, 0);
660 Write8(0x98);
661}
662
663//Simple opcodes
664
665
666//push/pop do not need wide to be 64-bit
667void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
668void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
669
670void XEmitter::PUSH(int bits, const OpArg &reg)
671{
672 if (reg.IsSimpleReg())
673 PUSH(reg.GetSimpleReg());
674 else if (reg.IsImm())
675 {
676 switch (reg.GetImmBits())
677 {
678 case 8:
679 Write8(0x6A);
680 Write8((u8)(s8)reg.offset);
681 break;
682 case 16:
683 Write8(0x66);
684 Write8(0x68);
685 Write16((u16)(s16)(s32)reg.offset);
686 break;
687 case 32:
688 Write8(0x68);
689 Write32((u32)reg.offset);
690 break;
691 default:
692 ASSERT_MSG(0, "PUSH - Bad imm bits");
693 break;
694 }
695 }
696 else
697 {
698 if (bits == 16)
699 Write8(0x66);
700 reg.WriteRex(this, bits, bits);
701 Write8(0xFF);
702 reg.WriteRest(this, 0, (X64Reg)6);
703 }
704}
705
706void XEmitter::POP(int /*bits*/, const OpArg &reg)
707{
708 if (reg.IsSimpleReg())
709 POP(reg.GetSimpleReg());
710 else
711 ASSERT_MSG(0, "POP - Unsupported encoding");
712}
713
714void XEmitter::BSWAP(int bits, X64Reg reg)
715{
716 if (bits >= 32)
717 {
718 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
719 }
720 else if (bits == 16)
721 {
722 ROL(16, R(reg), Imm8(8));
723 }
724 else if (bits == 8)
725 {
726 // Do nothing - can't bswap a single byte...
727 }
728 else
729 {
730 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
731 }
732}
733
734// Undefined opcode - reserved
735// If we ever need a way to always cause a non-breakpoint hard exception...
736void XEmitter::UD2()
737{
738 Write8(0x0F);
739 Write8(0x0B);
740}
741
742void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg)
743{
744 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
745 arg.operandReg = (u8)level;
746 arg.WriteRex(this, 0, 0);
747 Write8(0x0F);
748 Write8(0x18);
749 arg.WriteRest(this);
750}
751
752void XEmitter::SETcc(CCFlags flag, OpArg dest)
753{
754 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
755 dest.operandReg = 0;
756 dest.WriteRex(this, 0, 8);
757 Write8(0x0F);
758 Write8(0x90 + (u8)flag);
759 dest.WriteRest(this);
760}
761
762void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag)
763{
764 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
765 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
766 if (bits == 16)
767 Write8(0x66);
768 src.operandReg = dest;
769 src.WriteRex(this, bits, bits);
770 Write8(0x0F);
771 Write8(0x40 + (u8)flag);
772 src.WriteRest(this);
773}
774
775void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
776{
777 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
778 CheckFlags();
779 src.operandReg = ext;
780 if (bits == 16)
781 Write8(0x66);
782 src.WriteRex(this, bits, bits, 0);
783 if (bits == 8)
784 {
785 Write8(0xF6);
786 }
787 else
788 {
789 Write8(0xF7);
790 }
791 src.WriteRest(this);
792}
793
794void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);}
795void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);}
796void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
797void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
798void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
799void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
800
801void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
802{
803 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
804 CheckFlags();
805 src.operandReg = (u8)dest;
806 if (bits == 16)
807 Write8(0x66);
808 if (rep)
809 Write8(0xF3);
810 src.WriteRex(this, bits, bits);
811 Write8(0x0F);
812 Write8(byte2);
813 src.WriteRest(this);
814}
815
816void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
817{
818 if (bits <= 16)
819 ASSERT_MSG(0, "MOVNTI - bits<=16");
820 WriteBitSearchType(bits, src, dest, 0xC3);
821}
822
823void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
824void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
825
826void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
827{
828 CheckFlags();
829 if (!Common::GetCPUCaps().bmi1)
830 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
831 WriteBitSearchType(bits, dest, src, 0xBC, true);
832}
833void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
834{
835 CheckFlags();
836 if (!Common::GetCPUCaps().lzcnt)
837 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
838 WriteBitSearchType(bits, dest, src, 0xBD, true);
839}
840
841void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
842{
843 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
844 if (dbits == sbits)
845 {
846 MOV(dbits, R(dest), src);
847 return;
848 }
849 src.operandReg = (u8)dest;
850 if (dbits == 16)
851 Write8(0x66);
852 src.WriteRex(this, dbits, sbits);
853 if (sbits == 8)
854 {
855 Write8(0x0F);
856 Write8(0xBE);
857 }
858 else if (sbits == 16)
859 {
860 Write8(0x0F);
861 Write8(0xBF);
862 }
863 else if (sbits == 32 && dbits == 64)
864 {
865 Write8(0x63);
866 }
867 else
868 {
869 Crash();
870 }
871 src.WriteRest(this);
872}
873
874void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
875{
876 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
877 if (dbits == sbits)
878 {
879 MOV(dbits, R(dest), src);
880 return;
881 }
882 src.operandReg = (u8)dest;
883 if (dbits == 16)
884 Write8(0x66);
885 //the 32bit result is automatically zero extended to 64bit
886 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
887 if (sbits == 8)
888 {
889 Write8(0x0F);
890 Write8(0xB6);
891 }
892 else if (sbits == 16)
893 {
894 Write8(0x0F);
895 Write8(0xB7);
896 }
897 else if (sbits == 32 && dbits == 64)
898 {
899 Write8(0x8B);
900 }
901 else
902 {
903 ASSERT_MSG(0, "MOVZX - Invalid size");
904 }
905 src.WriteRest(this);
906}
907
908void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src)
909{
910 ASSERT_MSG(Common::GetCPUCaps().movbe, "Generating MOVBE on a system that does not support it.");
911 if (bits == 8)
912 {
913 MOV(bits, dest, src);
914 return;
915 }
916
917 if (bits == 16)
918 Write8(0x66);
919
920 if (dest.IsSimpleReg())
921 {
922 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
923 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
924 Write8(0x0F); Write8(0x38); Write8(0xF0);
925 src.WriteRest(this, 0, dest.GetSimpleReg());
926 }
927 else if (src.IsSimpleReg())
928 {
929 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
930 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
931 Write8(0x0F); Write8(0x38); Write8(0xF1);
932 dest.WriteRest(this, 0, src.GetSimpleReg());
933 }
934 else
935 {
936 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
937 }
938}
939
940
941void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
942{
943 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
944 src.operandReg = (u8)dest;
945 if (bits == 16)
946 Write8(0x66); //TODO: performance warning
947 src.WriteRex(this, bits, bits);
948 Write8(0x8D);
949 src.WriteRest(this, 0, INVALID_REG, bits == 64);
950}
951
952//shift can be either imm8 or cl
953void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
954{
955 CheckFlags();
956 bool writeImm = false;
957 if (dest.IsImm())
958 {
959 ASSERT_MSG(0, "WriteShift - can't shift imms");
960 }
961 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
962 {
963 ASSERT_MSG(0, "WriteShift - illegal argument");
964 }
965 dest.operandReg = ext;
966 if (bits == 16)
967 Write8(0x66);
968 dest.WriteRex(this, bits, bits, 0);
969 if (shift.GetImmBits() == 8)
970 {
971 //ok an imm
972 u8 imm = (u8)shift.offset;
973 if (imm == 1)
974 {
975 Write8(bits == 8 ? 0xD0 : 0xD1);
976 }
977 else
978 {
979 writeImm = true;
980 Write8(bits == 8 ? 0xC0 : 0xC1);
981 }
982 }
983 else
984 {
985 Write8(bits == 8 ? 0xD2 : 0xD3);
986 }
987 dest.WriteRest(this, writeImm ? 1 : 0);
988 if (writeImm)
989 Write8((u8)shift.offset);
990}
991
992// large rotates and shift are slower on intel than amd
993// intel likes to rotate by 1, and the op is smaller too
994void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
995void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
996void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
997void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
998void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
999void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
1000void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
1001
1002// index can be either imm8 or register, don't use memory destination because it's slow
1003void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
1004{
1005 CheckFlags();
1006 if (dest.IsImm())
1007 {
1008 ASSERT_MSG(0, "WriteBitTest - can't test imms");
1009 }
1010 if ((index.IsImm() && index.GetImmBits() != 8))
1011 {
1012 ASSERT_MSG(0, "WriteBitTest - illegal argument");
1013 }
1014 if (bits == 16)
1015 Write8(0x66);
1016 if (index.IsImm())
1017 {
1018 dest.WriteRex(this, bits, bits);
1019 Write8(0x0F); Write8(0xBA);
1020 dest.WriteRest(this, 1, (X64Reg)ext);
1021 Write8((u8)index.offset);
1022 }
1023 else
1024 {
1025 X64Reg operand = index.GetSimpleReg();
1026 dest.WriteRex(this, bits, bits, operand);
1027 Write8(0x0F); Write8(0x83 + 8*ext);
1028 dest.WriteRest(this, 1, operand);
1029 }
1030}
1031
1032void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);}
1033void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
1034void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
1035void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
1036
1037//shift can be either imm8 or cl
1038void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
1039{
1040 CheckFlags();
1041 if (dest.IsImm())
1042 {
1043 ASSERT_MSG(0, "SHRD - can't use imms as destination");
1044 }
1045 if (!src.IsSimpleReg())
1046 {
1047 ASSERT_MSG(0, "SHRD - must use simple register as source");
1048 }
1049 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
1050 {
1051 ASSERT_MSG(0, "SHRD - illegal shift");
1052 }
1053 if (bits == 16)
1054 Write8(0x66);
1055 X64Reg operand = src.GetSimpleReg();
1056 dest.WriteRex(this, bits, bits, operand);
1057 if (shift.GetImmBits() == 8)
1058 {
1059 Write8(0x0F); Write8(0xAC);
1060 dest.WriteRest(this, 1, operand);
1061 Write8((u8)shift.offset);
1062 }
1063 else
1064 {
1065 Write8(0x0F); Write8(0xAD);
1066 dest.WriteRest(this, 0, operand);
1067 }
1068}
1069
1070void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
1071{
1072 CheckFlags();
1073 if (dest.IsImm())
1074 {
1075 ASSERT_MSG(0, "SHLD - can't use imms as destination");
1076 }
1077 if (!src.IsSimpleReg())
1078 {
1079 ASSERT_MSG(0, "SHLD - must use simple register as source");
1080 }
1081 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
1082 {
1083 ASSERT_MSG(0, "SHLD - illegal shift");
1084 }
1085 if (bits == 16)
1086 Write8(0x66);
1087 X64Reg operand = src.GetSimpleReg();
1088 dest.WriteRex(this, bits, bits, operand);
1089 if (shift.GetImmBits() == 8)
1090 {
1091 Write8(0x0F); Write8(0xA4);
1092 dest.WriteRest(this, 1, operand);
1093 Write8((u8)shift.offset);
1094 }
1095 else
1096 {
1097 Write8(0x0F); Write8(0xA5);
1098 dest.WriteRest(this, 0, operand);
1099 }
1100}
1101
1102void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits)
1103{
1104 if (bits == 16)
1105 emit->Write8(0x66);
1106
1107 this->operandReg = (u8)_operandReg;
1108 WriteRex(emit, bits, bits);
1109 emit->Write8(op);
1110 WriteRest(emit);
1111}
1112
1113//operand can either be immediate or register
1114void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
1115{
1116 X64Reg _operandReg;
1117 if (IsImm())
1118 {
1119 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
1120 }
1121
1122 if (bits == 16)
1123 emit->Write8(0x66);
1124
1125 int immToWrite = 0;
1126
1127 if (operand.IsImm())
1128 {
1129 WriteRex(emit, bits, bits);
1130
1131 if (!toRM)
1132 {
1133 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
1134 }
1135
1136 if (operand.scale == SCALE_IMM8 && bits == 8)
1137 {
1138 // op al, imm8
1139 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC)
1140 {
1141 emit->Write8(normalops[op].eaximm8);
1142 emit->Write8((u8)operand.offset);
1143 return;
1144 }
1145 // mov reg, imm8
1146 if (!scale && op == nrmMOV)
1147 {
1148 emit->Write8(0xB0 + (offsetOrBaseReg & 7));
1149 emit->Write8((u8)operand.offset);
1150 return;
1151 }
1152 // op r/m8, imm8
1153 emit->Write8(normalops[op].imm8);
1154 immToWrite = 8;
1155 }
1156 else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
1157 (operand.scale == SCALE_IMM32 && bits == 32) ||
1158 (operand.scale == SCALE_IMM32 && bits == 64))
1159 {
1160 // Try to save immediate size if we can, but first check to see
1161 // if the instruction supports simm8.
1162 // op r/m, imm8
1163 if (normalops[op].simm8 != 0xCC &&
1164 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
1165 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset)))
1166 {
1167 emit->Write8(normalops[op].simm8);
1168 immToWrite = 8;
1169 }
1170 else
1171 {
1172 // mov reg, imm
1173 if (!scale && op == nrmMOV && bits != 64)
1174 {
1175 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1176 if (bits == 16)
1177 emit->Write16((u16)operand.offset);
1178 else
1179 emit->Write32((u32)operand.offset);
1180 return;
1181 }
1182 // op eax, imm
1183 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC)
1184 {
1185 emit->Write8(normalops[op].eaximm32);
1186 if (bits == 16)
1187 emit->Write16((u16)operand.offset);
1188 else
1189 emit->Write32((u32)operand.offset);
1190 return;
1191 }
1192 // op r/m, imm
1193 emit->Write8(normalops[op].imm32);
1194 immToWrite = bits == 16 ? 16 : 32;
1195 }
1196 }
1197 else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
1198 (operand.scale == SCALE_IMM8 && bits == 32) ||
1199 (operand.scale == SCALE_IMM8 && bits == 64))
1200 {
1201 // op r/m, imm8
1202 emit->Write8(normalops[op].simm8);
1203 immToWrite = 8;
1204 }
1205 else if (operand.scale == SCALE_IMM64 && bits == 64)
1206 {
1207 if (scale)
1208 {
1209 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
1210 }
1211 // mov reg64, imm64
1212 else if (op == nrmMOV)
1213 {
1214 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1215 emit->Write64((u64)operand.offset);
1216 return;
1217 }
1218 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
1219 }
1220 else
1221 {
1222 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1223 }
1224 _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM
1225 }
1226 else
1227 {
1228 _operandReg = (X64Reg)operand.offsetOrBaseReg;
1229 WriteRex(emit, bits, bits, _operandReg);
1230 // op r/m, reg
1231 if (toRM)
1232 {
1233 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
1234 }
1235 // op reg, r/m
1236 else
1237 {
1238 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
1239 }
1240 }
1241 WriteRest(emit, immToWrite >> 3, _operandReg);
1242 switch (immToWrite)
1243 {
1244 case 0:
1245 break;
1246 case 8:
1247 emit->Write8((u8)operand.offset);
1248 break;
1249 case 16:
1250 emit->Write16((u16)operand.offset);
1251 break;
1252 case 32:
1253 emit->Write32((u32)operand.offset);
1254 break;
1255 default:
1256 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1257 }
1258}
1259
1260void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
1261{
1262 if (a1.IsImm())
1263 {
1264 //Booh! Can't write to an imm
1265 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1266 return;
1267 }
1268 if (a2.IsImm())
1269 {
1270 a1.WriteNormalOp(emit, true, op, a2, bits);
1271 }
1272 else
1273 {
1274 if (a1.IsSimpleReg())
1275 {
1276 a2.WriteNormalOp(emit, false, op, a1, bits);
1277 }
1278 else
1279 {
1280 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory");
1281 a1.WriteNormalOp(emit, true, op, a2, bits);
1282 }
1283 }
1284}
1285
// Two-operand integer ALU instructions (a1 = destination, a2 = source).
// Each one calls CheckFlags() and then forwards to WriteNormalOp() with the
// NormalOp selector that matches its name.
1286 void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
1287 void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
1288 void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
1289 void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
1290 void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
1291 void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
1292 void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
1293void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
1294{
1295 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1296 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1297 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1298}
// TEST and CMP call CheckFlags() before encoding; XCHG forwards to WriteNormalOp()
// without calling CheckFlags().
1299 void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
1300 void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
1301 void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
1302
1303void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
1304{
1305 CheckFlags();
1306 if (bits == 8)
1307 {
1308 ASSERT_MSG(0, "IMUL - illegal bit size!");
1309 return;
1310 }
1311
1312 if (a1.IsImm())
1313 {
1314 ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
1315 return;
1316 }
1317
1318 if (!a2.IsImm())
1319 {
1320 ASSERT_MSG(0, "IMUL - third arg must be imm!");
1321 return;
1322 }
1323
1324 if (bits == 16)
1325 Write8(0x66);
1326 a1.WriteRex(this, bits, bits, regOp);
1327
1328 if (a2.GetImmBits() == 8 ||
1329 (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
1330 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset))
1331 {
1332 Write8(0x6B);
1333 a1.WriteRest(this, 1, regOp);
1334 Write8((u8)a2.offset);
1335 }
1336 else
1337 {
1338 Write8(0x69);
1339 if (a2.GetImmBits() == 16 && bits == 16)
1340 {
1341 a1.WriteRest(this, 2, regOp);
1342 Write16((u16)a2.offset);
1343 }
1344 else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64))
1345 {
1346 a1.WriteRest(this, 4, regOp);
1347 Write32((u32)a2.offset);
1348 }
1349 else
1350 {
1351 ASSERT_MSG(0, "IMUL - unhandled case!");
1352 }
1353 }
1354}
1355
1356void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
1357{
1358 CheckFlags();
1359 if (bits == 8)
1360 {
1361 ASSERT_MSG(0, "IMUL - illegal bit size!");
1362 return;
1363 }
1364
1365 if (a.IsImm())
1366 {
1367 IMUL(bits, regOp, R(regOp), a) ;
1368 return;
1369 }
1370
1371 if (bits == 16)
1372 Write8(0x66);
1373 a.WriteRex(this, bits, bits, regOp);
1374 Write8(0x0F);
1375 Write8(0xAF);
1376 a.WriteRest(this, 0, regOp);
1377}
1378
1379
1380void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1381{
1382 if (opPrefix)
1383 Write8(opPrefix);
1384 arg.operandReg = regOp;
1385 arg.WriteRex(this, 0, 0);
1386 Write8(0x0F);
1387 if (op > 0xFF)
1388 Write8((op >> 8) & 0xFF);
1389 Write8(op & 0xFF);
1390 arg.WriteRest(this, extrabytes);
1391}
1392
// Convenience overload without a second register operand: forwards to the
// two-register WriteAVXOp() with INVALID_REG as regOp2.
1393 void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1394 {
1395 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1396 }
1397
// Maps the high byte of a 16-bit opcode to the value used for the VEX "mmmmm" field:
// 0x3A -> 3, 0x38 -> 2, anything else -> 1.
static int GetVEXmmmmm(u16 op)
{
    // Currently, only 0x38 and 0x3A are used as secondary escape byte.
    switch (op >> 8)
    {
    case 0x3A:
        return 3;
    case 0x38:
        return 2;
    default:
        return 1;
    }
}
1408
// Maps a legacy prefix byte to the value used for the VEX "pp" field:
// 0x66 -> 1, 0xF3 -> 2, 0xF2 -> 3, anything else -> 0.
static int GetVEXpp(u8 opPrefix)
{
    switch (opPrefix)
    {
    case 0x66:
        return 1;
    case 0xF3:
        return 2;
    case 0xF2:
        return 3;
    default:
        return 0;
    }
}
1420
1421void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1422{
1423 if (!Common::GetCPUCaps().avx)
1424 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
1425 int mmmmm = GetVEXmmmmm(op);
1426 int pp = GetVEXpp(opPrefix);
1427 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here
1428 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
1429 Write8(op & 0xFF);
1430 arg.WriteRest(this, extrabytes, regOp1);
1431}
1432
1433// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1434void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1435{
1436 if (size != 32 && size != 64)
1437 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
1438 int mmmmm = GetVEXmmmmm(op);
1439 int pp = GetVEXpp(opPrefix);
1440 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
1441 Write8(op & 0xFF);
1442 arg.WriteRest(this, extrabytes, regOp1);
1443}
1444
1445void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1446{
1447 CheckFlags();
1448 if (!Common::GetCPUCaps().bmi1)
1449 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
1450 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1451}
1452
1453void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1454{
1455 CheckFlags();
1456 if (!Common::GetCPUCaps().bmi2)
1457 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
1458 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1459}
1460
// MOVD between an XMM register and `arg`: opcode 0x6E when the XMM register is the
// destination, 0x7E when it is the source; both use the 0x66 prefix.
1461 void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);}
1462 void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);}
1463
1464void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg)
1465{
1466#ifdef ARCHITECTURE_x86_64
1467 // Alternate encoding
1468 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1469 arg.operandReg = dest;
1470 Write8(0x66);
1471 arg.WriteRex(this, 64, 0);
1472 Write8(0x0f);
1473 Write8(0x6E);
1474 arg.WriteRest(this, 0);
1475#else
1476 arg.operandReg = dest;
1477 Write8(0xF3);
1478 Write8(0x0f);
1479 Write8(0x7E);
1480 arg.WriteRest(this, 0);
1481#endif
1482}
1483
1484void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
1485{
1486 if (src > 7 || arg.IsSimpleReg())
1487 {
1488 // Alternate encoding
1489 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1490 arg.operandReg = src;
1491 Write8(0x66);
1492 arg.WriteRex(this, 64, 0);
1493 Write8(0x0f);
1494 Write8(0x7E);
1495 arg.WriteRest(this, 0);
1496 }
1497 else
1498 {
1499 arg.operandReg = src;
1500 arg.WriteRex(this, 0, 0);
1501 Write8(0x66);
1502 Write8(0x0f);
1503 Write8(0xD6);
1504 arg.WriteRest(this, 0);
1505 }
1506}
1507
1508void XEmitter::WriteMXCSR(OpArg arg, int ext)
1509{
1510 if (arg.IsImm() || arg.IsSimpleReg())
1511 ASSERT_MSG(0, "MXCSR - invalid operand");
1512
1513 arg.operandReg = ext;
1514 arg.WriteRex(this, 0, 0);
1515 Write8(0x0F);
1516 Write8(0xAE);
1517 arg.WriteRest(this);
1518}
1519
// MXCSR store/load: both go through WriteMXCSR(), with ext 3 for STMXCSR and 2 for LDMXCSR.
1520 void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
1521 void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}
1522
// MOVNT* stores: `arg` is the destination operand and regOp the source register.
// MOVNTPS uses no prefix; MOVNTDQ and MOVNTPD use 0x66.
1523 void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
1524 void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
1525 void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
1526
// Scalar SSE arithmetic. Each SS/SD pair shares one opcode constant and differs only
// in the prefix: 0xF3 for the SS form, 0xF2 for the SD form.
// CMPSS/CMPSD pass extrabytes = 1 and then append the `compare` byte.
1527 void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
1528 void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
1529 void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
1530 void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
1531 void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
1532 void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
1533 void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
1534 void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
1535 void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
1536 void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
1537 void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
1538 void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
1539 void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
1540 void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
1541 void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
1542 void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
1543 void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
1544
// Packed SSE arithmetic and logic. Each PS/PD pair shares one opcode constant and
// differs only in the prefix: none (0x00) for the PS form, 0x66 for the PD form.
// CMPPS/CMPPD and SHUFPS/SHUFPD pass extrabytes = 1 and then append one immediate byte.
1545 void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
1546 void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
1547 void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
1548 void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
1549 void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
1550 void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
1551 void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
1552 void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
1553 void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
1554 void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
1555 void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
1556 void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
1557 void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
1558 void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
1559 void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
1560 void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
1561 void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
1562 void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
1563 void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
1564 void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
1565 void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
1566 void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
1567 void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
1568 void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
1569 void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
1570 void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
1571 void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
1572 void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
1573
// HADDPS is emitted with the 0xF2 prefix and the sseHADD opcode.
1574 void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
1575
// Scalar compares: the SS forms use no prefix and the SD forms use 0x66.
1576 void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
1577 void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
1578 void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
1579 void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
1580
// SSE data moves. Overloads taking (X64Reg, OpArg) use the *fromRM opcode constant and
// overloads taking (OpArg, X64Reg) use the *toRM constant; in both cases the X64Reg is
// passed to WriteSSEOp() as the register operand.
// MOVAPS/MOVAPD: no prefix / 0x66.
1581 void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
1582 void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
1583 void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
1584 void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
1585
// MOVUPS/MOVUPD: no prefix / 0x66.
1586 void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
1587 void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
1588 void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
1589 void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
1590
// MOVDQA/MOVDQU share the sseMOVDQ* opcodes: prefix 0x66 for MOVDQA, 0xF3 for MOVDQU.
1591 void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
1592 void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
1593 void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
1594 void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
1595
// MOVSS/MOVSD reuse the sseMOVUP* opcodes with prefix 0xF3 / 0xF2.
1596 void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
1597 void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
1598 void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
1599 void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
1600
// MOVLPS/MOVLPD: no prefix / 0x66.
1601 void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
1602 void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
1603 void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
1604 void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
1605
// MOVHPS/MOVHPD: no prefix / 0x66.
1606 void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
1607 void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
1608 void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
1609 void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
1610
// Register-to-register only: the second register is wrapped with R().
1611 void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
1612 void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
1613
// Conversion instructions. Several share an opcode byte and are distinguished only by
// the prefix passed to WriteSSEOp() (for example 0x5A with no prefix, 0x66, 0xF2 or 0xF3).
1614 void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
1615 void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
1616
1617 void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
1618 void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
1619 void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
1620 void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
1621 void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
1622 void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
1623
1624 void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
1625 void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
1626 void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
1627 void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
1628
// CVTT* variants: same prefixes as the matching CVT* forms, with different opcode bytes
// for the scalar forms (0x2C) and different prefixes for the packed forms.
1629 void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
1630 void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
1631 void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
1632 void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
1633
// MASKMOVDQU takes two registers; `src` is wrapped with R() as the r/m operand.
1634 void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
1635
// MOVMSKPS/MOVMSKPD: opcode 0x50 with no prefix / 0x66.
1636 void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
1637 void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
1638
1639 void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
1640
// UNPCKL*/UNPCKH*: opcodes 0x14 / 0x15, with no prefix for PS and 0x66 for PD.
1641 // THESE TWO ARE UNTESTED.
1642 void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
1643 void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
1644
1645 void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
1646 void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
1647
1648void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
1649{
1650 if (Common::GetCPUCaps().sse3)
1651 {
1652 WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup
1653 }
1654 else
1655 {
1656 // Simulate this instruction with SSE2 instructions
1657 if (!arg.IsSimpleReg(regOp))
1658 MOVSD(regOp, arg);
1659 UNPCKLPD(regOp, R(regOp));
1660 }
1661}
1662
1663//There are a few more left
1664
1665// Also some integer instructions are missing
// Integer pack instructions, all with the 0x66 prefix.
1666 void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
1667 void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
1668 void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
1669
// Integer low-half unpack instructions, all with the 0x66 prefix.
1670 void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
1671 void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
1672 void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);}
1673 void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);}
1674
// SSE2 shifts. In the immediate forms the small constant cast to X64Reg is passed to
// WriteSSEOp() in the register-operand slot (it selects the operation within the shared
// opcode byte), `reg` is the r/m operand, and the shift count byte is appended afterwards.

// PSRLW reg, imm8: 66 0F 71 with selector 2.
1675 void XEmitter::PSRLW(X64Reg reg, int shift)
1676 {
1677 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
1678 Write8(shift);
1679 }
1680
// PSRLD reg, imm8: 66 0F 72 with selector 2.
1681 void XEmitter::PSRLD(X64Reg reg, int shift)
1682 {
1683 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
1684 Write8(shift);
1685 }
1686
// PSRLQ reg, imm8: 66 0F 73 with selector 2.
1687 void XEmitter::PSRLQ(X64Reg reg, int shift)
1688 {
1689 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
1690 Write8(shift);
1691 }
1692
// PSRLQ reg, arg: 66 0F D3, shift count taken from `arg` rather than an immediate.
1693 void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
1694 {
1695 WriteSSEOp(0x66, 0xd3, reg, arg);
1696 }
1697
// PSRLDQ reg, imm8: 66 0F 73 with selector 3.
1698 void XEmitter::PSRLDQ(X64Reg reg, int shift) {
1699 WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
1700 Write8(shift);
1701 }
1702
// PSLLW reg, imm8: 66 0F 71 with selector 6.
1703 void XEmitter::PSLLW(X64Reg reg, int shift)
1704 {
1705 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
1706 Write8(shift);
1707 }
1708
// PSLLD reg, imm8: 66 0F 72 with selector 6.
1709 void XEmitter::PSLLD(X64Reg reg, int shift)
1710 {
1711 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
1712 Write8(shift);
1713 }
1714
// PSLLQ reg, imm8: 66 0F 73 with selector 6.
1715 void XEmitter::PSLLQ(X64Reg reg, int shift)
1716 {
1717 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
1718 Write8(shift);
1719 }
1720
// PSLLDQ reg, imm8: 66 0F 73 with selector 7.
1721 void XEmitter::PSLLDQ(X64Reg reg, int shift) {
1722 WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
1723 Write8(shift);
1724 }
1725
// PSRAW reg, imm8: 66 0F 71 with selector 4.
1726 void XEmitter::PSRAW(X64Reg reg, int shift)
1727 {
1728 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
1729 Write8(shift);
1730 }
1731
// PSRAD reg, imm8: 66 0F 72 with selector 4.
1732 void XEmitter::PSRAD(X64Reg reg, int shift)
1733 {
1734 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
1735 Write8(shift);
1736 }
1737
1738void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1739{
1740 if (!Common::GetCPUCaps().ssse3)
1741 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1742 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1743}
1744
1745void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1746{
1747 if (!Common::GetCPUCaps().sse4_1)
1748 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1749 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1750}
1751
// SSSE3 / SSE4.1 instructions. All use the 0x66 prefix and a 16-bit opcode whose high
// byte (0x38 or 0x3A) is written by WriteSSEOp() as an extra byte after 0x0F.
// PSHUFB goes through the SSSE3 capability check, everything else through the SSE4.1 one.
// Forms that take an extra u8 pass extrabytes = 1 and then append that byte.
1752 void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
1753 void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
1754 void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
1755 void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
1756
// Packed integer min/max: opcodes 0x3838 through 0x383f.
1757 void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
1758 void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
1759 void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
1760 void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
1761 void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
1762 void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
1763 void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
1764 void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
1765
// PMOVSX* use opcodes 0x3820-0x3825; PMOVZX* use 0x3830-0x3835 in the same order.
1766 void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
1767 void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
1768 void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
1769 void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
1770 void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
1771 void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
1772 void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
1773 void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
1774 void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
1775 void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
1776 void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
1777 void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
1778
// Blends: the *V* forms take no immediate here; BLENDPS/BLENDPD append a `blend` byte.
1779 void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
1780 void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
1781 void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
1782 void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
1783 void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
1784
// ROUND*: each appends the `mode` byte after the operand bytes.
1785 void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
1786 void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
1787 void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
1788 void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
1789
// SSE2 integer instructions. Unless noted, each is WriteSSEOp() with the 0x66 prefix
// and a single opcode byte; `dest` is the register operand and `arg` the other operand.

// Bitwise logic.
1790 void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
1791 void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
1792 void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
1793 void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
1794
// Packed add.
1795 void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
1796 void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
1797 void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
1798 void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
1799
1800 void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
1801 void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
1802 void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
1803 void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
1804
// Packed subtract.
1805 void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
1806 void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
1807 void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
1808 void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
1809
1810 void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
1811 void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
1812 void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
1813 void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
1814
// Packed average.
1815 void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
1816 void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
1817
// Packed compares.
1818 void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
1819 void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
1820 void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
1821
1822 void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
1823 void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
1824 void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
1825
// Word extract/insert: pass extrabytes = 1 and append the `subreg` byte.
1826 void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
1827 void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
1828
1829 void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
1830 void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
1831
// Packed min/max.
1832 void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
1833 void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
1834 void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
1835 void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
1836
// PSHUFD/PSHUFLW/PSHUFHW share opcode 0x70 and differ by prefix (0x66 / 0xF2 / 0xF3);
// each appends the `shuffle` byte.
1837 void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
1838 void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
1839 void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
1840 void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
1841
1842 // VEX
// Three-operand VEX-encoded forms. Each forwards to WriteAVXOp() with regOp1 and regOp2
// as the two register operands and `arg` as the third operand; the prefix byte and opcode
// match the legacy SSE instruction of the same name without the leading "V".
1843 void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
1844 void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
1845 void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
1846 void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
1847 void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
1848 void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
1849 void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
1850 void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
1851 void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
// VSHUFPD passes extrabytes = 1 and appends the `shuffle` byte.
1852 void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
1853 void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
1854 void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
1855
// Floating-point bitwise logic: no prefix for PS, 0x66 for PD.
1856 void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
1857 void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
1858 void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
1859 void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
1860 void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
1861 void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
1862 void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
1863 void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
1864
// Integer bitwise logic, same opcode bytes as PAND/PANDN/POR/PXOR.
1865 void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
1866 void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
1867 void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
1868 void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
1869
// Packed-single fused multiply-add: 16-bit opcodes with 0x38 in the high byte.
// NOTE(review): only the AVX capability is checked by WriteAVXOp(); presumably a
// separate FMA capability should gate these - confirm against cpu_detect.h.
1870 void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
1871 void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
1872 void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
1873void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
1874void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
1875void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
1876void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
1877void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
1878void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
1879void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
1880void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
1881void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
1882void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
1883void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
1884void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
1885void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
1886void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
1887void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
1888void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
1889void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
1890void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
1891void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
1892void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
1893void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
1894void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
1895void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
1896void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
1897void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
1898void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
1899void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
1900void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
1901void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
1902void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
1903void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
1904void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
1905void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
1906void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
1907void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
1908void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
1909void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
1910void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
1911void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
1912void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
1913void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
1914void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
1915void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
1916void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
1917void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
1918void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
1919void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
1920void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
1921void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
1922void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
1923void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
1924void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
1925void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
1926void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
1927void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
1928void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
1929void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
1930
1931void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
1932void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
1933void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
1934void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
1935void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
1936void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
1937void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
1938void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
1939void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
1940void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
1941void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
1942void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
1943void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
1944
1945// Prefixes
1946
1947void XEmitter::LOCK() { Write8(0xF0); }
1948void XEmitter::REP() { Write8(0xF3); }
1949void XEmitter::REPNE() { Write8(0xF2); }
1950void XEmitter::FSOverride() { Write8(0x64); }
1951void XEmitter::GSOverride() { Write8(0x65); }
1952
1953void XEmitter::FWAIT()
1954{
1955 Write8(0x9B);
1956}
1957
1958// TODO: make this more generic
1959void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
1960{
1961 int mf = 0;
1962 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
1963 switch (bits)
1964 {
1965 case 32: mf = 0; break;
1966 case 64: mf = 4; break;
1967 case 80: mf = 2; break;
1968 default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
1969 }
1970 Write8(0xd9 | mf);
1971 // x87 instructions use the reg field of the ModR/M byte as opcode:
1972 if (bits == 80)
1973 op = op_80b;
1974 arg.WriteRest(this, 0, (X64Reg) op);
1975}
1976
1977void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
1978void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
1979void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
1980void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
1981
1982void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
1983
1984void XCodeBlock::PoisonMemory() {
1985 // x86/64: 0xCC = breakpoint
1986 memset(region, 0xCC, region_size);
1987}
1988
1989}