diff options
Diffstat (limited to 'src/common/x64/emitter.h')
| -rw-r--r-- | src/common/x64/emitter.h | 849 |
1 files changed, 414 insertions, 435 deletions
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index e9c924126..86f4a1fff 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h | |||
| @@ -328,8 +328,6 @@ enum SSECompare | |||
| 328 | ORD, | 328 | ORD, |
| 329 | }; | 329 | }; |
| 330 | 330 | ||
| 331 | typedef const u8* JumpTarget; | ||
| 332 | |||
| 333 | class XEmitter | 331 | class XEmitter |
| 334 | { | 332 | { |
| 335 | friend struct OpArg; // for Write8 etc | 333 | friend struct OpArg; // for Write8 etc |
| @@ -344,27 +342,27 @@ private: | |||
| 344 | void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); | 342 | void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); |
| 345 | void WriteMulDivType(int bits, OpArg src, int ext); | 343 | void WriteMulDivType(int bits, OpArg src, int ext); |
| 346 | void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); | 344 | void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); |
| 347 | void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); | 345 | void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); |
| 348 | void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); | 346 | void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); |
| 349 | void WriteMXCSR(OpArg arg, int ext); | 347 | void WriteMXCSR(OpArg arg, int ext); |
| 350 | void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 348 | void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); |
| 351 | void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 349 | void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 352 | void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 350 | void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 353 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 351 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 354 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 352 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 355 | void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 353 | void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 356 | void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 354 | void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 357 | void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 355 | void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 358 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); | 356 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); |
| 359 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); | 357 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); |
| 360 | 358 | ||
| 361 | void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); | 359 | void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); |
| 362 | 360 | ||
| 363 | protected: | 361 | protected: |
| 364 | inline void Write8(u8 value) {*code++ = value;} | 362 | void Write8(u8 value) {*code++ = value;} |
| 365 | inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} | 363 | void Write16(u16 value) {*(u16*)code = (value); code += 2;} |
| 366 | inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} | 364 | void Write32(u32 value) {*(u32*)code = (value); code += 4;} |
| 367 | inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} | 365 | void Write64(u64 value) {*(u64*)code = (value); code += 8;} |
| 368 | 366 | ||
| 369 | public: | 367 | public: |
| 370 | XEmitter() { code = nullptr; flags_locked = false; } | 368 | XEmitter() { code = nullptr; flags_locked = false; } |
| @@ -413,8 +411,8 @@ public: | |||
| 413 | // Stack control | 411 | // Stack control |
| 414 | void PUSH(X64Reg reg); | 412 | void PUSH(X64Reg reg); |
| 415 | void POP(X64Reg reg); | 413 | void POP(X64Reg reg); |
| 416 | void PUSH(int bits, const OpArg &reg); | 414 | void PUSH(int bits, const OpArg& reg); |
| 417 | void POP(int bits, const OpArg &reg); | 415 | void POP(int bits, const OpArg& reg); |
| 418 | void PUSHF(); | 416 | void PUSHF(); |
| 419 | void POPF(); | 417 | void POPF(); |
| 420 | 418 | ||
| @@ -424,21 +422,19 @@ public: | |||
| 424 | void UD2(); | 422 | void UD2(); |
| 425 | FixupBranch J(bool force5bytes = false); | 423 | FixupBranch J(bool force5bytes = false); |
| 426 | 424 | ||
| 427 | void JMP(const u8 * addr, bool force5Bytes = false); | 425 | void JMP(const u8* addr, bool force5Bytes = false); |
| 428 | void JMP(OpArg arg); | 426 | void JMPptr(const OpArg& arg); |
| 429 | void JMPptr(const OpArg &arg); | ||
| 430 | void JMPself(); //infinite loop! | 427 | void JMPself(); //infinite loop! |
| 431 | #ifdef CALL | 428 | #ifdef CALL |
| 432 | #undef CALL | 429 | #undef CALL |
| 433 | #endif | 430 | #endif |
| 434 | void CALL(const void *fnptr); | 431 | void CALL(const void* fnptr); |
| 435 | void CALLptr(OpArg arg); | 432 | void CALLptr(OpArg arg); |
| 436 | 433 | ||
| 437 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); | 434 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); |
| 438 | //void J_CC(CCFlags conditionCode, JumpTarget target); | 435 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); |
| 439 | void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); | ||
| 440 | 436 | ||
| 441 | void SetJumpTarget(const FixupBranch &branch); | 437 | void SetJumpTarget(const FixupBranch& branch); |
| 442 | 438 | ||
| 443 | void SETcc(CCFlags flag, OpArg dest); | 439 | void SETcc(CCFlags flag, OpArg dest); |
| 444 | // Note: CMOV brings small if any benefit on current cpus. | 440 | // Note: CMOV brings small if any benefit on current cpus. |
| @@ -450,8 +446,8 @@ public: | |||
| 450 | void SFENCE(); | 446 | void SFENCE(); |
| 451 | 447 | ||
| 452 | // Bit scan | 448 | // Bit scan |
| 453 | void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit | 449 | void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit |
| 454 | void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit | 450 | void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit |
| 455 | 451 | ||
| 456 | // Cache control | 452 | // Cache control |
| 457 | enum PrefetchLevel | 453 | enum PrefetchLevel |
| @@ -462,67 +458,67 @@ public: | |||
| 462 | PF_T2, //Levels 3+ (aliased to T0 on AMD) | 458 | PF_T2, //Levels 3+ (aliased to T0 on AMD) |
| 463 | }; | 459 | }; |
| 464 | void PREFETCH(PrefetchLevel level, OpArg arg); | 460 | void PREFETCH(PrefetchLevel level, OpArg arg); |
| 465 | void MOVNTI(int bits, OpArg dest, X64Reg src); | 461 | void MOVNTI(int bits, const OpArg& dest, X64Reg src); |
| 466 | void MOVNTDQ(OpArg arg, X64Reg regOp); | 462 | void MOVNTDQ(const OpArg& arg, X64Reg regOp); |
| 467 | void MOVNTPS(OpArg arg, X64Reg regOp); | 463 | void MOVNTPS(const OpArg& arg, X64Reg regOp); |
| 468 | void MOVNTPD(OpArg arg, X64Reg regOp); | 464 | void MOVNTPD(const OpArg& arg, X64Reg regOp); |
| 469 | 465 | ||
| 470 | // Multiplication / division | 466 | // Multiplication / division |
| 471 | void MUL(int bits, OpArg src); //UNSIGNED | 467 | void MUL(int bits, const OpArg& src); //UNSIGNED |
| 472 | void IMUL(int bits, OpArg src); //SIGNED | 468 | void IMUL(int bits, const OpArg& src); //SIGNED |
| 473 | void IMUL(int bits, X64Reg regOp, OpArg src); | 469 | void IMUL(int bits, X64Reg regOp, const OpArg& src); |
| 474 | void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); | 470 | void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); |
| 475 | void DIV(int bits, OpArg src); | 471 | void DIV(int bits, const OpArg& src); |
| 476 | void IDIV(int bits, OpArg src); | 472 | void IDIV(int bits, const OpArg& src); |
| 477 | 473 | ||
| 478 | // Shift | 474 | // Shift |
| 479 | void ROL(int bits, OpArg dest, OpArg shift); | 475 | void ROL(int bits, const OpArg& dest, const OpArg& shift); |
| 480 | void ROR(int bits, OpArg dest, OpArg shift); | 476 | void ROR(int bits, const OpArg& dest, const OpArg& shift); |
| 481 | void RCL(int bits, OpArg dest, OpArg shift); | 477 | void RCL(int bits, const OpArg& dest, const OpArg& shift); |
| 482 | void RCR(int bits, OpArg dest, OpArg shift); | 478 | void RCR(int bits, const OpArg& dest, const OpArg& shift); |
| 483 | void SHL(int bits, OpArg dest, OpArg shift); | 479 | void SHL(int bits, const OpArg& dest, const OpArg& shift); |
| 484 | void SHR(int bits, OpArg dest, OpArg shift); | 480 | void SHR(int bits, const OpArg& dest, const OpArg& shift); |
| 485 | void SAR(int bits, OpArg dest, OpArg shift); | 481 | void SAR(int bits, const OpArg& dest, const OpArg& shift); |
| 486 | 482 | ||
| 487 | // Bit Test | 483 | // Bit Test |
| 488 | void BT(int bits, OpArg dest, OpArg index); | 484 | void BT(int bits, const OpArg& dest, const OpArg& index); |
| 489 | void BTS(int bits, OpArg dest, OpArg index); | 485 | void BTS(int bits, const OpArg& dest, const OpArg& index); |
| 490 | void BTR(int bits, OpArg dest, OpArg index); | 486 | void BTR(int bits, const OpArg& dest, const OpArg& index); |
| 491 | void BTC(int bits, OpArg dest, OpArg index); | 487 | void BTC(int bits, const OpArg& dest, const OpArg& index); |
| 492 | 488 | ||
| 493 | // Double-Precision Shift | 489 | // Double-Precision Shift |
| 494 | void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); | 490 | void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); |
| 495 | void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); | 491 | void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); |
| 496 | 492 | ||
| 497 | // Extend EAX into EDX in various ways | 493 | // Extend EAX into EDX in various ways |
| 498 | void CWD(int bits = 16); | 494 | void CWD(int bits = 16); |
| 499 | inline void CDQ() {CWD(32);} | 495 | void CDQ() {CWD(32);} |
| 500 | inline void CQO() {CWD(64);} | 496 | void CQO() {CWD(64);} |
| 501 | void CBW(int bits = 8); | 497 | void CBW(int bits = 8); |
| 502 | inline void CWDE() {CBW(16);} | 498 | void CWDE() {CBW(16);} |
| 503 | inline void CDQE() {CBW(32);} | 499 | void CDQE() {CBW(32);} |
| 504 | 500 | ||
| 505 | // Load effective address | 501 | // Load effective address |
| 506 | void LEA(int bits, X64Reg dest, OpArg src); | 502 | void LEA(int bits, X64Reg dest, OpArg src); |
| 507 | 503 | ||
| 508 | // Integer arithmetic | 504 | // Integer arithmetic |
| 509 | void NEG (int bits, OpArg src); | 505 | void NEG(int bits, const OpArg& src); |
| 510 | void ADD (int bits, const OpArg &a1, const OpArg &a2); | 506 | void ADD(int bits, const OpArg& a1, const OpArg& a2); |
| 511 | void ADC (int bits, const OpArg &a1, const OpArg &a2); | 507 | void ADC(int bits, const OpArg& a1, const OpArg& a2); |
| 512 | void SUB (int bits, const OpArg &a1, const OpArg &a2); | 508 | void SUB(int bits, const OpArg& a1, const OpArg& a2); |
| 513 | void SBB (int bits, const OpArg &a1, const OpArg &a2); | 509 | void SBB(int bits, const OpArg& a1, const OpArg& a2); |
| 514 | void AND (int bits, const OpArg &a1, const OpArg &a2); | 510 | void AND(int bits, const OpArg& a1, const OpArg& a2); |
| 515 | void CMP (int bits, const OpArg &a1, const OpArg &a2); | 511 | void CMP(int bits, const OpArg& a1, const OpArg& a2); |
| 516 | 512 | ||
| 517 | // Bit operations | 513 | // Bit operations |
| 518 | void NOT (int bits, OpArg src); | 514 | void NOT (int bits, const OpArg& src); |
| 519 | void OR (int bits, const OpArg &a1, const OpArg &a2); | 515 | void OR(int bits, const OpArg& a1, const OpArg& a2); |
| 520 | void XOR (int bits, const OpArg &a1, const OpArg &a2); | 516 | void XOR(int bits, const OpArg& a1, const OpArg& a2); |
| 521 | void MOV (int bits, const OpArg &a1, const OpArg &a2); | 517 | void MOV(int bits, const OpArg& a1, const OpArg& a2); |
| 522 | void TEST(int bits, const OpArg &a1, const OpArg &a2); | 518 | void TEST(int bits, const OpArg& a1, const OpArg& a2); |
| 523 | 519 | ||
| 524 | // Are these useful at all? Consider removing. | 520 | // Are these useful at all? Consider removing. |
| 525 | void XCHG(int bits, const OpArg &a1, const OpArg &a2); | 521 | void XCHG(int bits, const OpArg& a1, const OpArg& a2); |
| 526 | void XCHG_AHAL(); | 522 | void XCHG_AHAL(); |
| 527 | 523 | ||
| 528 | // Byte swapping (32 and 64-bit only). | 524 | // Byte swapping (32 and 64-bit only). |
| @@ -536,13 +532,13 @@ public: | |||
| 536 | void MOVBE(int dbits, const OpArg& dest, const OpArg& src); | 532 | void MOVBE(int dbits, const OpArg& dest, const OpArg& src); |
| 537 | 533 | ||
| 538 | // Available only on AMD >= Phenom or Intel >= Haswell | 534 | // Available only on AMD >= Phenom or Intel >= Haswell |
| 539 | void LZCNT(int bits, X64Reg dest, OpArg src); | 535 | void LZCNT(int bits, X64Reg dest, const OpArg& src); |
| 540 | // Note: this one is actually part of BMI1 | 536 | // Note: this one is actually part of BMI1 |
| 541 | void TZCNT(int bits, X64Reg dest, OpArg src); | 537 | void TZCNT(int bits, X64Reg dest, const OpArg& src); |
| 542 | 538 | ||
| 543 | // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) | 539 | // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) |
| 544 | void STMXCSR(OpArg memloc); | 540 | void STMXCSR(const OpArg& memloc); |
| 545 | void LDMXCSR(OpArg memloc); | 541 | void LDMXCSR(const OpArg& memloc); |
| 546 | 542 | ||
| 547 | // Prefixes | 543 | // Prefixes |
| 548 | void LOCK(); | 544 | void LOCK(); |
| @@ -569,259 +565,242 @@ public: | |||
| 569 | x87_FPUBusy = 0x8000, | 565 | x87_FPUBusy = 0x8000, |
| 570 | }; | 566 | }; |
| 571 | 567 | ||
| 572 | void FLD(int bits, OpArg src); | 568 | void FLD(int bits, const OpArg& src); |
| 573 | void FST(int bits, OpArg dest); | 569 | void FST(int bits, const OpArg& dest); |
| 574 | void FSTP(int bits, OpArg dest); | 570 | void FSTP(int bits, const OpArg& dest); |
| 575 | void FNSTSW_AX(); | 571 | void FNSTSW_AX(); |
| 576 | void FWAIT(); | 572 | void FWAIT(); |
| 577 | 573 | ||
| 578 | // SSE/SSE2: Floating point arithmetic | 574 | // SSE/SSE2: Floating point arithmetic |
| 579 | void ADDSS(X64Reg regOp, OpArg arg); | 575 | void ADDSS(X64Reg regOp, const OpArg& arg); |
| 580 | void ADDSD(X64Reg regOp, OpArg arg); | 576 | void ADDSD(X64Reg regOp, const OpArg& arg); |
| 581 | void SUBSS(X64Reg regOp, OpArg arg); | 577 | void SUBSS(X64Reg regOp, const OpArg& arg); |
| 582 | void SUBSD(X64Reg regOp, OpArg arg); | 578 | void SUBSD(X64Reg regOp, const OpArg& arg); |
| 583 | void MULSS(X64Reg regOp, OpArg arg); | 579 | void MULSS(X64Reg regOp, const OpArg& arg); |
| 584 | void MULSD(X64Reg regOp, OpArg arg); | 580 | void MULSD(X64Reg regOp, const OpArg& arg); |
| 585 | void DIVSS(X64Reg regOp, OpArg arg); | 581 | void DIVSS(X64Reg regOp, const OpArg& arg); |
| 586 | void DIVSD(X64Reg regOp, OpArg arg); | 582 | void DIVSD(X64Reg regOp, const OpArg& arg); |
| 587 | void MINSS(X64Reg regOp, OpArg arg); | 583 | void MINSS(X64Reg regOp, const OpArg& arg); |
| 588 | void MINSD(X64Reg regOp, OpArg arg); | 584 | void MINSD(X64Reg regOp, const OpArg& arg); |
| 589 | void MAXSS(X64Reg regOp, OpArg arg); | 585 | void MAXSS(X64Reg regOp, const OpArg& arg); |
| 590 | void MAXSD(X64Reg regOp, OpArg arg); | 586 | void MAXSD(X64Reg regOp, const OpArg& arg); |
| 591 | void SQRTSS(X64Reg regOp, OpArg arg); | 587 | void SQRTSS(X64Reg regOp, const OpArg& arg); |
| 592 | void SQRTSD(X64Reg regOp, OpArg arg); | 588 | void SQRTSD(X64Reg regOp, const OpArg& arg); |
| 593 | void RSQRTSS(X64Reg regOp, OpArg arg); | 589 | void RSQRTSS(X64Reg regOp, const OpArg& arg); |
| 594 | 590 | ||
| 595 | // SSE/SSE2: Floating point bitwise (yes) | 591 | // SSE/SSE2: Floating point bitwise (yes) |
| 596 | void CMPSS(X64Reg regOp, OpArg arg, u8 compare); | 592 | void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); |
| 597 | void CMPSD(X64Reg regOp, OpArg arg, u8 compare); | 593 | void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); |
| 598 | 594 | ||
| 599 | inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } | 595 | void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } |
| 600 | inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } | 596 | void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } |
| 601 | inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } | 597 | void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } |
| 602 | inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } | 598 | void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } |
| 603 | inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } | 599 | void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } |
| 604 | inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } | 600 | void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } |
| 605 | inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } | 601 | void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } |
| 606 | 602 | ||
| 607 | // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) | 603 | // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) |
| 608 | void ADDPS(X64Reg regOp, OpArg arg); | 604 | void ADDPS(X64Reg regOp, const OpArg& arg); |
| 609 | void ADDPD(X64Reg regOp, OpArg arg); | 605 | void ADDPD(X64Reg regOp, const OpArg& arg); |
| 610 | void SUBPS(X64Reg regOp, OpArg arg); | 606 | void SUBPS(X64Reg regOp, const OpArg& arg); |
| 611 | void SUBPD(X64Reg regOp, OpArg arg); | 607 | void SUBPD(X64Reg regOp, const OpArg& arg); |
| 612 | void CMPPS(X64Reg regOp, OpArg arg, u8 compare); | 608 | void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); |
| 613 | void CMPPD(X64Reg regOp, OpArg arg, u8 compare); | 609 | void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); |
| 614 | void MULPS(X64Reg regOp, OpArg arg); | 610 | void MULPS(X64Reg regOp, const OpArg& arg); |
| 615 | void MULPD(X64Reg regOp, OpArg arg); | 611 | void MULPD(X64Reg regOp, const OpArg& arg); |
| 616 | void DIVPS(X64Reg regOp, OpArg arg); | 612 | void DIVPS(X64Reg regOp, const OpArg& arg); |
| 617 | void DIVPD(X64Reg regOp, OpArg arg); | 613 | void DIVPD(X64Reg regOp, const OpArg& arg); |
| 618 | void MINPS(X64Reg regOp, OpArg arg); | 614 | void MINPS(X64Reg regOp, const OpArg& arg); |
| 619 | void MINPD(X64Reg regOp, OpArg arg); | 615 | void MINPD(X64Reg regOp, const OpArg& arg); |
| 620 | void MAXPS(X64Reg regOp, OpArg arg); | 616 | void MAXPS(X64Reg regOp, const OpArg& arg); |
| 621 | void MAXPD(X64Reg regOp, OpArg arg); | 617 | void MAXPD(X64Reg regOp, const OpArg& arg); |
| 622 | void SQRTPS(X64Reg regOp, OpArg arg); | 618 | void SQRTPS(X64Reg regOp, const OpArg& arg); |
| 623 | void SQRTPD(X64Reg regOp, OpArg arg); | 619 | void SQRTPD(X64Reg regOp, const OpArg& arg); |
| 624 | void RCPPS(X64Reg regOp, OpArg arg); | 620 | void RCPPS(X64Reg regOp, const OpArg& arg); |
| 625 | void RSQRTPS(X64Reg regOp, OpArg arg); | 621 | void RSQRTPS(X64Reg regOp, const OpArg& arg); |
| 626 | 622 | ||
| 627 | // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) | 623 | // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) |
| 628 | void ANDPS(X64Reg regOp, OpArg arg); | 624 | void ANDPS(X64Reg regOp, const OpArg& arg); |
| 629 | void ANDPD(X64Reg regOp, OpArg arg); | 625 | void ANDPD(X64Reg regOp, const OpArg& arg); |
| 630 | void ANDNPS(X64Reg regOp, OpArg arg); | 626 | void ANDNPS(X64Reg regOp, const OpArg& arg); |
| 631 | void ANDNPD(X64Reg regOp, OpArg arg); | 627 | void ANDNPD(X64Reg regOp, const OpArg& arg); |
| 632 | void ORPS(X64Reg regOp, OpArg arg); | 628 | void ORPS(X64Reg regOp, const OpArg& arg); |
| 633 | void ORPD(X64Reg regOp, OpArg arg); | 629 | void ORPD(X64Reg regOp, const OpArg& arg); |
| 634 | void XORPS(X64Reg regOp, OpArg arg); | 630 | void XORPS(X64Reg regOp, const OpArg& arg); |
| 635 | void XORPD(X64Reg regOp, OpArg arg); | 631 | void XORPD(X64Reg regOp, const OpArg& arg); |
| 636 | 632 | ||
| 637 | // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. | 633 | // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. |
| 638 | void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); | 634 | void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); |
| 639 | void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); | 635 | void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); |
| 640 | 636 | ||
| 641 | // SSE/SSE2: Useful alternative to shuffle in some cases. | 637 | // SSE/SSE2: Useful alternative to shuffle in some cases. |
| 642 | void MOVDDUP(X64Reg regOp, OpArg arg); | 638 | void MOVDDUP(X64Reg regOp, const OpArg& arg); |
| 643 | |||
| 644 | // TODO: Actually implement | ||
| 645 | #if 0 | ||
| 646 | // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... | ||
| 647 | void ADDSUBPS(X64Reg dest, OpArg src); | ||
| 648 | void ADDSUBPD(X64Reg dest, OpArg src); | ||
| 649 | void HADDPD(X64Reg dest, OpArg src); | ||
| 650 | void HSUBPS(X64Reg dest, OpArg src); | ||
| 651 | void HSUBPD(X64Reg dest, OpArg src); | ||
| 652 | |||
| 653 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". | ||
| 654 | void DPPD(X64Reg dest, OpArg src, u8 arg); | ||
| 655 | |||
| 656 | // These are probably useful for VFPU emulation. | ||
| 657 | void INSERTPS(X64Reg dest, OpArg src, u8 arg); | ||
| 658 | void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); | ||
| 659 | #endif | ||
| 660 | 639 | ||
| 661 | // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. | 640 | // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. |
| 662 | void HADDPS(X64Reg dest, OpArg src); | 641 | void HADDPS(X64Reg dest, const OpArg& src); |
| 663 | 642 | ||
| 664 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". | 643 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". |
| 665 | void DPPS(X64Reg dest, OpArg src, u8 arg); | 644 | void DPPS(X64Reg dest, const OpArg& src, u8 arg); |
| 666 | 645 | ||
| 667 | void UNPCKLPS(X64Reg dest, OpArg src); | 646 | void UNPCKLPS(X64Reg dest, const OpArg& src); |
| 668 | void UNPCKHPS(X64Reg dest, OpArg src); | 647 | void UNPCKHPS(X64Reg dest, const OpArg& src); |
| 669 | void UNPCKLPD(X64Reg dest, OpArg src); | 648 | void UNPCKLPD(X64Reg dest, const OpArg& src); |
| 670 | void UNPCKHPD(X64Reg dest, OpArg src); | 649 | void UNPCKHPD(X64Reg dest, const OpArg& src); |
| 671 | 650 | ||
| 672 | // SSE/SSE2: Compares. | 651 | // SSE/SSE2: Compares. |
| 673 | void COMISS(X64Reg regOp, OpArg arg); | 652 | void COMISS(X64Reg regOp, const OpArg& arg); |
| 674 | void COMISD(X64Reg regOp, OpArg arg); | 653 | void COMISD(X64Reg regOp, const OpArg& arg); |
| 675 | void UCOMISS(X64Reg regOp, OpArg arg); | 654 | void UCOMISS(X64Reg regOp, const OpArg& arg); |
| 676 | void UCOMISD(X64Reg regOp, OpArg arg); | 655 | void UCOMISD(X64Reg regOp, const OpArg& arg); |
| 677 | 656 | ||
| 678 | // SSE/SSE2: Moves. Use the right data type for your data, in most cases. | 657 | // SSE/SSE2: Moves. Use the right data type for your data, in most cases. |
| 679 | void MOVAPS(X64Reg regOp, OpArg arg); | 658 | void MOVAPS(X64Reg regOp, const OpArg& arg); |
| 680 | void MOVAPD(X64Reg regOp, OpArg arg); | 659 | void MOVAPD(X64Reg regOp, const OpArg& arg); |
| 681 | void MOVAPS(OpArg arg, X64Reg regOp); | 660 | void MOVAPS(const OpArg& arg, X64Reg regOp); |
| 682 | void MOVAPD(OpArg arg, X64Reg regOp); | 661 | void MOVAPD(const OpArg& arg, X64Reg regOp); |
| 683 | 662 | ||
| 684 | void MOVUPS(X64Reg regOp, OpArg arg); | 663 | void MOVUPS(X64Reg regOp, const OpArg& arg); |
| 685 | void MOVUPD(X64Reg regOp, OpArg arg); | 664 | void MOVUPD(X64Reg regOp, const OpArg& arg); |
| 686 | void MOVUPS(OpArg arg, X64Reg regOp); | 665 | void MOVUPS(const OpArg& arg, X64Reg regOp); |
| 687 | void MOVUPD(OpArg arg, X64Reg regOp); | 666 | void MOVUPD(const OpArg& arg, X64Reg regOp); |
| 688 | 667 | ||
| 689 | void MOVDQA(X64Reg regOp, OpArg arg); | 668 | void MOVDQA(X64Reg regOp, const OpArg& arg); |
| 690 | void MOVDQA(OpArg arg, X64Reg regOp); | 669 | void MOVDQA(const OpArg& arg, X64Reg regOp); |
| 691 | void MOVDQU(X64Reg regOp, OpArg arg); | 670 | void MOVDQU(X64Reg regOp, const OpArg& arg); |
| 692 | void MOVDQU(OpArg arg, X64Reg regOp); | 671 | void MOVDQU(const OpArg& arg, X64Reg regOp); |
| 693 | 672 | ||
| 694 | void MOVSS(X64Reg regOp, OpArg arg); | 673 | void MOVSS(X64Reg regOp, const OpArg& arg); |
| 695 | void MOVSD(X64Reg regOp, OpArg arg); | 674 | void MOVSD(X64Reg regOp, const OpArg& arg); |
| 696 | void MOVSS(OpArg arg, X64Reg regOp); | 675 | void MOVSS(const OpArg& arg, X64Reg regOp); |
| 697 | void MOVSD(OpArg arg, X64Reg regOp); | 676 | void MOVSD(const OpArg& arg, X64Reg regOp); |
| 698 | 677 | ||
| 699 | void MOVLPS(X64Reg regOp, OpArg arg); | 678 | void MOVLPS(X64Reg regOp, const OpArg& arg); |
| 700 | void MOVLPD(X64Reg regOp, OpArg arg); | 679 | void MOVLPD(X64Reg regOp, const OpArg& arg); |
| 701 | void MOVLPS(OpArg arg, X64Reg regOp); | 680 | void MOVLPS(const OpArg& arg, X64Reg regOp); |
| 702 | void MOVLPD(OpArg arg, X64Reg regOp); | 681 | void MOVLPD(const OpArg& arg, X64Reg regOp); |
| 703 | 682 | ||
| 704 | void MOVHPS(X64Reg regOp, OpArg arg); | 683 | void MOVHPS(X64Reg regOp, const OpArg& arg); |
| 705 | void MOVHPD(X64Reg regOp, OpArg arg); | 684 | void MOVHPD(X64Reg regOp, const OpArg& arg); |
| 706 | void MOVHPS(OpArg arg, X64Reg regOp); | 685 | void MOVHPS(const OpArg& arg, X64Reg regOp); |
| 707 | void MOVHPD(OpArg arg, X64Reg regOp); | 686 | void MOVHPD(const OpArg& arg, X64Reg regOp); |
| 708 | 687 | ||
| 709 | void MOVHLPS(X64Reg regOp1, X64Reg regOp2); | 688 | void MOVHLPS(X64Reg regOp1, X64Reg regOp2); |
| 710 | void MOVLHPS(X64Reg regOp1, X64Reg regOp2); | 689 | void MOVLHPS(X64Reg regOp1, X64Reg regOp2); |
| 711 | 690 | ||
| 712 | void MOVD_xmm(X64Reg dest, const OpArg &arg); | 691 | void MOVD_xmm(X64Reg dest, const OpArg& arg); |
| 713 | void MOVQ_xmm(X64Reg dest, OpArg arg); | 692 | void MOVQ_xmm(X64Reg dest, OpArg arg); |
| 714 | void MOVD_xmm(const OpArg &arg, X64Reg src); | 693 | void MOVD_xmm(const OpArg& arg, X64Reg src); |
| 715 | void MOVQ_xmm(OpArg arg, X64Reg src); | 694 | void MOVQ_xmm(OpArg arg, X64Reg src); |
| 716 | 695 | ||
| 717 | // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. | 696 | // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. |
| 718 | void MOVMSKPS(X64Reg dest, OpArg arg); | 697 | void MOVMSKPS(X64Reg dest, const OpArg& arg); |
| 719 | void MOVMSKPD(X64Reg dest, OpArg arg); | 698 | void MOVMSKPD(X64Reg dest, const OpArg& arg); |
| 720 | 699 | ||
| 721 | // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. | 700 | // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. |
| 722 | void MASKMOVDQU(X64Reg dest, X64Reg src); | 701 | void MASKMOVDQU(X64Reg dest, X64Reg src); |
| 723 | void LDDQU(X64Reg dest, OpArg src); | 702 | void LDDQU(X64Reg dest, const OpArg& src); |
| 724 | 703 | ||
| 725 | // SSE/SSE2: Data type conversions. | 704 | // SSE/SSE2: Data type conversions. |
| 726 | void CVTPS2PD(X64Reg dest, OpArg src); | 705 | void CVTPS2PD(X64Reg dest, const OpArg& src); |
| 727 | void CVTPD2PS(X64Reg dest, OpArg src); | 706 | void CVTPD2PS(X64Reg dest, const OpArg& src); |
| 728 | void CVTSS2SD(X64Reg dest, OpArg src); | 707 | void CVTSS2SD(X64Reg dest, const OpArg& src); |
| 729 | void CVTSI2SS(X64Reg dest, OpArg src); | 708 | void CVTSI2SS(X64Reg dest, const OpArg& src); |
| 730 | void CVTSD2SS(X64Reg dest, OpArg src); | 709 | void CVTSD2SS(X64Reg dest, const OpArg& src); |
| 731 | void CVTSI2SD(X64Reg dest, OpArg src); | 710 | void CVTSI2SD(X64Reg dest, const OpArg& src); |
| 732 | void CVTDQ2PD(X64Reg regOp, OpArg arg); | 711 | void CVTDQ2PD(X64Reg regOp, const OpArg& arg); |
| 733 | void CVTPD2DQ(X64Reg regOp, OpArg arg); | 712 | void CVTPD2DQ(X64Reg regOp, const OpArg& arg); |
| 734 | void CVTDQ2PS(X64Reg regOp, OpArg arg); | 713 | void CVTDQ2PS(X64Reg regOp, const OpArg& arg); |
| 735 | void CVTPS2DQ(X64Reg regOp, OpArg arg); | 714 | void CVTPS2DQ(X64Reg regOp, const OpArg& arg); |
| 736 | 715 | ||
| 737 | void CVTTPS2DQ(X64Reg regOp, OpArg arg); | 716 | void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); |
| 738 | void CVTTPD2DQ(X64Reg regOp, OpArg arg); | 717 | void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); |
| 739 | 718 | ||
| 740 | // Destinations are X64 regs (rax, rbx, ...) for these instructions. | 719 | // Destinations are X64 regs (rax, rbx, ...) for these instructions. |
| 741 | void CVTSS2SI(X64Reg xregdest, OpArg src); | 720 | void CVTSS2SI(X64Reg xregdest, const OpArg& src); |
| 742 | void CVTSD2SI(X64Reg xregdest, OpArg src); | 721 | void CVTSD2SI(X64Reg xregdest, const OpArg& src); |
| 743 | void CVTTSS2SI(X64Reg xregdest, OpArg arg); | 722 | void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); |
| 744 | void CVTTSD2SI(X64Reg xregdest, OpArg arg); | 723 | void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); |
| 745 | 724 | ||
| 746 | // SSE2: Packed integer instructions | 725 | // SSE2: Packed integer instructions |
| 747 | void PACKSSDW(X64Reg dest, OpArg arg); | 726 | void PACKSSDW(X64Reg dest, const OpArg& arg); |
| 748 | void PACKSSWB(X64Reg dest, OpArg arg); | 727 | void PACKSSWB(X64Reg dest, const OpArg& arg); |
| 749 | void PACKUSDW(X64Reg dest, OpArg arg); | 728 | void PACKUSDW(X64Reg dest, const OpArg& arg); |
| 750 | void PACKUSWB(X64Reg dest, OpArg arg); | 729 | void PACKUSWB(X64Reg dest, const OpArg& arg); |
| 751 | 730 | ||
| 752 | void PUNPCKLBW(X64Reg dest, const OpArg &arg); | 731 | void PUNPCKLBW(X64Reg dest, const OpArg &arg); |
| 753 | void PUNPCKLWD(X64Reg dest, const OpArg &arg); | 732 | void PUNPCKLWD(X64Reg dest, const OpArg &arg); |
| 754 | void PUNPCKLDQ(X64Reg dest, const OpArg &arg); | 733 | void PUNPCKLDQ(X64Reg dest, const OpArg &arg); |
| 755 | void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); | 734 | void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); |
| 756 | 735 | ||
| 757 | void PTEST(X64Reg dest, OpArg arg); | 736 | void PTEST(X64Reg dest, const OpArg& arg); |
| 758 | void PAND(X64Reg dest, OpArg arg); | 737 | void PAND(X64Reg dest, const OpArg& arg); |
| 759 | void PANDN(X64Reg dest, OpArg arg); | 738 | void PANDN(X64Reg dest, const OpArg& arg); |
| 760 | void PXOR(X64Reg dest, OpArg arg); | 739 | void PXOR(X64Reg dest, const OpArg& arg); |
| 761 | void POR(X64Reg dest, OpArg arg); | 740 | void POR(X64Reg dest, const OpArg& arg); |
| 762 | 741 | ||
| 763 | void PADDB(X64Reg dest, OpArg arg); | 742 | void PADDB(X64Reg dest, const OpArg& arg); |
| 764 | void PADDW(X64Reg dest, OpArg arg); | 743 | void PADDW(X64Reg dest, const OpArg& arg); |
| 765 | void PADDD(X64Reg dest, OpArg arg); | 744 | void PADDD(X64Reg dest, const OpArg& arg); |
| 766 | void PADDQ(X64Reg dest, OpArg arg); | 745 | void PADDQ(X64Reg dest, const OpArg& arg); |
| 767 | 746 | ||
| 768 | void PADDSB(X64Reg dest, OpArg arg); | 747 | void PADDSB(X64Reg dest, const OpArg& arg); |
| 769 | void PADDSW(X64Reg dest, OpArg arg); | 748 | void PADDSW(X64Reg dest, const OpArg& arg); |
| 770 | void PADDUSB(X64Reg dest, OpArg arg); | 749 | void PADDUSB(X64Reg dest, const OpArg& arg); |
| 771 | void PADDUSW(X64Reg dest, OpArg arg); | 750 | void PADDUSW(X64Reg dest, const OpArg& arg); |
| 772 | 751 | ||
| 773 | void PSUBB(X64Reg dest, OpArg arg); | 752 | void PSUBB(X64Reg dest, const OpArg& arg); |
| 774 | void PSUBW(X64Reg dest, OpArg arg); | 753 | void PSUBW(X64Reg dest, const OpArg& arg); |
| 775 | void PSUBD(X64Reg dest, OpArg arg); | 754 | void PSUBD(X64Reg dest, const OpArg& arg); |
| 776 | void PSUBQ(X64Reg dest, OpArg arg); | 755 | void PSUBQ(X64Reg dest, const OpArg& arg); |
| 777 | 756 | ||
| 778 | void PSUBSB(X64Reg dest, OpArg arg); | 757 | void PSUBSB(X64Reg dest, const OpArg& arg); |
| 779 | void PSUBSW(X64Reg dest, OpArg arg); | 758 | void PSUBSW(X64Reg dest, const OpArg& arg); |
| 780 | void PSUBUSB(X64Reg dest, OpArg arg); | 759 | void PSUBUSB(X64Reg dest, const OpArg& arg); |
| 781 | void PSUBUSW(X64Reg dest, OpArg arg); | 760 | void PSUBUSW(X64Reg dest, const OpArg& arg); |
| 782 | 761 | ||
| 783 | void PAVGB(X64Reg dest, OpArg arg); | 762 | void PAVGB(X64Reg dest, const OpArg& arg); |
| 784 | void PAVGW(X64Reg dest, OpArg arg); | 763 | void PAVGW(X64Reg dest, const OpArg& arg); |
| 785 | 764 | ||
| 786 | void PCMPEQB(X64Reg dest, OpArg arg); | 765 | void PCMPEQB(X64Reg dest, const OpArg& arg); |
| 787 | void PCMPEQW(X64Reg dest, OpArg arg); | 766 | void PCMPEQW(X64Reg dest, const OpArg& arg); |
| 788 | void PCMPEQD(X64Reg dest, OpArg arg); | 767 | void PCMPEQD(X64Reg dest, const OpArg& arg); |
| 789 | 768 | ||
| 790 | void PCMPGTB(X64Reg dest, OpArg arg); | 769 | void PCMPGTB(X64Reg dest, const OpArg& arg); |
| 791 | void PCMPGTW(X64Reg dest, OpArg arg); | 770 | void PCMPGTW(X64Reg dest, const OpArg& arg); |
| 792 | void PCMPGTD(X64Reg dest, OpArg arg); | 771 | void PCMPGTD(X64Reg dest, const OpArg& arg); |
| 793 | 772 | ||
| 794 | void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); | 773 | void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); |
| 795 | void PINSRW(X64Reg dest, OpArg arg, u8 subreg); | 774 | void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); |
| 796 | 775 | ||
| 797 | void PMADDWD(X64Reg dest, OpArg arg); | 776 | void PMADDWD(X64Reg dest, const OpArg& arg); |
| 798 | void PSADBW(X64Reg dest, OpArg arg); | 777 | void PSADBW(X64Reg dest, const OpArg& arg); |
| 799 | 778 | ||
| 800 | void PMAXSW(X64Reg dest, OpArg arg); | 779 | void PMAXSW(X64Reg dest, const OpArg& arg); |
| 801 | void PMAXUB(X64Reg dest, OpArg arg); | 780 | void PMAXUB(X64Reg dest, const OpArg& arg); |
| 802 | void PMINSW(X64Reg dest, OpArg arg); | 781 | void PMINSW(X64Reg dest, const OpArg& arg); |
| 803 | void PMINUB(X64Reg dest, OpArg arg); | 782 | void PMINUB(X64Reg dest, const OpArg& arg); |
| 804 | // SSE4: More MAX/MIN instructions. | 783 | // SSE4: More MAX/MIN instructions. |
| 805 | void PMINSB(X64Reg dest, OpArg arg); | 784 | void PMINSB(X64Reg dest, const OpArg& arg); |
| 806 | void PMINSD(X64Reg dest, OpArg arg); | 785 | void PMINSD(X64Reg dest, const OpArg& arg); |
| 807 | void PMINUW(X64Reg dest, OpArg arg); | 786 | void PMINUW(X64Reg dest, const OpArg& arg); |
| 808 | void PMINUD(X64Reg dest, OpArg arg); | 787 | void PMINUD(X64Reg dest, const OpArg& arg); |
| 809 | void PMAXSB(X64Reg dest, OpArg arg); | 788 | void PMAXSB(X64Reg dest, const OpArg& arg); |
| 810 | void PMAXSD(X64Reg dest, OpArg arg); | 789 | void PMAXSD(X64Reg dest, const OpArg& arg); |
| 811 | void PMAXUW(X64Reg dest, OpArg arg); | 790 | void PMAXUW(X64Reg dest, const OpArg& arg); |
| 812 | void PMAXUD(X64Reg dest, OpArg arg); | 791 | void PMAXUD(X64Reg dest, const OpArg& arg); |
| 813 | 792 | ||
| 814 | void PMOVMSKB(X64Reg dest, OpArg arg); | 793 | void PMOVMSKB(X64Reg dest, const OpArg& arg); |
| 815 | void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); | 794 | void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 816 | void PSHUFB(X64Reg dest, OpArg arg); | 795 | void PSHUFB(X64Reg dest, const OpArg& arg); |
| 817 | 796 | ||
| 818 | void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); | 797 | void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 819 | void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); | 798 | void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 820 | 799 | ||
| 821 | void PSRLW(X64Reg reg, int shift); | 800 | void PSRLW(X64Reg reg, int shift); |
| 822 | void PSRLD(X64Reg reg, int shift); | 801 | void PSRLD(X64Reg reg, int shift); |
| 823 | void PSRLQ(X64Reg reg, int shift); | 802 | void PSRLQ(X64Reg reg, int shift); |
| 824 | void PSRLQ(X64Reg reg, OpArg arg); | 803 | void PSRLQ(X64Reg reg, const OpArg& arg); |
| 825 | void PSRLDQ(X64Reg reg, int shift); | 804 | void PSRLDQ(X64Reg reg, int shift); |
| 826 | 805 | ||
| 827 | void PSLLW(X64Reg reg, int shift); | 806 | void PSLLW(X64Reg reg, int shift); |
| @@ -833,198 +812,198 @@ public: | |||
| 833 | void PSRAD(X64Reg reg, int shift); | 812 | void PSRAD(X64Reg reg, int shift); |
| 834 | 813 | ||
| 835 | // SSE4: data type conversions | 814 | // SSE4: data type conversions |
| 836 | void PMOVSXBW(X64Reg dest, OpArg arg); | 815 | void PMOVSXBW(X64Reg dest, const OpArg& arg); |
| 837 | void PMOVSXBD(X64Reg dest, OpArg arg); | 816 | void PMOVSXBD(X64Reg dest, const OpArg& arg); |
| 838 | void PMOVSXBQ(X64Reg dest, OpArg arg); | 817 | void PMOVSXBQ(X64Reg dest, const OpArg& arg); |
| 839 | void PMOVSXWD(X64Reg dest, OpArg arg); | 818 | void PMOVSXWD(X64Reg dest, const OpArg& arg); |
| 840 | void PMOVSXWQ(X64Reg dest, OpArg arg); | 819 | void PMOVSXWQ(X64Reg dest, const OpArg& arg); |
| 841 | void PMOVSXDQ(X64Reg dest, OpArg arg); | 820 | void PMOVSXDQ(X64Reg dest, const OpArg& arg); |
| 842 | void PMOVZXBW(X64Reg dest, OpArg arg); | 821 | void PMOVZXBW(X64Reg dest, const OpArg& arg); |
| 843 | void PMOVZXBD(X64Reg dest, OpArg arg); | 822 | void PMOVZXBD(X64Reg dest, const OpArg& arg); |
| 844 | void PMOVZXBQ(X64Reg dest, OpArg arg); | 823 | void PMOVZXBQ(X64Reg dest, const OpArg& arg); |
| 845 | void PMOVZXWD(X64Reg dest, OpArg arg); | 824 | void PMOVZXWD(X64Reg dest, const OpArg& arg); |
| 846 | void PMOVZXWQ(X64Reg dest, OpArg arg); | 825 | void PMOVZXWQ(X64Reg dest, const OpArg& arg); |
| 847 | void PMOVZXDQ(X64Reg dest, OpArg arg); | 826 | void PMOVZXDQ(X64Reg dest, const OpArg& arg); |
| 848 | 827 | ||
| 849 | // SSE4: variable blend instructions (xmm0 implicit argument) | 828 | // SSE4: variable blend instructions (xmm0 implicit argument) |
| 850 | void PBLENDVB(X64Reg dest, OpArg arg); | 829 | void PBLENDVB(X64Reg dest, const OpArg& arg); |
| 851 | void BLENDVPS(X64Reg dest, OpArg arg); | 830 | void BLENDVPS(X64Reg dest, const OpArg& arg); |
| 852 | void BLENDVPD(X64Reg dest, OpArg arg); | 831 | void BLENDVPD(X64Reg dest, const OpArg& arg); |
| 853 | void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); | 832 | void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); |
| 854 | void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); | 833 | void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); |
| 855 | 834 | ||
| 856 | // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) | 835 | // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) |
| 857 | void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); | 836 | void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); |
| 858 | void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); | 837 | void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); |
| 859 | void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); | 838 | void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); |
| 860 | void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); | 839 | void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); |
| 861 | 840 | ||
| 862 | inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } | 841 | void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } |
| 863 | inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } | 842 | void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } |
| 864 | inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } | 843 | void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } |
| 865 | inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } | 844 | void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } |
| 866 | 845 | ||
| 867 | inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } | 846 | void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } |
| 868 | inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } | 847 | void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } |
| 869 | inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } | 848 | void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } |
| 870 | inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } | 849 | void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } |
| 871 | 850 | ||
| 872 | inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } | 851 | void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } |
| 873 | inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } | 852 | void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } |
| 874 | inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } | 853 | void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } |
| 875 | inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } | 854 | void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } |
| 876 | 855 | ||
| 877 | inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } | 856 | void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } |
| 878 | inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } | 857 | void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } |
| 879 | inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } | 858 | void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } |
| 880 | inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } | 859 | void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } |
| 881 | 860 | ||
| 882 | // AVX | 861 | // AVX |
| 883 | void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 862 | void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 884 | void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 863 | void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 885 | void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 864 | void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 886 | void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 865 | void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 887 | void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 866 | void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 888 | void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 867 | void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 889 | void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 868 | void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 890 | void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 869 | void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 891 | void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 870 | void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 892 | void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); | 871 | void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); |
| 893 | void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 872 | void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 894 | void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 873 | void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 895 | 874 | ||
| 896 | void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 875 | void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 897 | void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 876 | void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 898 | void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 877 | void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 899 | void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 878 | void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 900 | void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 879 | void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 901 | void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 880 | void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 902 | void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 881 | void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 903 | void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 882 | void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 904 | 883 | ||
| 905 | void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 884 | void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 906 | void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 885 | void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 907 | void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 886 | void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 908 | void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 887 | void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 909 | 888 | ||
| 910 | // FMA3 | 889 | // FMA3 |
| 911 | void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 890 | void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 912 | void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 891 | void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 913 | void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 892 | void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 914 | void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 893 | void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 915 | void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 894 | void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 916 | void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 895 | void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 917 | void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 896 | void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 918 | void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 897 | void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 919 | void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 898 | void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 920 | void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 899 | void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 921 | void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 900 | void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 922 | void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 901 | void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 923 | void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 902 | void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 924 | void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 903 | void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 925 | void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 904 | void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 926 | void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 905 | void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 927 | void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 906 | void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 928 | void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 907 | void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 929 | void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 908 | void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 930 | void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 909 | void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 931 | void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 910 | void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 932 | void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 911 | void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 933 | void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 912 | void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 934 | void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 913 | void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 935 | void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 914 | void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 936 | void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 915 | void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 937 | void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 916 | void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 938 | void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 917 | void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 939 | void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 918 | void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 940 | void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 919 | void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 941 | void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 920 | void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 942 | void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 921 | void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 943 | void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 922 | void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 944 | void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 923 | void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 945 | void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 924 | void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 946 | void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 925 | void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 947 | void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 926 | void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 948 | void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 927 | void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 949 | void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 928 | void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 950 | void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 929 | void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 951 | void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 930 | void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 952 | void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 931 | void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 953 | void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 932 | void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 954 | void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 933 | void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 955 | void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 934 | void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 956 | void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 935 | void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 957 | void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 936 | void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 958 | void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 937 | void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 959 | void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 938 | void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 960 | void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 939 | void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 961 | void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 940 | void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 962 | void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 941 | void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 963 | void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 942 | void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 964 | void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 943 | void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 965 | void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 944 | void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 966 | void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 945 | void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 967 | void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 946 | void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 968 | void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 947 | void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 969 | void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 948 | void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 970 | void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 949 | void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 971 | 950 | ||
| 972 | // VEX GPR instructions | 951 | // VEX GPR instructions |
| 973 | void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 952 | void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 974 | void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 953 | void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 975 | void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 954 | void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 976 | void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); | 955 | void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); |
| 977 | void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 956 | void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 978 | void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 957 | void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 979 | void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 958 | void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 980 | void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 959 | void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 981 | void BLSR(int bits, X64Reg regOp, OpArg arg); | 960 | void BLSR(int bits, X64Reg regOp, const OpArg& arg); |
| 982 | void BLSMSK(int bits, X64Reg regOp, OpArg arg); | 961 | void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); |
| 983 | void BLSI(int bits, X64Reg regOp, OpArg arg); | 962 | void BLSI(int bits, X64Reg regOp, const OpArg& arg); |
| 984 | void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 963 | void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 985 | void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 964 | void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 986 | 965 | ||
| 987 | void RDTSC(); | 966 | void RDTSC(); |
| 988 | 967 | ||
| 989 | // Utility functions | 968 | // Utility functions |
| 990 | // The difference between this and CALL is that this aligns the stack | 969 | // The difference between this and CALL is that this aligns the stack |
| 991 | // where appropriate. | 970 | // where appropriate. |
| 992 | void ABI_CallFunction(const void *func); | 971 | void ABI_CallFunction(const void* func); |
| 993 | template <typename T> | 972 | template <typename T> |
| 994 | void ABI_CallFunction(T (*func)()) { | 973 | void ABI_CallFunction(T (*func)()) { |
| 995 | ABI_CallFunction((const void *)func); | 974 | ABI_CallFunction((const void*)func); |
| 996 | } | 975 | } |
| 997 | 976 | ||
| 998 | void ABI_CallFunction(const u8 *func) { | 977 | void ABI_CallFunction(const u8* func) { |
| 999 | ABI_CallFunction((const void *)func); | 978 | ABI_CallFunction((const void*)func); |
| 1000 | } | 979 | } |
| 1001 | void ABI_CallFunctionC16(const void *func, u16 param1); | 980 | void ABI_CallFunctionC16(const void* func, u16 param1); |
| 1002 | void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); | 981 | void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); |
| 1003 | 982 | ||
| 1004 | 983 | ||
| 1005 | // These only support u32 parameters, but that's enough for a lot of uses. | 984 | // These only support u32 parameters, but that's enough for a lot of uses. |
| 1006 | // These will destroy the 1 or 2 first "parameter regs". | 985 | // These will destroy the 1 or 2 first "parameter regs". |
| 1007 | void ABI_CallFunctionC(const void *func, u32 param1); | 986 | void ABI_CallFunctionC(const void* func, u32 param1); |
| 1008 | void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); | 987 | void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); |
| 1009 | void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); | 988 | void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); |
| 1010 | void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); | 989 | void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); |
| 1011 | void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); | 990 | void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); |
| 1012 | void ABI_CallFunctionP(const void *func, void *param1); | 991 | void ABI_CallFunctionP(const void* func, void* param1); |
| 1013 | void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); | 992 | void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); |
| 1014 | void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); | 993 | void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); |
| 1015 | void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); | 994 | void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); |
| 1016 | void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); | 995 | void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); |
| 1017 | void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); | 996 | void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); |
| 1018 | void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); | 997 | void ABI_CallFunctionA(const void* func, const OpArg& arg1); |
| 1019 | void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); | 998 | void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); |
| 1020 | 999 | ||
| 1021 | // Pass a register as a parameter. | 1000 | // Pass a register as a parameter. |
| 1022 | void ABI_CallFunctionR(const void *func, X64Reg reg1); | 1001 | void ABI_CallFunctionR(const void* func, X64Reg reg1); |
| 1023 | void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); | 1002 | void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); |
| 1024 | 1003 | ||
| 1025 | template <typename Tr, typename T1> | 1004 | template <typename Tr, typename T1> |
| 1026 | void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { | 1005 | void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { |
| 1027 | ABI_CallFunctionC((const void *)func, param1); | 1006 | ABI_CallFunctionC((const void*)func, param1); |
| 1028 | } | 1007 | } |
| 1029 | 1008 | ||
| 1030 | // A function that doesn't have any control over what it will do to regs, | 1009 | // A function that doesn't have any control over what it will do to regs, |
| @@ -1048,9 +1027,9 @@ public: | |||
| 1048 | void ABI_EmitEpilogue(int maxCallParams); | 1027 | void ABI_EmitEpilogue(int maxCallParams); |
| 1049 | 1028 | ||
| 1050 | #ifdef _M_IX86 | 1029 | #ifdef _M_IX86 |
| 1051 | inline int ABI_GetNumXMMRegs() { return 8; } | 1030 | static int ABI_GetNumXMMRegs() { return 8; } |
| 1052 | #else | 1031 | #else |
| 1053 | inline int ABI_GetNumXMMRegs() { return 16; } | 1032 | static int ABI_GetNumXMMRegs() { return 16; } |
| 1054 | #endif | 1033 | #endif |
| 1055 | }; // class XEmitter | 1034 | }; // class XEmitter |
| 1056 | 1035 | ||