summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt16
-rw-r--r--src/common/code_block.h87
-rw-r--r--src/common/common_funcs.h2
-rw-r--r--src/common/cpu_detect.h78
-rw-r--r--src/common/hash.cpp126
-rw-r--r--src/common/hash.h25
-rw-r--r--src/common/memory_util.cpp8
-rw-r--r--src/common/platform.h2
-rw-r--r--src/common/x64/abi.cpp680
-rw-r--r--src/common/x64/abi.h78
-rw-r--r--src/common/x64/cpu_detect.cpp187
-rw-r--r--src/common/x64/cpu_detect.h66
-rw-r--r--src/common/x64/emitter.cpp1989
-rw-r--r--src/common/x64/emitter.h1067
14 files changed, 4326 insertions, 85 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 4c086cd2f..e743a026d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SRCS
5 break_points.cpp 5 break_points.cpp
6 emu_window.cpp 6 emu_window.cpp
7 file_util.cpp 7 file_util.cpp
8 hash.cpp
8 key_map.cpp 9 key_map.cpp
9 logging/filter.cpp 10 logging/filter.cpp
10 logging/text_formatter.cpp 11 logging/text_formatter.cpp
@@ -24,14 +25,15 @@ set(HEADERS
24 bit_field.h 25 bit_field.h
25 break_points.h 26 break_points.h
26 chunk_file.h 27 chunk_file.h
28 code_block.h
27 color.h 29 color.h
28 common_funcs.h 30 common_funcs.h
29 common_paths.h 31 common_paths.h
30 common_types.h 32 common_types.h
31 cpu_detect.h
32 debug_interface.h 33 debug_interface.h
33 emu_window.h 34 emu_window.h
34 file_util.h 35 file_util.h
36 hash.h
35 key_map.h 37 key_map.h
36 linear_disk_cache.h 38 linear_disk_cache.h
37 logging/text_formatter.h 39 logging/text_formatter.h
@@ -56,6 +58,18 @@ set(HEADERS
56 vector_math.h 58 vector_math.h
57 ) 59 )
58 60
61if(ARCHITECTURE_x86_64)
62 set(SRCS ${SRCS}
63 x64/abi.cpp
64 x64/cpu_detect.cpp
65 x64/emitter.cpp)
66
67 set(HEADERS ${HEADERS}
68 x64/abi.h
69 x64/cpu_detect.h
70 x64/emitter.h)
71endif()
72
59create_directory_groups(${SRCS} ${HEADERS}) 73create_directory_groups(${SRCS} ${HEADERS})
60 74
61add_library(common STATIC ${SRCS} ${HEADERS}) 75add_library(common STATIC ${SRCS} ${HEADERS})
diff --git a/src/common/code_block.h b/src/common/code_block.h
new file mode 100644
index 000000000..9ef7296d3
--- /dev/null
+++ b/src/common/code_block.h
@@ -0,0 +1,87 @@
1// Copyright 2013 Dolphin Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common_types.h"
8#include "memory_util.h"
9
10// Everything that needs to generate code should inherit from this.
11// You get memory management for free, plus, you can use all emitter functions without
12// having to prefix them with gen-> or something similar.
13// Example implementation:
14// class JIT : public CodeBlock<ARMXEmitter> {}
15template<class T> class CodeBlock : public T, NonCopyable
16{
17private:
18 // A privately used function to set the executable RAM space to something invalid.
19 // For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
20 virtual void PoisonMemory() = 0;
21
22protected:
23 u8 *region;
24 size_t region_size;
25
26public:
27 CodeBlock() : region(nullptr), region_size(0) {}
28 virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
29
30 // Call this before you generate any code.
31 void AllocCodeSpace(int size)
32 {
33 region_size = size;
34 region = (u8*)AllocateExecutableMemory(region_size);
35 T::SetCodePtr(region);
36 }
37
38 // Always clear code space with breakpoints, so that if someone accidentally executes
39 // uninitialized, it just breaks into the debugger.
40 void ClearCodeSpace()
41 {
42 PoisonMemory();
43 ResetCodePtr();
44 }
45
46 // Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
47 void FreeCodeSpace()
48 {
49#ifdef __SYMBIAN32__
50 ResetExecutableMemory(region);
51#else
52 FreeMemoryPages(region, region_size);
53#endif
54 region = nullptr;
55 region_size = 0;
56 }
57
58 bool IsInSpace(const u8 *ptr)
59 {
60 return (ptr >= region) && (ptr < (region + region_size));
61 }
62
63 // Cannot currently be undone. Will write protect the entire code region.
64 // Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
65 void WriteProtect()
66 {
67 WriteProtectMemory(region, region_size, true);
68 }
69
70 void ResetCodePtr()
71 {
72 T::SetCodePtr(region);
73 }
74
75 size_t GetSpaceLeft() const
76 {
77 return region_size - (T::GetCodePtr() - region);
78 }
79
80 u8 *GetBasePtr() {
81 return region;
82 }
83
84 size_t GetOffset(const u8 *ptr) const {
85 return ptr - region;
86 }
87};
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 83b47f61e..88e452a16 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -35,7 +35,7 @@
35 35
36#ifndef _MSC_VER 36#ifndef _MSC_VER
37 37
38#if defined(__x86_64__) || defined(_M_X64) 38#ifdef ARCHITECTURE_x86_64
39#define Crash() __asm__ __volatile__("int $3") 39#define Crash() __asm__ __volatile__("int $3")
40#elif defined(_M_ARM) 40#elif defined(_M_ARM)
41#define Crash() __asm__ __volatile__("trap") 41#define Crash() __asm__ __volatile__("trap")
diff --git a/src/common/cpu_detect.h b/src/common/cpu_detect.h
deleted file mode 100644
index b585f9608..000000000
--- a/src/common/cpu_detect.h
+++ /dev/null
@@ -1,78 +0,0 @@
1// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5
6// Detect the cpu, so we'll know which optimizations to use
7#pragma once
8
9#include <string>
10
11enum CPUVendor
12{
13 VENDOR_INTEL = 0,
14 VENDOR_AMD = 1,
15 VENDOR_ARM = 2,
16 VENDOR_OTHER = 3,
17};
18
19struct CPUInfo
20{
21 CPUVendor vendor;
22
23 char cpu_string[0x21];
24 char brand_string[0x41];
25 bool OS64bit;
26 bool CPU64bit;
27 bool Mode64bit;
28
29 bool HTT;
30 int num_cores;
31 int logical_cpu_count;
32
33 bool bSSE;
34 bool bSSE2;
35 bool bSSE3;
36 bool bSSSE3;
37 bool bPOPCNT;
38 bool bSSE4_1;
39 bool bSSE4_2;
40 bool bLZCNT;
41 bool bSSE4A;
42 bool bAVX;
43 bool bAES;
44 bool bLAHFSAHF64;
45 bool bLongMode;
46
47 // ARM specific CPUInfo
48 bool bSwp;
49 bool bHalf;
50 bool bThumb;
51 bool bFastMult;
52 bool bVFP;
53 bool bEDSP;
54 bool bThumbEE;
55 bool bNEON;
56 bool bVFPv3;
57 bool bTLS;
58 bool bVFPv4;
59 bool bIDIVa;
60 bool bIDIVt;
61 bool bArmV7; // enable MOVT, MOVW etc
62
63 // ARMv8 specific
64 bool bFP;
65 bool bASIMD;
66
67 // Call Detect()
68 explicit CPUInfo();
69
70 // Turn the cpu info into a string we can show
71 std::string Summarize();
72
73private:
74 // Detects the various cpu features
75 void Detect();
76};
77
78extern CPUInfo cpu_info;
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
new file mode 100644
index 000000000..413e9c6f1
--- /dev/null
+++ b/src/common/hash.cpp
@@ -0,0 +1,126 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#if defined(_MSC_VER)
6#include <stdlib.h>
7#endif
8
9#include "common_funcs.h"
10#include "common_types.h"
11#include "hash.h"
12
13namespace Common {
14
15// MurmurHash3 was written by Austin Appleby, and is placed in the public
16// domain. The author hereby disclaims copyright to this source code.
17
18// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
19// the conversion here
20
21static FORCE_INLINE u32 getblock32(const u32* p, int i) {
22 return p[i];
23}
24
25static FORCE_INLINE u64 getblock64(const u64* p, int i) {
26 return p[i];
27}
28
29// Finalization mix - force all bits of a hash block to avalanche
30
31static FORCE_INLINE u32 fmix32(u32 h) {
32 h ^= h >> 16;
33 h *= 0x85ebca6b;
34 h ^= h >> 13;
35 h *= 0xc2b2ae35;
36 h ^= h >> 16;
37
38 return h;
39}
40
41static FORCE_INLINE u64 fmix64(u64 k) {
42 k ^= k >> 33;
43 k *= 0xff51afd7ed558ccdllu;
44 k ^= k >> 33;
45 k *= 0xc4ceb9fe1a85ec53llu;
46 k ^= k >> 33;
47
48 return k;
49}
50
51// This is the 128-bit variant of the MurmurHash3 hash function that is targetted for 64-bit
52// platforms (MurmurHash3_x64_128). It was taken from:
53// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
54void MurmurHash3_128(const void* key, int len, u32 seed, void* out) {
55 const u8 * data = (const u8*)key;
56 const int nblocks = len / 16;
57
58 u64 h1 = seed;
59 u64 h2 = seed;
60
61 const u64 c1 = 0x87c37b91114253d5llu;
62 const u64 c2 = 0x4cf5ad432745937fllu;
63
64 // Body
65
66 const u64 * blocks = (const u64 *)(data);
67
68 for (int i = 0; i < nblocks; i++) {
69 u64 k1 = getblock64(blocks,i*2+0);
70 u64 k2 = getblock64(blocks,i*2+1);
71
72 k1 *= c1; k1 = _rotl64(k1,31); k1 *= c2; h1 ^= k1;
73
74 h1 = _rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
75
76 k2 *= c2; k2 = _rotl64(k2,33); k2 *= c1; h2 ^= k2;
77
78 h2 = _rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
79 }
80
81 // Tail
82
83 const u8 * tail = (const u8*)(data + nblocks*16);
84
85 u64 k1 = 0;
86 u64 k2 = 0;
87
88 switch (len & 15) {
89 case 15: k2 ^= ((u64)tail[14]) << 48;
90 case 14: k2 ^= ((u64)tail[13]) << 40;
91 case 13: k2 ^= ((u64)tail[12]) << 32;
92 case 12: k2 ^= ((u64)tail[11]) << 24;
93 case 11: k2 ^= ((u64)tail[10]) << 16;
94 case 10: k2 ^= ((u64)tail[ 9]) << 8;
95 case 9: k2 ^= ((u64)tail[ 8]) << 0;
96 k2 *= c2; k2 = _rotl64(k2,33); k2 *= c1; h2 ^= k2;
97
98 case 8: k1 ^= ((u64)tail[ 7]) << 56;
99 case 7: k1 ^= ((u64)tail[ 6]) << 48;
100 case 6: k1 ^= ((u64)tail[ 5]) << 40;
101 case 5: k1 ^= ((u64)tail[ 4]) << 32;
102 case 4: k1 ^= ((u64)tail[ 3]) << 24;
103 case 3: k1 ^= ((u64)tail[ 2]) << 16;
104 case 2: k1 ^= ((u64)tail[ 1]) << 8;
105 case 1: k1 ^= ((u64)tail[ 0]) << 0;
106 k1 *= c1; k1 = _rotl64(k1,31); k1 *= c2; h1 ^= k1;
107 };
108
109 // Finalization
110
111 h1 ^= len; h2 ^= len;
112
113 h1 += h2;
114 h2 += h1;
115
116 h1 = fmix64(h1);
117 h2 = fmix64(h2);
118
119 h1 += h2;
120 h2 += h1;
121
122 ((u64*)out)[0] = h1;
123 ((u64*)out)[1] = h2;
124}
125
126} // namespace Common
diff --git a/src/common/hash.h b/src/common/hash.h
new file mode 100644
index 000000000..a3850be68
--- /dev/null
+++ b/src/common/hash.h
@@ -0,0 +1,25 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Common {
10
11void MurmurHash3_128(const void* key, int len, u32 seed, void* out);
12
13/**
14 * Computes a 64-bit hash over the specified block of data
15 * @param data Block of data to compute hash over
16 * @param len Length of data (in bytes) to compute hash over
17 * @returns 64-bit hash value that was computed over the data block
18 */
19static inline u64 ComputeHash64(const void* data, int len) {
20 u64 res[2];
21 MurmurHash3_128(data, len, 0, res);
22 return res[0];
23}
24
25} // namespace Common
diff --git a/src/common/memory_util.cpp b/src/common/memory_util.cpp
index 2b3ace528..5ef784224 100644
--- a/src/common/memory_util.cpp
+++ b/src/common/memory_util.cpp
@@ -16,7 +16,7 @@
16 #include <sys/mman.h> 16 #include <sys/mman.h>
17#endif 17#endif
18 18
19#if !defined(_WIN32) && defined(__x86_64__) && !defined(MAP_32BIT) 19#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
20#include <unistd.h> 20#include <unistd.h>
21#define PAGE_MASK (getpagesize() - 1) 21#define PAGE_MASK (getpagesize() - 1)
22#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK)) 22#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
@@ -31,7 +31,7 @@ void* AllocateExecutableMemory(size_t size, bool low)
31 void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE); 31 void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
32#else 32#else
33 static char *map_hint = 0; 33 static char *map_hint = 0;
34#if defined(__x86_64__) && !defined(MAP_32BIT) 34#if defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
35 // This OS has no flag to enforce allocation below the 4 GB boundary, 35 // This OS has no flag to enforce allocation below the 4 GB boundary,
36 // but if we hint that we want a low address it is very likely we will 36 // but if we hint that we want a low address it is very likely we will
37 // get one. 37 // get one.
@@ -43,7 +43,7 @@ void* AllocateExecutableMemory(size_t size, bool low)
43#endif 43#endif
44 void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC, 44 void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
45 MAP_ANON | MAP_PRIVATE 45 MAP_ANON | MAP_PRIVATE
46#if defined(__x86_64__) && defined(MAP_32BIT) 46#if defined(ARCHITECTURE_X64) && defined(MAP_32BIT)
47 | (low ? MAP_32BIT : 0) 47 | (low ? MAP_32BIT : 0)
48#endif 48#endif
49 , -1, 0); 49 , -1, 0);
@@ -62,7 +62,7 @@ void* AllocateExecutableMemory(size_t size, bool low)
62#endif 62#endif
63 LOG_ERROR(Common_Memory, "Failed to allocate executable memory"); 63 LOG_ERROR(Common_Memory, "Failed to allocate executable memory");
64 } 64 }
65#if !defined(_WIN32) && defined(__x86_64__) && !defined(MAP_32BIT) 65#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
66 else 66 else
67 { 67 {
68 if (low) 68 if (low)
diff --git a/src/common/platform.h b/src/common/platform.h
index 0a912dda3..9ba4db11b 100644
--- a/src/common/platform.h
+++ b/src/common/platform.h
@@ -27,7 +27,7 @@
27//////////////////////////////////////////////////////////////////////////////////////////////////// 27////////////////////////////////////////////////////////////////////////////////////////////////////
28// Platform detection 28// Platform detection
29 29
30#if defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) 30#if defined(ARCHITECTURE_x86_64) || defined(__aarch64__)
31 #define EMU_ARCH_BITS 64 31 #define EMU_ARCH_BITS 64
32#elif defined(__i386) || defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) 32#elif defined(__i386) || defined(_M_IX86) || defined(__arm__) || defined(_M_ARM)
33 #define EMU_ARCH_BITS 32 33 #define EMU_ARCH_BITS 32
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp
new file mode 100644
index 000000000..4c07a6ebe
--- /dev/null
+++ b/src/common/x64/abi.cpp
@@ -0,0 +1,680 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include "abi.h"
19#include "emitter.h"
20
21using namespace Gen;
22
23// Shared code between Win64 and Unix64
24
25// Sets up a __cdecl function.
26void XEmitter::ABI_EmitPrologue(int maxCallParams)
27{
28#ifdef _M_IX86
29 // Don't really need to do anything
30#elif defined(ARCHITECTURE_x86_64)
31#if _WIN32
32 int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8;
33 // Set up a stack frame so that we can call functions
34 // TODO: use maxCallParams
35 SUB(64, R(RSP), Imm8(stacksize));
36#endif
37#else
38#error Arch not supported
39#endif
40}
41
42void XEmitter::ABI_EmitEpilogue(int maxCallParams)
43{
44#ifdef _M_IX86
45 RET();
46#elif defined(ARCHITECTURE_x86_64)
47#ifdef _WIN32
48 int stacksize = ((maxCallParams+1)&~1)*8 + 8;
49 ADD(64, R(RSP), Imm8(stacksize));
50#endif
51 RET();
52#else
53#error Arch not supported
54
55
56#endif
57}
58
59#ifdef _M_IX86 // All32
60
61// Shared code between Win32 and Unix32
62void XEmitter::ABI_CallFunction(const void *func) {
63 ABI_AlignStack(0);
64 CALL(func);
65 ABI_RestoreStack(0);
66}
67
68void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
69 ABI_AlignStack(1 * 2);
70 PUSH(16, Imm16(param1));
71 CALL(func);
72 ABI_RestoreStack(1 * 2);
73}
74
75void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
76 ABI_AlignStack(1 * 2 + 1 * 4);
77 PUSH(16, Imm16(param2));
78 PUSH(32, Imm32(param1));
79 CALL(func);
80 ABI_RestoreStack(1 * 2 + 1 * 4);
81}
82
83void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
84 ABI_AlignStack(1 * 4);
85 PUSH(32, Imm32(param1));
86 CALL(func);
87 ABI_RestoreStack(1 * 4);
88}
89
90void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
91 ABI_AlignStack(2 * 4);
92 PUSH(32, Imm32(param2));
93 PUSH(32, Imm32(param1));
94 CALL(func);
95 ABI_RestoreStack(2 * 4);
96}
97
98void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
99 ABI_AlignStack(3 * 4);
100 PUSH(32, Imm32(param3));
101 PUSH(32, Imm32(param2));
102 PUSH(32, Imm32(param1));
103 CALL(func);
104 ABI_RestoreStack(3 * 4);
105}
106
107void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
108 ABI_AlignStack(3 * 4);
109 PUSH(32, ImmPtr(param3));
110 PUSH(32, Imm32(param2));
111 PUSH(32, Imm32(param1));
112 CALL(func);
113 ABI_RestoreStack(3 * 4);
114}
115
116void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2,u32 param3, void *param4) {
117 ABI_AlignStack(4 * 4);
118 PUSH(32, ImmPtr(param4));
119 PUSH(32, Imm32(param3));
120 PUSH(32, Imm32(param2));
121 PUSH(32, Imm32(param1));
122 CALL(func);
123 ABI_RestoreStack(4 * 4);
124}
125
126void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
127 ABI_AlignStack(1 * 4);
128 PUSH(32, ImmPtr(param1));
129 CALL(func);
130 ABI_RestoreStack(1 * 4);
131}
132
133void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
134 ABI_AlignStack(2 * 4);
135 PUSH(32, arg2);
136 PUSH(32, ImmPtr(param1));
137 CALL(func);
138 ABI_RestoreStack(2 * 4);
139}
140
141void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
142 ABI_AlignStack(3 * 4);
143 PUSH(32, arg3);
144 PUSH(32, arg2);
145 PUSH(32, ImmPtr(param1));
146 CALL(func);
147 ABI_RestoreStack(3 * 4);
148}
149
150void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
151 ABI_AlignStack(3 * 4);
152 PUSH(32, Imm32(param3));
153 PUSH(32, ImmPtr(param2));
154 PUSH(32, ImmPtr(param1));
155 CALL(func);
156 ABI_RestoreStack(3 * 4);
157}
158
159// Pass a register as a parameter.
160void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
161 ABI_AlignStack(1 * 4);
162 PUSH(32, R(reg1));
163 CALL(func);
164 ABI_RestoreStack(1 * 4);
165}
166
167// Pass two registers as parameters.
168void XEmitter::ABI_CallFunctionRR(const void *func, Gen::X64Reg reg1, Gen::X64Reg reg2)
169{
170 ABI_AlignStack(2 * 4);
171 PUSH(32, R(reg2));
172 PUSH(32, R(reg1));
173 CALL(func);
174 ABI_RestoreStack(2 * 4);
175}
176
177void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
178{
179 ABI_AlignStack(2 * 4);
180 PUSH(32, Imm32(param2));
181 PUSH(32, arg1);
182 CALL(func);
183 ABI_RestoreStack(2 * 4);
184}
185
186void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
187{
188 ABI_AlignStack(3 * 4);
189 PUSH(32, Imm32(param3));
190 PUSH(32, Imm32(param2));
191 PUSH(32, arg1);
192 CALL(func);
193 ABI_RestoreStack(3 * 4);
194}
195
196void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
197{
198 ABI_AlignStack(1 * 4);
199 PUSH(32, arg1);
200 CALL(func);
201 ABI_RestoreStack(1 * 4);
202}
203
204void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
205{
206 ABI_AlignStack(2 * 4);
207 PUSH(32, arg2);
208 PUSH(32, arg1);
209 CALL(func);
210 ABI_RestoreStack(2 * 4);
211}
212
213void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
214 // Note: 4 * 4 = 16 bytes, so alignment is preserved.
215 PUSH(EBP);
216 PUSH(EBX);
217 PUSH(ESI);
218 PUSH(EDI);
219}
220
221void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
222 POP(EDI);
223 POP(ESI);
224 POP(EBX);
225 POP(EBP);
226}
227
228unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
229 frameSize += 4; // reserve space for return address
230 unsigned int alignedSize =
231#ifdef __GNUC__
232 (frameSize + 15) & -16;
233#else
234 (frameSize + 3) & -4;
235#endif
236 return alignedSize;
237}
238
239
240void XEmitter::ABI_AlignStack(unsigned int frameSize) {
241// Mac OS X requires the stack to be 16-byte aligned before every call.
242// Linux requires the stack to be 16-byte aligned before calls that put SSE
243// vectors on the stack, but since we do not keep track of which calls do that,
244// it is effectively every call as well.
245// Windows binaries compiled with MSVC do not have such a restriction*, but I
246// expect that GCC on Windows acts the same as GCC on Linux in this respect.
247// It would be nice if someone could verify this.
248// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times.
249 unsigned int fillSize =
250 ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4);
251 if (fillSize != 0) {
252 SUB(32, R(ESP), Imm8(fillSize));
253 }
254}
255
256void XEmitter::ABI_RestoreStack(unsigned int frameSize) {
257 unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize);
258 alignedSize -= 4; // return address is POPped at end of call
259 if (alignedSize != 0) {
260 ADD(32, R(ESP), Imm8(alignedSize));
261 }
262}
263
264#else //64bit
265
266// Common functions
267void XEmitter::ABI_CallFunction(const void *func) {
268 u64 distance = u64(func) - (u64(code) + 5);
269 if (distance >= 0x0000000080000000ULL
270 && distance < 0xFFFFFFFF80000000ULL) {
271 // Far call
272 MOV(64, R(RAX), ImmPtr(func));
273 CALLptr(R(RAX));
274 } else {
275 CALL(func);
276 }
277}
278
279void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
280 MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
281 u64 distance = u64(func) - (u64(code) + 5);
282 if (distance >= 0x0000000080000000ULL
283 && distance < 0xFFFFFFFF80000000ULL) {
284 // Far call
285 MOV(64, R(RAX), ImmPtr(func));
286 CALLptr(R(RAX));
287 } else {
288 CALL(func);
289 }
290}
291
292void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
293 MOV(32, R(ABI_PARAM1), Imm32(param1));
294 MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
295 u64 distance = u64(func) - (u64(code) + 5);
296 if (distance >= 0x0000000080000000ULL
297 && distance < 0xFFFFFFFF80000000ULL) {
298 // Far call
299 MOV(64, R(RAX), ImmPtr(func));
300 CALLptr(R(RAX));
301 } else {
302 CALL(func);
303 }
304}
305
306void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
307 MOV(32, R(ABI_PARAM1), Imm32(param1));
308 u64 distance = u64(func) - (u64(code) + 5);
309 if (distance >= 0x0000000080000000ULL
310 && distance < 0xFFFFFFFF80000000ULL) {
311 // Far call
312 MOV(64, R(RAX), ImmPtr(func));
313 CALLptr(R(RAX));
314 } else {
315 CALL(func);
316 }
317}
318
319void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
320 MOV(32, R(ABI_PARAM1), Imm32(param1));
321 MOV(32, R(ABI_PARAM2), Imm32(param2));
322 u64 distance = u64(func) - (u64(code) + 5);
323 if (distance >= 0x0000000080000000ULL
324 && distance < 0xFFFFFFFF80000000ULL) {
325 // Far call
326 MOV(64, R(RAX), ImmPtr(func));
327 CALLptr(R(RAX));
328 } else {
329 CALL(func);
330 }
331}
332
333void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
334 MOV(32, R(ABI_PARAM1), Imm32(param1));
335 MOV(32, R(ABI_PARAM2), Imm32(param2));
336 MOV(32, R(ABI_PARAM3), Imm32(param3));
337 u64 distance = u64(func) - (u64(code) + 5);
338 if (distance >= 0x0000000080000000ULL
339 && distance < 0xFFFFFFFF80000000ULL) {
340 // Far call
341 MOV(64, R(RAX), ImmPtr(func));
342 CALLptr(R(RAX));
343 } else {
344 CALL(func);
345 }
346}
347
348void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
349 MOV(32, R(ABI_PARAM1), Imm32(param1));
350 MOV(32, R(ABI_PARAM2), Imm32(param2));
351 MOV(64, R(ABI_PARAM3), ImmPtr(param3));
352 u64 distance = u64(func) - (u64(code) + 5);
353 if (distance >= 0x0000000080000000ULL
354 && distance < 0xFFFFFFFF80000000ULL) {
355 // Far call
356 MOV(64, R(RAX), ImmPtr(func));
357 CALLptr(R(RAX));
358 } else {
359 CALL(func);
360 }
361}
362
363void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
364 MOV(32, R(ABI_PARAM1), Imm32(param1));
365 MOV(32, R(ABI_PARAM2), Imm32(param2));
366 MOV(32, R(ABI_PARAM3), Imm32(param3));
367 MOV(64, R(ABI_PARAM4), ImmPtr(param4));
368 u64 distance = u64(func) - (u64(code) + 5);
369 if (distance >= 0x0000000080000000ULL
370 && distance < 0xFFFFFFFF80000000ULL) {
371 // Far call
372 MOV(64, R(RAX), ImmPtr(func));
373 CALLptr(R(RAX));
374 } else {
375 CALL(func);
376 }
377}
378
379void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
380 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
381 u64 distance = u64(func) - (u64(code) + 5);
382 if (distance >= 0x0000000080000000ULL
383 && distance < 0xFFFFFFFF80000000ULL) {
384 // Far call
385 MOV(64, R(RAX), ImmPtr(func));
386 CALLptr(R(RAX));
387 } else {
388 CALL(func);
389 }
390}
391
392void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
393 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
394 if (!arg2.IsSimpleReg(ABI_PARAM2))
395 MOV(32, R(ABI_PARAM2), arg2);
396 u64 distance = u64(func) - (u64(code) + 5);
397 if (distance >= 0x0000000080000000ULL
398 && distance < 0xFFFFFFFF80000000ULL) {
399 // Far call
400 MOV(64, R(RAX), ImmPtr(func));
401 CALLptr(R(RAX));
402 } else {
403 CALL(func);
404 }
405}
406
407void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
408 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
409 if (!arg2.IsSimpleReg(ABI_PARAM2))
410 MOV(32, R(ABI_PARAM2), arg2);
411 if (!arg3.IsSimpleReg(ABI_PARAM3))
412 MOV(32, R(ABI_PARAM3), arg3);
413 u64 distance = u64(func) - (u64(code) + 5);
414 if (distance >= 0x0000000080000000ULL
415 && distance < 0xFFFFFFFF80000000ULL) {
416 // Far call
417 MOV(64, R(RAX), ImmPtr(func));
418 CALLptr(R(RAX));
419 } else {
420 CALL(func);
421 }
422}
423
424void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
425 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
426 MOV(64, R(ABI_PARAM2), ImmPtr(param2));
427 MOV(32, R(ABI_PARAM3), Imm32(param3));
428 u64 distance = u64(func) - (u64(code) + 5);
429 if (distance >= 0x0000000080000000ULL
430 && distance < 0xFFFFFFFF80000000ULL) {
431 // Far call
432 MOV(64, R(RAX), ImmPtr(func));
433 CALLptr(R(RAX));
434 } else {
435 CALL(func);
436 }
437}
438
439// Pass a register as a parameter.
440void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
441 if (reg1 != ABI_PARAM1)
442 MOV(32, R(ABI_PARAM1), R(reg1));
443 u64 distance = u64(func) - (u64(code) + 5);
444 if (distance >= 0x0000000080000000ULL
445 && distance < 0xFFFFFFFF80000000ULL) {
446 // Far call
447 MOV(64, R(RAX), ImmPtr(func));
448 CALLptr(R(RAX));
449 } else {
450 CALL(func);
451 }
452}
453
454// Pass two registers as parameters.
455void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
456 if (reg2 != ABI_PARAM1) {
457 if (reg1 != ABI_PARAM1)
458 MOV(64, R(ABI_PARAM1), R(reg1));
459 if (reg2 != ABI_PARAM2)
460 MOV(64, R(ABI_PARAM2), R(reg2));
461 } else {
462 if (reg2 != ABI_PARAM2)
463 MOV(64, R(ABI_PARAM2), R(reg2));
464 if (reg1 != ABI_PARAM1)
465 MOV(64, R(ABI_PARAM1), R(reg1));
466 }
467 u64 distance = u64(func) - (u64(code) + 5);
468 if (distance >= 0x0000000080000000ULL
469 && distance < 0xFFFFFFFF80000000ULL) {
470 // Far call
471 MOV(64, R(RAX), ImmPtr(func));
472 CALLptr(R(RAX));
473 } else {
474 CALL(func);
475 }
476}
477
478void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
479{
480 if (!arg1.IsSimpleReg(ABI_PARAM1))
481 MOV(32, R(ABI_PARAM1), arg1);
482 MOV(32, R(ABI_PARAM2), Imm32(param2));
483 u64 distance = u64(func) - (u64(code) + 5);
484 if (distance >= 0x0000000080000000ULL
485 && distance < 0xFFFFFFFF80000000ULL) {
486 // Far call
487 MOV(64, R(RAX), ImmPtr(func));
488 CALLptr(R(RAX));
489 } else {
490 CALL(func);
491 }
492}
493
494void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
495{
496 if (!arg1.IsSimpleReg(ABI_PARAM1))
497 MOV(32, R(ABI_PARAM1), arg1);
498 MOV(32, R(ABI_PARAM2), Imm32(param2));
499 MOV(64, R(ABI_PARAM3), Imm64(param3));
500 u64 distance = u64(func) - (u64(code) + 5);
501 if (distance >= 0x0000000080000000ULL
502 && distance < 0xFFFFFFFF80000000ULL) {
503 // Far call
504 MOV(64, R(RAX), ImmPtr(func));
505 CALLptr(R(RAX));
506 } else {
507 CALL(func);
508 }
509}
510
511void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
512{
513 if (!arg1.IsSimpleReg(ABI_PARAM1))
514 MOV(32, R(ABI_PARAM1), arg1);
515 u64 distance = u64(func) - (u64(code) + 5);
516 if (distance >= 0x0000000080000000ULL
517 && distance < 0xFFFFFFFF80000000ULL) {
518 // Far call
519 MOV(64, R(RAX), ImmPtr(func));
520 CALLptr(R(RAX));
521 } else {
522 CALL(func);
523 }
524}
525
526void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
527{
528 if (!arg1.IsSimpleReg(ABI_PARAM1))
529 MOV(32, R(ABI_PARAM1), arg1);
530 if (!arg2.IsSimpleReg(ABI_PARAM2))
531 MOV(32, R(ABI_PARAM2), arg2);
532 u64 distance = u64(func) - (u64(code) + 5);
533 if (distance >= 0x0000000080000000ULL
534 && distance < 0xFFFFFFFF80000000ULL) {
535 // Far call
536 MOV(64, R(RAX), ImmPtr(func));
537 CALLptr(R(RAX));
538 } else {
539 CALL(func);
540 }
541}
542
543unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) {
544 return frameSize;
545}
546
547#ifdef _WIN32
548
549// The Windows x64 ABI requires XMM6 - XMM15 to be callee saved. 10 regs.
550// But, not saving XMM4 and XMM5 breaks things in VS 2010, even though they are volatile regs.
551// Let's just save all 16.
552const int XMM_STACK_SPACE = 16 * 16;
553
554// Win64 Specific Code
555void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
556 //we only want to do this once
557 PUSH(RBX);
558 PUSH(RSI);
559 PUSH(RDI);
560 PUSH(RBP);
561 PUSH(R12);
562 PUSH(R13);
563 PUSH(R14);
564 PUSH(R15);
565 ABI_AlignStack(0);
566
567 // Do this after aligning, because before it's offset by 8.
568 SUB(64, R(RSP), Imm32(XMM_STACK_SPACE));
569 for (int i = 0; i < 16; ++i)
570 MOVAPS(MDisp(RSP, i * 16), (X64Reg)(XMM0 + i));
571}
572
573void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
574 for (int i = 0; i < 16; ++i)
575 MOVAPS((X64Reg)(XMM0 + i), MDisp(RSP, i * 16));
576 ADD(64, R(RSP), Imm32(XMM_STACK_SPACE));
577
578 ABI_RestoreStack(0);
579 POP(R15);
580 POP(R14);
581 POP(R13);
582 POP(R12);
583 POP(RBP);
584 POP(RDI);
585 POP(RSI);
586 POP(RBX);
587}
588
589// Win64 Specific Code
590void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
591 PUSH(RCX);
592 PUSH(RDX);
593 PUSH(RSI);
594 PUSH(RDI);
595 PUSH(R8);
596 PUSH(R9);
597 PUSH(R10);
598 PUSH(R11);
599 // TODO: Callers preserve XMM4-5 (XMM0-3 are args.)
600 ABI_AlignStack(0);
601}
602
603void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
604 ABI_RestoreStack(0);
605 POP(R11);
606 POP(R10);
607 POP(R9);
608 POP(R8);
609 POP(RDI);
610 POP(RSI);
611 POP(RDX);
612 POP(RCX);
613}
614
615void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
616 SUB(64, R(RSP), Imm8(0x28));
617}
618
619void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
620 ADD(64, R(RSP), Imm8(0x28));
621}
622
623#else
624// Unix64 Specific Code
625void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() {
626 PUSH(RBX);
627 PUSH(RBP);
628 PUSH(R12);
629 PUSH(R13);
630 PUSH(R14);
631 PUSH(R15);
632 PUSH(R15); //just to align stack. duped push/pop doesn't hurt.
633 // TODO: XMM?
634}
635
636void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() {
637 POP(R15);
638 POP(R15);
639 POP(R14);
640 POP(R13);
641 POP(R12);
642 POP(RBP);
643 POP(RBX);
644}
645
646void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() {
647 PUSH(RCX);
648 PUSH(RDX);
649 PUSH(RSI);
650 PUSH(RDI);
651 PUSH(R8);
652 PUSH(R9);
653 PUSH(R10);
654 PUSH(R11);
655 PUSH(R11);
656}
657
658void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() {
659 POP(R11);
660 POP(R11);
661 POP(R10);
662 POP(R9);
663 POP(R8);
664 POP(RDI);
665 POP(RSI);
666 POP(RDX);
667 POP(RCX);
668}
669
670void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) {
671 SUB(64, R(RSP), Imm8(0x08));
672}
673
674void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) {
675 ADD(64, R(RSP), Imm8(0x08));
676}
677
678#endif // WIN32
679
680#endif // 32bit
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h
new file mode 100644
index 000000000..7e9c156ae
--- /dev/null
+++ b/src/common/x64/abi.h
@@ -0,0 +1,78 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include "common/common_types.h"
21
// x86/x64 ABIs, and helpers to help follow them when JIT-ing code.
// All conventions return values in EAX (+ possibly EDX).
24
// Linux 32-bit, Windows 32-bit (cdecl, System V):
// * Caller pushes arguments right to left
// * Caller fixes stack after call
// * Function subtracts from stack for local storage only.
29// Scratch: EAX ECX EDX
30// Callee-save: EBX ESI EDI EBP
31// Parameters: -
32
33// Windows 64-bit
34// * 4-reg "fastcall" variant, very new-skool stack handling
35// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
36// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
37// Scratch: RAX RCX RDX R8 R9 R10 R11
38// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
39// Parameters: RCX RDX R8 R9, further MOV-ed
40
41// Linux 64-bit
42// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
43// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
44// Callee-save: RBX RBP R12 R13 R14 R15
45// Parameters: RDI RSI RDX RCX R8 R9
46
#ifdef _M_IX86 // 32 bit calling convention, shared by all

// 32-bit doesn't pass parameters in regs, but these are convenient to have anyway when we have to
// choose regs to put stuff in.
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX

// The two names above are NOT real parameter registers: with the 32-bit bog standard cdecl,
// shared between linux and windows, args are pushed.
// MacOSX 32-bit is same as System V with a few exceptions that we probably don't care much about.

// Test with defined() so this matches the #ifdef ARCHITECTURE_x86_64 checks used elsewhere and
// does not depend on the macro expanding to a non-zero value.
#elif defined(ARCHITECTURE_x86_64) // 64 bit calling convention

#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention

// Integer argument registers, in order (see the "Windows 64-bit" notes above).
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9

#else //64-bit Unix (hopefully MacOSX too)

// Integer argument registers, in order (see the "Linux 64-bit" notes above).
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9

#endif // WIN32

#endif // X86
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
new file mode 100644
index 000000000..d9c430c67
--- /dev/null
+++ b/src/common/x64/cpu_detect.cpp
@@ -0,0 +1,187 @@
1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6#include <string>
7#include <thread>
8
9#include "common/common_types.h"
10
11#include "cpu_detect.h"
12
13namespace Common {
14
15#ifndef _MSC_VER
16
17#ifdef __FreeBSD__
18#include <sys/types.h>
19#include <machine/cpufunc.h>
20#endif
21
// Non-MSVC stand-in for the compiler intrinsic of the same name: runs CPUID with
// EAX = function_id and ECX = subfunction_id and stores the resulting
// EAX, EBX, ECX, EDX in info[0..3].
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#ifdef __FreeBSD__
    // Despite the name, this is just do_cpuid() with ECX as second input.
    cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#else
    int reg_eax, reg_ebx, reg_ecx, reg_edx;
    __asm__("cpuid"
            : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx)
            : "a"(function_id), "c"(subfunction_id));
    info[0] = reg_eax;
    info[1] = reg_ebx;
    info[2] = reg_ecx;
    info[3] = reg_edx;
#endif
}
40
41static inline void __cpuid(int info[4], int function_id) {
42 return __cpuidex(info, function_id, 0);
43}
44
45#define _XCR_XFEATURE_ENABLED_MASK 0
46static u64 _xgetbv(u32 index) {
47 u32 eax, edx;
48 __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
49 return ((u64)edx << 32) | eax;
50}
51
52#endif // ifndef _MSC_VER
53
54// Detects the various CPU features
55static CPUCaps Detect() {
56 CPUCaps caps = {};
57
58 caps.num_cores = std::thread::hardware_concurrency();
59
60 // Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
61 // Citra at all anyway
62
63 int cpu_id[4];
64 memset(caps.brand_string, 0, sizeof(caps.brand_string));
65
66 // Detect CPU's CPUID capabilities and grab CPU string
67 __cpuid(cpu_id, 0x00000000);
68 u32 max_std_fn = cpu_id[0]; // EAX
69
70 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
71 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
72 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
73
74 __cpuid(cpu_id, 0x80000000);
75
76 u32 max_ex_fn = cpu_id[0];
77 if (!strcmp(caps.brand_string, "GenuineIntel"))
78 caps.vendor = CPUVendor::INTEL;
79 else if (!strcmp(caps.brand_string, "AuthenticAMD"))
80 caps.vendor = CPUVendor::AMD;
81 else
82 caps.vendor = CPUVendor::OTHER;
83
84 // Set reasonable default brand string even if brand string not available
85 strcpy(caps.cpu_string, caps.brand_string);
86
87 // Detect family and other miscellaneous features
88 if (max_std_fn >= 1) {
89 __cpuid(cpu_id, 0x00000001);
90
91 if ((cpu_id[3] >> 25) & 1) caps.sse = true;
92 if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
93 if ((cpu_id[2]) & 1) caps.sse3 = true;
94 if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
95 if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
96 if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
97 if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
98 if ((cpu_id[2] >> 25) & 1) caps.aes = true;
99
100 if ((cpu_id[3] >> 24) & 1) {
101 caps.fxsave_fxrstor = true;
102 }
103
104 // AVX support requires 3 separate checks:
105 // - Is the AVX bit set in CPUID?
106 // - Is the XSAVE bit set in CPUID?
107 // - XGETBV result has the XCR bit set.
108 if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
109 if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
110 caps.avx = true;
111 if ((cpu_id[2] >> 12) & 1)
112 caps.fma = true;
113 }
114 }
115
116 if (max_std_fn >= 7) {
117 __cpuidex(cpu_id, 0x00000007, 0x00000000);
118 // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
119 if ((cpu_id[1] >> 5) & 1)
120 caps.avx2 = caps.avx;
121 if ((cpu_id[1] >> 3) & 1)
122 caps.bmi1 = true;
123 if ((cpu_id[1] >> 8) & 1)
124 caps.bmi2 = true;
125 }
126 }
127
128 caps.flush_to_zero = caps.sse;
129
130 if (max_ex_fn >= 0x80000004) {
131 // Extract CPU model string
132 __cpuid(cpu_id, 0x80000002);
133 std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
134 __cpuid(cpu_id, 0x80000003);
135 std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
136 __cpuid(cpu_id, 0x80000004);
137 std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
138 }
139
140 if (max_ex_fn >= 0x80000001) {
141 // Check for more features
142 __cpuid(cpu_id, 0x80000001);
143 if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
144 if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
145 if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
146 if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
147 }
148
149 return caps;
150}
151
152const CPUCaps& GetCPUCaps() {
153 static CPUCaps caps = Detect();
154 return caps;
155}
156
157std::string GetCPUCapsString() {
158 auto caps = GetCPUCaps();
159
160 std::string sum(caps.cpu_string);
161 sum += " (";
162 sum += caps.brand_string;
163 sum += ")";
164
165 if (caps.sse) sum += ", SSE";
166 if (caps.sse2) {
167 sum += ", SSE2";
168 if (!caps.flush_to_zero) sum += " (without DAZ)";
169 }
170
171 if (caps.sse3) sum += ", SSE3";
172 if (caps.ssse3) sum += ", SSSE3";
173 if (caps.sse4_1) sum += ", SSE4.1";
174 if (caps.sse4_2) sum += ", SSE4.2";
175 if (caps.avx) sum += ", AVX";
176 if (caps.avx2) sum += ", AVX2";
177 if (caps.bmi1) sum += ", BMI1";
178 if (caps.bmi2) sum += ", BMI2";
179 if (caps.fma) sum += ", FMA";
180 if (caps.aes) sum += ", AES";
181 if (caps.movbe) sum += ", MOVBE";
182 if (caps.long_mode) sum += ", 64-bit support";
183
184 return sum;
185}
186
187} // namespace Common
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
new file mode 100644
index 000000000..0af3a8adb
--- /dev/null
+++ b/src/common/x64/cpu_detect.h
@@ -0,0 +1,66 @@
1// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9namespace Common {
10
/// x86/x64 CPU vendors that may be detected by this module
enum class CPUVendor {
    INTEL, ///< Vendor ID string is "GenuineIntel"
    AMD,   ///< Vendor ID string is "AuthenticAMD"
    OTHER, ///< Any other vendor ID string
};
17
18/// x86/x64 CPU capabilities that may be detected by this module
19struct CPUCaps {
20 CPUVendor vendor;
21 char cpu_string[0x21];
22 char brand_string[0x41];
23 int num_cores;
24 bool sse;
25 bool sse2;
26 bool sse3;
27 bool ssse3;
28 bool sse4_1;
29 bool sse4_2;
30 bool lzcnt;
31 bool avx;
32 bool avx2;
33 bool bmi1;
34 bool bmi2;
35 bool fma;
36 bool fma4;
37 bool aes;
38
39 // Support for the FXSAVE and FXRSTOR instructions
40 bool fxsave_fxrstor;
41
42 bool movbe;
43
44 // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
45 // are automatically set to (signed) zero.
46 bool flush_to_zero;
47
48 // Support for LAHF and SAHF instructions in 64-bit mode
49 bool lahf_sahf_64;
50
51 bool long_mode;
52};
53
54/**
55 * Gets the supported capabilities of the host CPU
56 * @return Reference to a CPUCaps struct with the detected host CPU capabilities
57 */
58const CPUCaps& GetCPUCaps();
59
60/**
61 * Gets a string summary of the name and supported capabilities of the host CPU
62 * @return String summary
63 */
64std::string GetCPUCapsString();
65
66} // namespace Common
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
new file mode 100644
index 000000000..4b79acd1f
--- /dev/null
+++ b/src/common/x64/emitter.cpp
@@ -0,0 +1,1989 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cstring>
19
20#include "common/assert.h"
21#include "common/logging/log.h"
22#include "common/memory_util.h"
23
24#include "abi.h"
25#include "cpu_detect.h"
26#include "emitter.h"
27
28#define PRIx64 "llx"
29
30// Minimize the diff against Dolphin
31#define DYNA_REC JIT
32
33namespace Gen
34{
35
// One row of the normalops table below: the opcode bytes for the different operand forms of
// a single instruction, plus an extension value.
// NOTE(review): judging by the names, the to/from fields cover register <-> r/m forms, the imm
// fields cover immediate forms and the eaximm fields the accumulator short forms, with ext
// presumably being the ModRM /digit used by the immediate forms - confirm against the code
// that consumes the table.
struct NormalOpDef
{
    u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
};
40
// Opcode table with one NormalOpDef row per instruction; the trailing comment on each row
// names the instruction.
// 0xCC is code for invalid combination of immediates
static const NormalOpDef normalops[11] =
{
    {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD
    {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC

    {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB
    {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB

    {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND
    {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR

    {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR
    {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV

    {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from)
    {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP

    {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG
};
61
// Opcode byte values for the SSE instructions emitted by this file.
// Several enumerators intentionally share a value (e.g. sseMOVLPfromRM / sseMOVHLPS are both
// 0x12, sseMOVHPfromRM / sseMOVLHPS are both 0x16).
enum NormalSSEOps
{
    sseCMP = 0xC2,
    sseADD = 0x58, //ADD
    sseSUB = 0x5C, //SUB
    sseAND = 0x54, //AND
    sseANDN = 0x55, //ANDN
    sseOR = 0x56,
    sseXOR = 0x57,
    sseMUL = 0x59, //MUL
    sseDIV = 0x5E, //DIV
    sseMIN = 0x5D, //MIN
    sseMAX = 0x5F, //MAX
    sseCOMIS = 0x2F, //COMIS
    sseUCOMIS = 0x2E, //UCOMIS
    sseSQRT = 0x51, //SQRT
    sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!)
    sseRCP = 0x53, //RCP
    sseMOVAPfromRM = 0x28, //MOVAP from RM
    sseMOVAPtoRM = 0x29, //MOVAP to RM
    sseMOVUPfromRM = 0x10, //MOVUP from RM
    sseMOVUPtoRM = 0x11, //MOVUP to RM
    sseMOVLPfromRM= 0x12,
    sseMOVLPtoRM = 0x13,
    sseMOVHPfromRM= 0x16,
    sseMOVHPtoRM = 0x17,
    sseMOVHLPS = 0x12,
    sseMOVLHPS = 0x16,
    sseMOVDQfromRM = 0x6F,
    sseMOVDQtoRM = 0x7F,
    sseMASKMOVDQU = 0xF7,
    sseLDDQU = 0xF0,
    sseSHUF = 0xC6,
    sseMOVNTDQ = 0xE7,
    sseMOVNTP = 0x2B,
    sseHADD = 0x7C,
};
99
100
101void XEmitter::SetCodePtr(u8 *ptr)
102{
103 code = ptr;
104}
105
106const u8 *XEmitter::GetCodePtr() const
107{
108 return code;
109}
110
111u8 *XEmitter::GetWritableCodePtr()
112{
113 return code;
114}
115
116void XEmitter::ReserveCodeSpace(int bytes)
117{
118 for (int i = 0; i < bytes; i++)
119 *code++ = 0xCC;
120}
121
122const u8 *XEmitter::AlignCode4()
123{
124 int c = int((u64)code & 3);
125 if (c)
126 ReserveCodeSpace(4-c);
127 return code;
128}
129
130const u8 *XEmitter::AlignCode16()
131{
132 int c = int((u64)code & 15);
133 if (c)
134 ReserveCodeSpace(16-c);
135 return code;
136}
137
138const u8 *XEmitter::AlignCodePage()
139{
140 int c = int((u64)code & 4095);
141 if (c)
142 ReserveCodeSpace(4096-c);
143 return code;
144}
145
// Called by emitters of flag-modifying instructions before they write anything.
// This operation modifies flags; check to see the flags are locked.
// If the flags are locked, we should immediately and loudly fail before
// causing a subtle JIT bug.
void XEmitter::CheckFlags()
{
    ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
}
153
154void XEmitter::WriteModRM(int mod, int reg, int rm)
155{
156 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
157}
158
159void XEmitter::WriteSIB(int scale, int index, int base)
160{
161 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
162}
163
// Emits the REX prefix for this operand if one is required (x86_64 builds); on other builds
// it only asserts that nothing about the operand would have needed one.
//   opBits   - operation width; 64 sets REX.W
//   bits     - width used for the byte-register check on the r/m operand
//   customOp - register number for the ModRM reg field; -1 means "use operandReg"
void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const
{
    if (customOp == -1) customOp = operandReg;
#ifdef ARCHITECTURE_x86_64
    u8 op = 0x40;
    // REX.W (whether operation is a 64-bit operation)
    if (opBits == 64) op |= 8;
    // REX.R (whether ModR/M reg field refers to R8-R15.
    if (customOp & 8) op |= 4;
    // REX.X (whether ModR/M SIB index field refers to R8-R15)
    if (indexReg & 8) op |= 2;
    // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
    if (offsetOrBaseReg & 8) op |= 1;
    // Write REX if we have REX bits to write, or if the operation accesses
    // SIL, DIL, BPL, or SPL.
    if (op != 0x40 ||
        (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
        (opBits == 8 && (customOp & 0x10c) == 4))
    {
        emit->Write8(op);
        // Check the operation doesn't access AH, BH, CH, or DH.
        DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
        DEBUG_ASSERT((customOp & 0x100) == 0);
    }
#else
    // No REX prefix is written here: assert that none of the conditions that trigger one in
    // the x86_64 branch above can occur.
    DEBUG_ASSERT(opBits != 64);
    DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
    DEBUG_ASSERT((indexReg & 8) == 0);
    DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
    DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
    DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
#endif
}
197
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const
199{
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1)
208 {
209 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 1) | pp;
210 emit->Write8(0xC5);
211 emit->Write8(RvvvvLpp);
212 }
213 else
214 {
215 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
216 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 1) | pp;
217 emit->Write8(0xC4);
218 emit->Write8(RXBmmmmm);
219 emit->Write8(WvvvvLpp);
220 }
221}
222
// Emits everything that follows the opcode for this operand: the ModRM byte, an optional SIB
// byte and an optional 8- or 32-bit displacement.
//   extraBytes        - added to the address that RIP-relative displacements are measured from
//   _operandReg       - value for the ModRM reg field; INVALID_REG means "use this->operandReg"
//   warn_64bit_offset - when false, the RIP-relative range assertion is suppressed
void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
                      bool warn_64bit_offset) const
{
    if (_operandReg == INVALID_REG)
        _operandReg = (X64Reg)this->operandReg;
    int mod = 0;
    int ireg = indexReg;
    bool SIB = false;
    int _offsetOrBaseReg = this->offsetOrBaseReg;

    if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address
    {
        // Oh, RIP addressing.
        _offsetOrBaseReg = 5;
        emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
        //TODO : add some checks
#ifdef ARCHITECTURE_x86_64
        // The displacement is measured from the code pointer plus the 4 displacement bytes
        // plus extraBytes.
        u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
        s64 distance = (s64)offset - (s64)ripAddr;
        ASSERT_MSG(
            (distance < 0x80000000LL &&
             distance >= -0x80000000LL) ||
            !warn_64bit_offset,
            "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")",
            ripAddr, offset);
        s32 offs = (s32)distance;
        emit->Write32((u32)offs);
#else
        emit->Write32((u32)offset);
#endif
        return;
    }

    if (scale == 0)
    {
        // Oh, no memory, Just a reg.
        mod = 3; //11
    }
    else if (scale >= 1)
    {
        //Ah good, no scaling.
        if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5))
        {
            //Okay, we're good. No SIB necessary.
            int ioff = (int)offset;
            if (ioff == 0)
            {
                mod = 0;
            }
            else if (ioff<-128 || ioff>127)
            {
                mod = 2; //32-bit displacement
            }
            else
            {
                mod = 1; //8-bit displacement
            }
        }
        else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)
        {
            // Scaled index without a base register: SIB with base field 5, mod 0 and a
            // 32-bit displacement (written at the end of this function).
            SIB = true;
            mod = 0;
            _offsetOrBaseReg = 5;
        }
        else //if (scale != SCALE_ATREG)
        {
            if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :(
            {
                //So we have to fake it with SIB encoding :(
                SIB = true;
            }

            if (scale >= SCALE_1 && scale < SCALE_ATREG)
            {
                SIB = true;
            }

            if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4))
            {
                SIB = true;
                ireg = _offsetOrBaseReg;
            }

            //Okay, we're fine. Just disp encoding.
            //We need displacement. Which size?
            int ioff = (int)(s64)offset;
            if (ioff < -128 || ioff > 127)
            {
                mod = 2; //32-bit displacement
            }
            else
            {
                mod = 1; //8-bit displacement
            }
        }
    }

    // Okay. Time to do the actual writing
    // ModRM byte:
    int oreg = _offsetOrBaseReg;
    if (SIB)
        oreg = 4; // rm = 4 announces that a SIB byte follows

    // TODO(ector): WTF is this if about? I don't remember writing it :-)
    //if (RIP)
    //    oreg = 5;

    emit->WriteModRM(mod, _operandReg&7, oreg&7);

    if (SIB)
    {
        //SIB byte
        int ss;
        switch (scale)
        {
        case SCALE_NONE:     _offsetOrBaseReg = 4; ss = 0; break; //RSP
        case SCALE_1:        ss = 0; break;
        case SCALE_2:        ss = 1; break;
        case SCALE_4:        ss = 2; break;
        case SCALE_8:        ss = 3; break;
        case SCALE_NOBASE_2: ss = 1; break;
        case SCALE_NOBASE_4: ss = 2; break;
        case SCALE_NOBASE_8: ss = 3; break;
        case SCALE_ATREG:    ss = 0; break;
        default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break;
        }
        emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7)));
    }

    if (mod == 1) //8-bit disp
    {
        emit->Write8((u8)(s8)(s32)offset);
    }
    else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp
    {
        emit->Write32((u32)offset);
    }
}
361
362// W = operand extended width (1 if 64-bit)
363// R = register# upper bit
364// X = scale amnt upper bit
365// B = base register# upper bit
366void XEmitter::Rex(int w, int r, int x, int b)
367{
368 w = w ? 1 : 0;
369 r = r ? 1 : 0;
370 x = x ? 1 : 0;
371 b = b ? 1 : 0;
372 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
373 if (rx != 0x40)
374 Write8(rx);
375}
376
377void XEmitter::JMP(const u8 *addr, bool force5Bytes)
378{
379 u64 fn = (u64)addr;
380 if (!force5Bytes)
381 {
382 s64 distance = (s64)(fn - ((u64)code + 2));
383 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
384 "Jump target too far away, needs force5Bytes = true");
385 //8 bits will do
386 Write8(0xEB);
387 Write8((u8)(s8)distance);
388 }
389 else
390 {
391 s64 distance = (s64)(fn - ((u64)code + 5));
392
393 ASSERT_MSG(
394 distance >= -0x80000000LL && distance < 0x80000000LL,
395 "Jump target too far away, needs indirect register");
396 Write8(0xE9);
397 Write32((u32)(s32)distance);
398 }
399}
400
401void XEmitter::JMPptr(const OpArg &arg2)
402{
403 OpArg arg = arg2;
404 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
405 arg.operandReg = 4;
406 arg.WriteRex(this, 0, 0);
407 Write8(0xFF);
408 arg.WriteRest(this);
409}
410
411//Can be used to trap other processors, before overwriting their code
412// not used in dolphin
413void XEmitter::JMPself()
414{
415 Write8(0xEB);
416 Write8(0xFE);
417}
418
419void XEmitter::CALLptr(OpArg arg)
420{
421 if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument");
422 arg.operandReg = 2;
423 arg.WriteRex(this, 0, 0);
424 Write8(0xFF);
425 arg.WriteRest(this);
426}
427
428void XEmitter::CALL(const void *fnptr)
429{
430 u64 distance = u64(fnptr) - (u64(code) + 5);
431 ASSERT_MSG(
432 distance < 0x0000000080000000ULL ||
433 distance >= 0xFFFFFFFF80000000ULL,
434 "CALL out of range (%p calls %p)", code, fnptr);
435 Write8(0xE8);
436 Write32(u32(distance));
437}
438
439FixupBranch XEmitter::J(bool force5bytes)
440{
441 FixupBranch branch;
442 branch.type = force5bytes ? 1 : 0;
443 branch.ptr = code + (force5bytes ? 5 : 2);
444 if (!force5bytes)
445 {
446 //8 bits will do
447 Write8(0xEB);
448 Write8(0);
449 }
450 else
451 {
452 Write8(0xE9);
453 Write32(0);
454 }
455 return branch;
456}
457
458FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes)
459{
460 FixupBranch branch;
461 branch.type = force5bytes ? 1 : 0;
462 branch.ptr = code + (force5bytes ? 6 : 2);
463 if (!force5bytes)
464 {
465 //8 bits will do
466 Write8(0x70 + conditionCode);
467 Write8(0);
468 }
469 else
470 {
471 Write8(0x0F);
472 Write8(0x80 + conditionCode);
473 Write32(0);
474 }
475 return branch;
476}
477
478void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
479{
480 u64 fn = (u64)addr;
481 s64 distance = (s64)(fn - ((u64)code + 2));
482 if (distance < -0x80 || distance >= 0x80 || force5bytes)
483 {
484 distance = (s64)(fn - ((u64)code + 6));
485 ASSERT_MSG(
486 distance >= -0x80000000LL && distance < 0x80000000LL,
487 "Jump target too far away, needs indirect register");
488 Write8(0x0F);
489 Write8(0x80 + conditionCode);
490 Write32((u32)(s32)distance);
491 }
492 else
493 {
494 Write8(0x70 + conditionCode);
495 Write8((u8)(s8)distance);
496 }
497}
498
499void XEmitter::SetJumpTarget(const FixupBranch &branch)
500{
501 if (branch.type == 0)
502 {
503 s64 distance = (s64)(code - branch.ptr);
504 ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true");
505 branch.ptr[-1] = (u8)(s8)distance;
506 }
507 else if (branch.type == 1)
508 {
509 s64 distance = (s64)(code - branch.ptr);
510 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register");
511 ((s32*)branch.ptr)[-1] = (s32)distance;
512 }
513}
514
515// INC/DEC considered harmful on newer CPUs due to partial flag set.
516// Use ADD, SUB instead.
517
518/*
519void XEmitter::INC(int bits, OpArg arg)
520{
521 if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
522 arg.operandReg = 0;
523 if (bits == 16) {Write8(0x66);}
524 arg.WriteRex(this, bits, bits);
525 Write8(bits == 8 ? 0xFE : 0xFF);
526 arg.WriteRest(this);
527}
528void XEmitter::DEC(int bits, OpArg arg)
529{
530 if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
531 arg.operandReg = 1;
532 if (bits == 16) {Write8(0x66);}
533 arg.WriteRex(this, bits, bits);
534 Write8(bits == 8 ? 0xFE : 0xFF);
535 arg.WriteRest(this);
536}
537*/
538
539//Single byte opcodes
540//There is no PUSHAD/POPAD in 64-bit mode.
541void XEmitter::INT3() {Write8(0xCC);}
542void XEmitter::RET() {Write8(0xC3);}
543void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
544
545// The first sign of decadence: optimized NOPs.
546void XEmitter::NOP(size_t size)
547{
548 DEBUG_ASSERT((int)size > 0);
549 while (true)
550 {
551 switch (size)
552 {
553 case 0:
554 return;
555 case 1:
556 Write8(0x90);
557 return;
558 case 2:
559 Write8(0x66); Write8(0x90);
560 return;
561 case 3:
562 Write8(0x0F); Write8(0x1F); Write8(0x00);
563 return;
564 case 4:
565 Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00);
566 return;
567 case 5:
568 Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00);
569 Write8(0x00);
570 return;
571 case 6:
572 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44);
573 Write8(0x00); Write8(0x00);
574 return;
575 case 7:
576 Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00);
577 Write8(0x00); Write8(0x00); Write8(0x00);
578 return;
579 case 8:
580 Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00);
581 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84);
585 Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00);
586 Write8(0x00);
587 return;
588 case 10:
589 Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F);
590 Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00);
591 Write8(0x00); Write8(0x00);
592 return;
593 default:
594 // Even though x86 instructions are allowed to be up to 15 bytes long,
595 // AMD advises against using NOPs longer than 11 bytes because they
596 // carry a performance penalty on CPUs older than AMD family 16h.
597 Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F);
598 Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00);
599 Write8(0x00); Write8(0x00); Write8(0x00);
600 size -= 11;
601 continue;
602 }
603 }
604}
605
606void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu
607void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry
608void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry
609void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry
610
611//TODO: xchg ah, al ???
612void XEmitter::XCHG_AHAL()
613{
614 Write8(0x86);
615 Write8(0xe0);
616 // alt. 86 c4
617}
618
619//These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
620void XEmitter::LAHF() {Write8(0x9F);}
621void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);}
622
623void XEmitter::PUSHF() {Write8(0x9C);}
624void XEmitter::POPF() {CheckFlags(); Write8(0x9D);}
625
626void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);}
627void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);}
628void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);}
629
630void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg)
631{
632 if (bits == 16)
633 Write8(0x66);
634 Rex(bits == 64, 0, 0, (int)reg >> 3);
635 Write8(byte + ((int)reg & 7));
636}
637
638void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg)
639{
640 if (bits == 16)
641 Write8(0x66);
642 Rex(bits==64, 0, 0, (int)reg >> 3);
643 Write8(byte1);
644 Write8(byte2 + ((int)reg & 7));
645}
646
647void XEmitter::CWD(int bits)
648{
649 if (bits == 16)
650 Write8(0x66);
651 Rex(bits == 64, 0, 0, 0);
652 Write8(0x99);
653}
654
655void XEmitter::CBW(int bits)
656{
657 if (bits == 8)
658 Write8(0x66);
659 Rex(bits == 32, 0, 0, 0);
660 Write8(0x98);
661}
662
663//Simple opcodes
664
665
666//push/pop do not need wide to be 64-bit
667void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
668void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
669
670void XEmitter::PUSH(int bits, const OpArg &reg)
671{
672 if (reg.IsSimpleReg())
673 PUSH(reg.GetSimpleReg());
674 else if (reg.IsImm())
675 {
676 switch (reg.GetImmBits())
677 {
678 case 8:
679 Write8(0x6A);
680 Write8((u8)(s8)reg.offset);
681 break;
682 case 16:
683 Write8(0x66);
684 Write8(0x68);
685 Write16((u16)(s16)(s32)reg.offset);
686 break;
687 case 32:
688 Write8(0x68);
689 Write32((u32)reg.offset);
690 break;
691 default:
692 ASSERT_MSG(0, "PUSH - Bad imm bits");
693 break;
694 }
695 }
696 else
697 {
698 if (bits == 16)
699 Write8(0x66);
700 reg.WriteRex(this, bits, bits);
701 Write8(0xFF);
702 reg.WriteRest(this, 0, (X64Reg)6);
703 }
704}
705
706void XEmitter::POP(int /*bits*/, const OpArg &reg)
707{
708 if (reg.IsSimpleReg())
709 POP(reg.GetSimpleReg());
710 else
711 ASSERT_MSG(0, "POP - Unsupported encoding");
712}
713
714void XEmitter::BSWAP(int bits, X64Reg reg)
715{
716 if (bits >= 32)
717 {
718 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
719 }
720 else if (bits == 16)
721 {
722 ROL(16, R(reg), Imm8(8));
723 }
724 else if (bits == 8)
725 {
726 // Do nothing - can't bswap a single byte...
727 }
728 else
729 {
730 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
731 }
732}
733
734// Undefined opcode - reserved
735// If we ever need a way to always cause a non-breakpoint hard exception...
736void XEmitter::UD2()
737{
738 Write8(0x0F);
739 Write8(0x0B);
740}
741
742void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg)
743{
744 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
745 arg.operandReg = (u8)level;
746 arg.WriteRex(this, 0, 0);
747 Write8(0x0F);
748 Write8(0x18);
749 arg.WriteRest(this);
750}
751
752void XEmitter::SETcc(CCFlags flag, OpArg dest)
753{
754 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
755 dest.operandReg = 0;
756 dest.WriteRex(this, 0, 8);
757 Write8(0x0F);
758 Write8(0x90 + (u8)flag);
759 dest.WriteRest(this);
760}
761
762void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag)
763{
764 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
765 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
766 if (bits == 16)
767 Write8(0x66);
768 src.operandReg = dest;
769 src.WriteRex(this, bits, bits);
770 Write8(0x0F);
771 Write8(0x40 + (u8)flag);
772 src.WriteRest(this);
773}
774
775void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
776{
777 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
778 CheckFlags();
779 src.operandReg = ext;
780 if (bits == 16)
781 Write8(0x66);
782 src.WriteRex(this, bits, bits, 0);
783 if (bits == 8)
784 {
785 Write8(0xF6);
786 }
787 else
788 {
789 Write8(0xF7);
790 }
791 src.WriteRest(this);
792}
793
794void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);}
795void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);}
796void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
797void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
798void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
799void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
800
801void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
802{
803 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
804 CheckFlags();
805 src.operandReg = (u8)dest;
806 if (bits == 16)
807 Write8(0x66);
808 if (rep)
809 Write8(0xF3);
810 src.WriteRex(this, bits, bits);
811 Write8(0x0F);
812 Write8(byte2);
813 src.WriteRest(this);
814}
815
816void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
817{
818 if (bits <= 16)
819 ASSERT_MSG(0, "MOVNTI - bits<=16");
820 WriteBitSearchType(bits, src, dest, 0xC3);
821}
822
823void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
824void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
825
826void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
827{
828 CheckFlags();
829 if (!Common::GetCPUCaps().bmi1)
830 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
831 WriteBitSearchType(bits, dest, src, 0xBC, true);
832}
833void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
834{
835 CheckFlags();
836 if (!Common::GetCPUCaps().lzcnt)
837 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
838 WriteBitSearchType(bits, dest, src, 0xBD, true);
839}
840
841void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
842{
843 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
844 if (dbits == sbits)
845 {
846 MOV(dbits, R(dest), src);
847 return;
848 }
849 src.operandReg = (u8)dest;
850 if (dbits == 16)
851 Write8(0x66);
852 src.WriteRex(this, dbits, sbits);
853 if (sbits == 8)
854 {
855 Write8(0x0F);
856 Write8(0xBE);
857 }
858 else if (sbits == 16)
859 {
860 Write8(0x0F);
861 Write8(0xBF);
862 }
863 else if (sbits == 32 && dbits == 64)
864 {
865 Write8(0x63);
866 }
867 else
868 {
869 Crash();
870 }
871 src.WriteRest(this);
872}
873
874void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
875{
876 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
877 if (dbits == sbits)
878 {
879 MOV(dbits, R(dest), src);
880 return;
881 }
882 src.operandReg = (u8)dest;
883 if (dbits == 16)
884 Write8(0x66);
885 //the 32bit result is automatically zero extended to 64bit
886 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
887 if (sbits == 8)
888 {
889 Write8(0x0F);
890 Write8(0xB6);
891 }
892 else if (sbits == 16)
893 {
894 Write8(0x0F);
895 Write8(0xB7);
896 }
897 else if (sbits == 32 && dbits == 64)
898 {
899 Write8(0x8B);
900 }
901 else
902 {
903 ASSERT_MSG(0, "MOVZX - Invalid size");
904 }
905 src.WriteRest(this);
906}
907
908void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src)
909{
910 ASSERT_MSG(Common::GetCPUCaps().movbe, "Generating MOVBE on a system that does not support it.");
911 if (bits == 8)
912 {
913 MOV(bits, dest, src);
914 return;
915 }
916
917 if (bits == 16)
918 Write8(0x66);
919
920 if (dest.IsSimpleReg())
921 {
922 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
923 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
924 Write8(0x0F); Write8(0x38); Write8(0xF0);
925 src.WriteRest(this, 0, dest.GetSimpleReg());
926 }
927 else if (src.IsSimpleReg())
928 {
929 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
930 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
931 Write8(0x0F); Write8(0x38); Write8(0xF1);
932 dest.WriteRest(this, 0, src.GetSimpleReg());
933 }
934 else
935 {
936 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
937 }
938}
939
940
941void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
942{
943 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
944 src.operandReg = (u8)dest;
945 if (bits == 16)
946 Write8(0x66); //TODO: performance warning
947 src.WriteRex(this, bits, bits);
948 Write8(0x8D);
949 src.WriteRest(this, 0, INVALID_REG, bits == 64);
950}
951
952//shift can be either imm8 or cl
953void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
954{
955 CheckFlags();
956 bool writeImm = false;
957 if (dest.IsImm())
958 {
959 ASSERT_MSG(0, "WriteShift - can't shift imms");
960 }
961 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
962 {
963 ASSERT_MSG(0, "WriteShift - illegal argument");
964 }
965 dest.operandReg = ext;
966 if (bits == 16)
967 Write8(0x66);
968 dest.WriteRex(this, bits, bits, 0);
969 if (shift.GetImmBits() == 8)
970 {
971 //ok an imm
972 u8 imm = (u8)shift.offset;
973 if (imm == 1)
974 {
975 Write8(bits == 8 ? 0xD0 : 0xD1);
976 }
977 else
978 {
979 writeImm = true;
980 Write8(bits == 8 ? 0xC0 : 0xC1);
981 }
982 }
983 else
984 {
985 Write8(bits == 8 ? 0xD2 : 0xD3);
986 }
987 dest.WriteRest(this, writeImm ? 1 : 0);
988 if (writeImm)
989 Write8((u8)shift.offset);
990}
991
992// large rotates and shift are slower on intel than amd
993// intel likes to rotate by 1, and the op is smaller too
994void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
995void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
996void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
997void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
998void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
999void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
1000void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
1001
1002// index can be either imm8 or register, don't use memory destination because it's slow
1003void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
1004{
1005 CheckFlags();
1006 if (dest.IsImm())
1007 {
1008 ASSERT_MSG(0, "WriteBitTest - can't test imms");
1009 }
1010 if ((index.IsImm() && index.GetImmBits() != 8))
1011 {
1012 ASSERT_MSG(0, "WriteBitTest - illegal argument");
1013 }
1014 if (bits == 16)
1015 Write8(0x66);
1016 if (index.IsImm())
1017 {
1018 dest.WriteRex(this, bits, bits);
1019 Write8(0x0F); Write8(0xBA);
1020 dest.WriteRest(this, 1, (X64Reg)ext);
1021 Write8((u8)index.offset);
1022 }
1023 else
1024 {
1025 X64Reg operand = index.GetSimpleReg();
1026 dest.WriteRex(this, bits, bits, operand);
1027 Write8(0x0F); Write8(0x83 + 8*ext);
1028 dest.WriteRest(this, 1, operand);
1029 }
1030}
1031
1032void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);}
1033void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
1034void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
1035void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
1036
1037//shift can be either imm8 or cl
1038void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
1039{
1040 CheckFlags();
1041 if (dest.IsImm())
1042 {
1043 ASSERT_MSG(0, "SHRD - can't use imms as destination");
1044 }
1045 if (!src.IsSimpleReg())
1046 {
1047 ASSERT_MSG(0, "SHRD - must use simple register as source");
1048 }
1049 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
1050 {
1051 ASSERT_MSG(0, "SHRD - illegal shift");
1052 }
1053 if (bits == 16)
1054 Write8(0x66);
1055 X64Reg operand = src.GetSimpleReg();
1056 dest.WriteRex(this, bits, bits, operand);
1057 if (shift.GetImmBits() == 8)
1058 {
1059 Write8(0x0F); Write8(0xAC);
1060 dest.WriteRest(this, 1, operand);
1061 Write8((u8)shift.offset);
1062 }
1063 else
1064 {
1065 Write8(0x0F); Write8(0xAD);
1066 dest.WriteRest(this, 0, operand);
1067 }
1068}
1069
1070void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
1071{
1072 CheckFlags();
1073 if (dest.IsImm())
1074 {
1075 ASSERT_MSG(0, "SHLD - can't use imms as destination");
1076 }
1077 if (!src.IsSimpleReg())
1078 {
1079 ASSERT_MSG(0, "SHLD - must use simple register as source");
1080 }
1081 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8))
1082 {
1083 ASSERT_MSG(0, "SHLD - illegal shift");
1084 }
1085 if (bits == 16)
1086 Write8(0x66);
1087 X64Reg operand = src.GetSimpleReg();
1088 dest.WriteRex(this, bits, bits, operand);
1089 if (shift.GetImmBits() == 8)
1090 {
1091 Write8(0x0F); Write8(0xA4);
1092 dest.WriteRest(this, 1, operand);
1093 Write8((u8)shift.offset);
1094 }
1095 else
1096 {
1097 Write8(0x0F); Write8(0xA5);
1098 dest.WriteRest(this, 0, operand);
1099 }
1100}
1101
1102void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits)
1103{
1104 if (bits == 16)
1105 emit->Write8(0x66);
1106
1107 this->operandReg = (u8)_operandReg;
1108 WriteRex(emit, bits, bits);
1109 emit->Write8(op);
1110 WriteRest(emit);
1111}
1112
1113//operand can either be immediate or register
// Encodes one "normal" two-operand instruction (selected by `op` through the
// `normalops` table) with *this as the r/m operand and `operand` as the other
// side.
//   emit    - emitter that receives the bytes.
//   toRM    - true when *this is the destination. Required for an immediate
//             `operand`; for a register `operand` it picks the toRm* table
//             opcodes over the fromRm* ones.
//   op      - index into `normalops`.
//   operand - an immediate or a register.
//   bits    - operation width: 8, 16, 32 or 64.
// For immediates the shortest applicable encoding is chosen; several of those
// short forms emit everything themselves and return early.
// NOTE(review): a table entry of 0xCC appears to mean "this op has no such
// encoding" - confirm against the normalops definition.
1114void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
1115{
1116 X64Reg _operandReg;
1117 if (IsImm())
1118 {
1119 ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
1120 }
1121
 // 16-bit operations need the operand-size override prefix.
1122 if (bits == 16)
1123 emit->Write8(0x66);
1124
 // Width in bits of the immediate still to be written after WriteRest (0 = none).
1125 int immToWrite = 0;
1126
1127 if (operand.IsImm())
1128 {
1129 WriteRex(emit, bits, bits);
1130
1131 if (!toRM)
1132 {
1133 ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
1134 }
1135
 // Case 1: 8-bit operation with an 8-bit immediate.
1136 if (operand.scale == SCALE_IMM8 && bits == 8)
1137 {
1138 // op al, imm8
1139 if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC)
1140 {
1141 emit->Write8(normalops[op].eaximm8);
1142 emit->Write8((u8)operand.offset);
1143 return;
1144 }
1145 // mov reg, imm8
1146 if (!scale && op == nrmMOV)
1147 {
1148 emit->Write8(0xB0 + (offsetOrBaseReg & 7));
1149 emit->Write8((u8)operand.offset);
1150 return;
1151 }
1152 // op r/m8, imm8
1153 emit->Write8(normalops[op].imm8);
1154 immToWrite = 8;
1155 }
 // Case 2: immediate size matches the operation (a 32-bit immediate is also
 // accepted for 64-bit operations).
1156 else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
1157 (operand.scale == SCALE_IMM32 && bits == 32) ||
1158 (operand.scale == SCALE_IMM32 && bits == 64))
1159 {
1160 // Try to save immediate size if we can, but first check to see
1161 // if the instruction supports simm8.
1162 // op r/m, imm8
1163 if (normalops[op].simm8 != 0xCC &&
1164 ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
1165 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset)))
1166 {
1167 emit->Write8(normalops[op].simm8);
1168 immToWrite = 8;
1169 }
1170 else
1171 {
1172 // mov reg, imm
1173 if (!scale && op == nrmMOV && bits != 64)
1174 {
1175 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1176 if (bits == 16)
1177 emit->Write16((u16)operand.offset);
1178 else
1179 emit->Write32((u32)operand.offset);
1180 return;
1181 }
1182 // op eax, imm
1183 if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC)
1184 {
1185 emit->Write8(normalops[op].eaximm32);
1186 if (bits == 16)
1187 emit->Write16((u16)operand.offset);
1188 else
1189 emit->Write32((u32)operand.offset);
1190 return;
1191 }
1192 // op r/m, imm
1193 emit->Write8(normalops[op].imm32);
1194 immToWrite = bits == 16 ? 16 : 32;
1195 }
1196 }
 // Case 3: 8-bit immediate on a wider operation; uses the simm8 table opcode.
1197 else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
1198 (operand.scale == SCALE_IMM8 && bits == 32) ||
1199 (operand.scale == SCALE_IMM8 && bits == 64))
1200 {
1201 // op r/m, imm8
1202 emit->Write8(normalops[op].simm8);
1203 immToWrite = 8;
1204 }
 // Case 4: 64-bit immediate; only accepted for MOV into a register.
1205 else if (operand.scale == SCALE_IMM64 && bits == 64)
1206 {
1207 if (scale)
1208 {
1209 ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
1210 }
1211 // mov reg64, imm64
1212 else if (op == nrmMOV)
1213 {
1214 emit->Write8(0xB8 + (offsetOrBaseReg & 7));
1215 emit->Write64((u64)operand.offset);
1216 return;
1217 }
1218 ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
1219 }
1220 else
1221 {
1222 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1223 }
1224 _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM
1225 }
1226 else
1227 {
 // Non-immediate operand: its offsetOrBaseReg is used as the register
 // placed in the reg field.
1228 _operandReg = (X64Reg)operand.offsetOrBaseReg;
1229 WriteRex(emit, bits, bits, _operandReg);
1230 // op r/m, reg
1231 if (toRM)
1232 {
1233 emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
1234 }
1235 // op reg, r/m
1236 else
1237 {
1238 emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
1239 }
1240 }
 // immToWrite is in bits; >> 3 converts it to the byte count handed to WriteRest.
1241 WriteRest(emit, immToWrite >> 3, _operandReg);
 // Finally emit the pending immediate, if any, at the width chosen above.
1242 switch (immToWrite)
1243 {
1244 case 0:
1245 break;
1246 case 8:
1247 emit->Write8((u8)operand.offset);
1248 break;
1249 case 16:
1250 emit->Write16((u16)operand.offset);
1251 break;
1252 case 32:
1253 emit->Write32((u32)operand.offset);
1254 break;
1255 default:
1256 ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
1257 }
1258}
1259
1260void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
1261{
1262 if (a1.IsImm())
1263 {
1264 //Booh! Can't write to an imm
1265 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1266 return;
1267 }
1268 if (a2.IsImm())
1269 {
1270 a1.WriteNormalOp(emit, true, op, a2, bits);
1271 }
1272 else
1273 {
1274 if (a1.IsSimpleReg())
1275 {
1276 a2.WriteNormalOp(emit, false, op, a1, bits);
1277 }
1278 else
1279 {
1280 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory");
1281 a1.WriteNormalOp(emit, true, op, a2, bits);
1282 }
1283 }
1284}
1285
1286void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
1287void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
1288void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
1289void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
1290void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
1291void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
1292void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
1293void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
1294{
1295 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1296 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1297 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1298}
1299void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
1300void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
1301void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
1302
1303void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
1304{
1305 CheckFlags();
1306 if (bits == 8)
1307 {
1308 ASSERT_MSG(0, "IMUL - illegal bit size!");
1309 return;
1310 }
1311
1312 if (a1.IsImm())
1313 {
1314 ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
1315 return;
1316 }
1317
1318 if (!a2.IsImm())
1319 {
1320 ASSERT_MSG(0, "IMUL - third arg must be imm!");
1321 return;
1322 }
1323
1324 if (bits == 16)
1325 Write8(0x66);
1326 a1.WriteRex(this, bits, bits, regOp);
1327
1328 if (a2.GetImmBits() == 8 ||
1329 (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
1330 (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset))
1331 {
1332 Write8(0x6B);
1333 a1.WriteRest(this, 1, regOp);
1334 Write8((u8)a2.offset);
1335 }
1336 else
1337 {
1338 Write8(0x69);
1339 if (a2.GetImmBits() == 16 && bits == 16)
1340 {
1341 a1.WriteRest(this, 2, regOp);
1342 Write16((u16)a2.offset);
1343 }
1344 else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64))
1345 {
1346 a1.WriteRest(this, 4, regOp);
1347 Write32((u32)a2.offset);
1348 }
1349 else
1350 {
1351 ASSERT_MSG(0, "IMUL - unhandled case!");
1352 }
1353 }
1354}
1355
1356void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
1357{
1358 CheckFlags();
1359 if (bits == 8)
1360 {
1361 ASSERT_MSG(0, "IMUL - illegal bit size!");
1362 return;
1363 }
1364
1365 if (a.IsImm())
1366 {
1367 IMUL(bits, regOp, R(regOp), a) ;
1368 return;
1369 }
1370
1371 if (bits == 16)
1372 Write8(0x66);
1373 a.WriteRex(this, bits, bits, regOp);
1374 Write8(0x0F);
1375 Write8(0xAF);
1376 a.WriteRest(this, 0, regOp);
1377}
1378
1379
1380void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1381{
1382 if (opPrefix)
1383 Write8(opPrefix);
1384 arg.operandReg = regOp;
1385 arg.WriteRex(this, 0, 0);
1386 Write8(0x0F);
1387 if (op > 0xFF)
1388 Write8((op >> 8) & 0xFF);
1389 Write8(op & 0xFF);
1390 arg.WriteRest(this, extrabytes);
1391}
1392
1393void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1394{
1395 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1396}
1397
/// Maps the high byte of `op` to the VEX "mmmmm" opcode-map selector:
/// 0x3A -> 3, 0x38 -> 2, anything else -> 1.
static int GetVEXmmmmm(u16 op)
{
    // Currently, only 0x38 and 0x3A are used as secondary escape byte.
    switch (op >> 8)
    {
    case 0x3A:
        return 3;
    case 0x38:
        return 2;
    default:
        return 1;
    }
}
1408
/// Maps a legacy prefix byte to the VEX "pp" field:
/// 0x66 -> 1, 0xF3 -> 2, 0xF2 -> 3, anything else -> 0.
static int GetVEXpp(u8 opPrefix)
{
    switch (opPrefix)
    {
    case 0x66:
        return 1;
    case 0xF3:
        return 2;
    case 0xF2:
        return 3;
    default:
        return 0;
    }
}
1420
1421void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1422{
1423 if (!Common::GetCPUCaps().avx)
1424 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
1425 int mmmmm = GetVEXmmmmm(op);
1426 int pp = GetVEXpp(opPrefix);
1427 // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here
1428 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
1429 Write8(op & 0xFF);
1430 arg.WriteRest(this, extrabytes, regOp1);
1431}
1432
1433// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1434void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1435{
1436 if (size != 32 && size != 64)
1437 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
1438 int mmmmm = GetVEXmmmmm(op);
1439 int pp = GetVEXpp(opPrefix);
1440 arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
1441 Write8(op & 0xFF);
1442 arg.WriteRest(this, extrabytes, regOp1);
1443}
1444
1445void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1446{
1447 CheckFlags();
1448 if (!Common::GetCPUCaps().bmi1)
1449 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
1450 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1451}
1452
1453void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
1454{
1455 CheckFlags();
1456 if (!Common::GetCPUCaps().bmi2)
1457 ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
1458 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1459}
1460
1461void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);}
1462void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);}
1463
1464void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg)
1465{
1466#ifdef ARCHITECTURE_x86_64
1467 // Alternate encoding
1468 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1469 arg.operandReg = dest;
1470 Write8(0x66);
1471 arg.WriteRex(this, 64, 0);
1472 Write8(0x0f);
1473 Write8(0x6E);
1474 arg.WriteRest(this, 0);
1475#else
1476 arg.operandReg = dest;
1477 Write8(0xF3);
1478 Write8(0x0f);
1479 Write8(0x7E);
1480 arg.WriteRest(this, 0);
1481#endif
1482}
1483
1484void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src)
1485{
1486 if (src > 7 || arg.IsSimpleReg())
1487 {
1488 // Alternate encoding
1489 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1490 arg.operandReg = src;
1491 Write8(0x66);
1492 arg.WriteRex(this, 64, 0);
1493 Write8(0x0f);
1494 Write8(0x7E);
1495 arg.WriteRest(this, 0);
1496 }
1497 else
1498 {
1499 arg.operandReg = src;
1500 arg.WriteRex(this, 0, 0);
1501 Write8(0x66);
1502 Write8(0x0f);
1503 Write8(0xD6);
1504 arg.WriteRest(this, 0);
1505 }
1506}
1507
1508void XEmitter::WriteMXCSR(OpArg arg, int ext)
1509{
1510 if (arg.IsImm() || arg.IsSimpleReg())
1511 ASSERT_MSG(0, "MXCSR - invalid operand");
1512
1513 arg.operandReg = ext;
1514 arg.WriteRex(this, 0, 0);
1515 Write8(0x0F);
1516 Write8(0xAE);
1517 arg.WriteRest(this);
1518}
1519
1520void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
1521void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}
1522
1523void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
1524void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
1525void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
1526
1527void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
1528void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
1529void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
1530void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
1531void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
1532void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
1533void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
1534void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
1535void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
1536void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
1537void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
1538void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
1539void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
1540void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
1541void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
1542void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
1543void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
1544
1545void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
1546void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
1547void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
1548void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
1549void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
1550void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
1551void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
1552void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
1553void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
1554void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
1555void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
1556void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
1557void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
1558void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
1559void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
1560void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
1561void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
1562void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
1563void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
1564void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
1565void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
1566void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
1567void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
1568void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
1569void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
1570void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
1571void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
1572void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
1573
1574void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
1575
1576void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
1577void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
1578void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
1579void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
1580
1581void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
1582void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
1583void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
1584void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
1585
1586void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
1587void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
1588void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
1589void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
1590
1591void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
1592void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
1593void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
1594void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
1595
1596void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
1597void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
1598void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
1599void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
1600
1601void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
1602void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
1603void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
1604void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
1605
1606void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
1607void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
1608void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
1609void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
1610
1611void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
1612void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
1613
1614void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
1615void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
1616
1617void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
1618void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
1619void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
1620void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
1621void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
1622void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
1623
1624void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
1625void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
1626void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
1627void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
1628
1629void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
1630void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
1631void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
1632void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
1633
1634void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
1635
1636void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
1637void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
1638
1639void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
1640
1641// THESE TWO ARE UNTESTED.
1642void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
1643void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
1644
1645void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
1646void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
1647
1648void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
1649{
1650 if (Common::GetCPUCaps().sse3)
1651 {
1652 WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup
1653 }
1654 else
1655 {
1656 // Simulate this instruction with SSE2 instructions
1657 if (!arg.IsSimpleReg(regOp))
1658 MOVSD(regOp, arg);
1659 UNPCKLPD(regOp, R(regOp));
1660 }
1661}
1662
1663//There are a few more left
1664
1665// Also some integer instructions are missing
1666void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
1667void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
1668void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
1669
1670void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
1671void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
1672void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);}
1673void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);}
1674
1675void XEmitter::PSRLW(X64Reg reg, int shift)
1676{
1677 WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
1678 Write8(shift);
1679}
1680
1681void XEmitter::PSRLD(X64Reg reg, int shift)
1682{
1683 WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
1684 Write8(shift);
1685}
1686
1687void XEmitter::PSRLQ(X64Reg reg, int shift)
1688{
1689 WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
1690 Write8(shift);
1691}
1692
1693void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
1694{
1695 WriteSSEOp(0x66, 0xd3, reg, arg);
1696}
1697
1698void XEmitter::PSRLDQ(X64Reg reg, int shift) {
1699 WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
1700 Write8(shift);
1701}
1702
1703void XEmitter::PSLLW(X64Reg reg, int shift)
1704{
1705 WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
1706 Write8(shift);
1707}
1708
1709void XEmitter::PSLLD(X64Reg reg, int shift)
1710{
1711 WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
1712 Write8(shift);
1713}
1714
1715void XEmitter::PSLLQ(X64Reg reg, int shift)
1716{
1717 WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
1718 Write8(shift);
1719}
1720
1721void XEmitter::PSLLDQ(X64Reg reg, int shift) {
1722 WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
1723 Write8(shift);
1724}
1725
1726void XEmitter::PSRAW(X64Reg reg, int shift)
1727{
1728 WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
1729 Write8(shift);
1730}
1731
1732void XEmitter::PSRAD(X64Reg reg, int shift)
1733{
1734 WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
1735 Write8(shift);
1736}
1737
1738void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1739{
1740 if (!Common::GetCPUCaps().ssse3)
1741 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1742 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1743}
1744
1745void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
1746{
1747 if (!Common::GetCPUCaps().sse4_1)
1748 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1749 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1750}
1751
1752void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
1753void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
1754void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
1755void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
1756
1757void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
1758void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
1759void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
1760void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
1761void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
1762void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
1763void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
1764void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
1765
1766void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
1767void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
1768void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
1769void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
1770void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
1771void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
1772void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
1773void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
1774void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
1775void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
1776void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
1777void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
1778
1779void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
1780void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
1781void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
1782void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
1783void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
1784
1785void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
1786void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
1787void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
1788void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
1789
1790void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
1791void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
1792void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
1793void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
1794
1795void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
1796void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
1797void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
1798void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
1799
1800void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
1801void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
1802void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
1803void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
1804
1805void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
1806void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
1807void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
1808void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
1809
1810void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
1811void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
1812void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
1813void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
1814
1815void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
1816void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
1817
1818void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
1819void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
1820void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
1821
1822void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
1823void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
1824void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
1825
1826void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
1827void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
1828
1829void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
1830void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
1831
1832void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
1833void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
1834void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
1835void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
1836
1837void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
1838void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
1839void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
1840void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
1841
1842// VEX
1843void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
1844void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
1845void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
1846void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
1847void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
1848void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
1849void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
1850void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
1851void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
1852void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
1853void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
1854void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
1855
1856void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
1857void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
1858void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
1859void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
1860void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
1861void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
1862void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
1863void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
1864
1865void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
1866void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
1867void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
1868void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
1869
// FMA3 fused multiply-add: VFMADD{132,213,231}{PS,PD,SS,SD}.
// Every form passes prefix 0x66 and a 0x38xx opcode to WriteAVXOp. Each PS/PD
// pair and each SS/SD pair shares one opcode and differs only in the trailing
// argument.
// NOTE(review): per the WriteAVXOp declaration in emitter.h, the sixth
// parameter is `extrabytes`, so the trailing 1 on the PD/SD forms is passed as
// an extra-byte count, not as VEX.W. The Intel SDM encodes the PD/SD forms with
// VEX.W=1 and no immediate byte -- confirm that these do not come out with the
// same encoding as the PS/SS forms. The same pattern recurs in the VFMSUB /
// VFNMADD / VFNMSUB / VFMADDSUB / VFMSUBADD definitions that follow.
1870void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
1871void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
1872void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
1873void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
1874void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
1875void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
1876void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
1877void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
1878void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
1879void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
1880void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
1881void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
1882void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
1883void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
1884void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
1885void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
1886void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
1887void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
1888void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
1889void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
1890void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
1891void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
1892void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
1893void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
1894void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
1895void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
1896void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
1897void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
1898void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
1899void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
1900void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
1901void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
1902void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
1903void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
1904void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
1905void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
1906void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
1907void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
1908void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
1909void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
1910void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
1911void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
1912void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
1913void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
1914void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
1915void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
1916void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
1917void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
1918void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
1919void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
1920void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
1921void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
1922void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
1923void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
1924void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
1925void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
1926void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
1927void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
1928void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
1929void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
1930
1931void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
1932void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
1933void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
1934void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
1935void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
1936void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
1937void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
1938void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
1939void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
1940void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
1941void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
1942void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
1943void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
1944
1945// Prefixes
1946
1947void XEmitter::LOCK() { Write8(0xF0); }
1948void XEmitter::REP() { Write8(0xF3); }
1949void XEmitter::REPNE() { Write8(0xF2); }
1950void XEmitter::FSOverride() { Write8(0x64); }
1951void XEmitter::GSOverride() { Write8(0x65); }
1952
1953void XEmitter::FWAIT()
1954{
1955 Write8(0x9B);
1956}
1957
1958// TODO: make this more generic
1959void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
1960{
1961 int mf = 0;
1962 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
1963 switch (bits)
1964 {
1965 case 32: mf = 0; break;
1966 case 64: mf = 4; break;
1967 case 80: mf = 2; break;
1968 default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
1969 }
1970 Write8(0xd9 | mf);
1971 // x87 instructions use the reg field of the ModR/M byte as opcode:
1972 if (bits == 80)
1973 op = op_80b;
1974 arg.WriteRest(this, 0, (X64Reg) op);
1975}
1976
1977void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
1978void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
1979void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
1980void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
1981
1982void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
1983
1984void XCodeBlock::PoisonMemory() {
1985 // x86/64: 0xCC = breakpoint
1986 memset(region, 0xCC, region_size);
1987}
1988
1989}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
new file mode 100644
index 000000000..e9c924126
--- /dev/null
+++ b/src/common/x64/emitter.h
@@ -0,0 +1,1067 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include "common/assert.h"
21#include "common/common_types.h"
22#include "common/code_block.h"
23
24#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
25#define _ARCH_64
26#endif
27
28#ifdef _ARCH_64
29#define PTRBITS 64
30#else
31#define PTRBITS 32
32#endif
33
34namespace Gen
35{
36
37enum X64Reg
38{
39 EAX = 0, EBX = 3, ECX = 1, EDX = 2,
40 ESI = 6, EDI = 7, EBP = 5, ESP = 4,
41
42 RAX = 0, RBX = 3, RCX = 1, RDX = 2,
43 RSI = 6, RDI = 7, RBP = 5, RSP = 4,
44 R8 = 8, R9 = 9, R10 = 10,R11 = 11,
45 R12 = 12,R13 = 13,R14 = 14,R15 = 15,
46
47 AL = 0, BL = 3, CL = 1, DL = 2,
48 SIL = 6, DIL = 7, BPL = 5, SPL = 4,
49 AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106,
50
51 AX = 0, BX = 3, CX = 1, DX = 2,
52 SI = 6, DI = 7, BP = 5, SP = 4,
53
54 XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
55 XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
56
57 YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
58 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15,
59
60 INVALID_REG = 0xFFFFFFFF
61};
62
63enum CCFlags
64{
65 CC_O = 0,
66 CC_NO = 1,
67 CC_B = 2, CC_C = 2, CC_NAE = 2,
68 CC_NB = 3, CC_NC = 3, CC_AE = 3,
69 CC_Z = 4, CC_E = 4,
70 CC_NZ = 5, CC_NE = 5,
71 CC_BE = 6, CC_NA = 6,
72 CC_NBE = 7, CC_A = 7,
73 CC_S = 8,
74 CC_NS = 9,
75 CC_P = 0xA, CC_PE = 0xA,
76 CC_NP = 0xB, CC_PO = 0xB,
77 CC_L = 0xC, CC_NGE = 0xC,
78 CC_NL = 0xD, CC_GE = 0xD,
79 CC_LE = 0xE, CC_NG = 0xE,
80 CC_NLE = 0xF, CC_G = 0xF
81};
82
83enum
84{
85 NUMGPRs = 16,
86 NUMXMMs = 16,
87};
88
// Values stored in OpArg's `scale` field; they tag what kind of operand an
// OpArg describes.
//  - SCALE_NONE marks a plain register operand (see OpArg::IsSimpleReg, R()).
//  - SCALE_ATREG is what MatR() and MDisp() use for a register-based memory operand.
//  - SCALE_NOBASE_n equals n | 0x20, which is how MScaled() builds it.
//  - SCALE_RIP is what M() uses for an absolute pointer.
//  - SCALE_IMM8..SCALE_IMM64 mark immediates (see OpArg::IsImm / GetImmBits).
89enum
90{
91 SCALE_NONE = 0,
92 SCALE_1 = 1,
93 SCALE_2 = 2,
94 SCALE_4 = 4,
95 SCALE_8 = 8,
96 SCALE_ATREG = 16,
97 //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
98 SCALE_NOBASE_2 = 34,
99 SCALE_NOBASE_4 = 36,
100 SCALE_NOBASE_8 = 40,
101 SCALE_RIP = 0xFF,
102 SCALE_IMM8 = 0xF0,
103 SCALE_IMM16 = 0xF1,
104 SCALE_IMM32 = 0xF2,
105 SCALE_IMM64 = 0xF3,
106};
107
108enum NormalOp {
109 nrmADD,
110 nrmADC,
111 nrmSUB,
112 nrmSBB,
113 nrmAND,
114 nrmOR ,
115 nrmXOR,
116 nrmMOV,
117 nrmTEST,
118 nrmCMP,
119 nrmXCHG,
120};
121
122enum {
123 CMP_EQ = 0,
124 CMP_LT = 1,
125 CMP_LE = 2,
126 CMP_UNORD = 3,
127 CMP_NEQ = 4,
128 CMP_NLT = 5,
129 CMP_NLE = 6,
130 CMP_ORD = 7,
131};
132
// x87 load/store selectors. XEmitter::WriteFloatLoadStore casts the chosen
// value to X64Reg and passes it as the register operand of the ModR/M byte,
// which x87 instructions use as an opcode field.
// The *80 entries are the selectors used when the operand is 80 bits wide.
// floatINVALID is passed for instructions that have no 80-bit form (FST does this).
133enum FloatOp {
134 floatLD = 0,
135 floatST = 2,
136 floatSTP = 3,
137 floatLD80 = 5,
138 floatSTP80 = 7,
139
140 floatINVALID = -1,
141};
142
143enum FloatRound {
144 FROUND_NEAREST = 0,
145 FROUND_FLOOR = 1,
146 FROUND_CEIL = 2,
147 FROUND_ZERO = 3,
148 FROUND_MXCSR = 4,
149
150 FROUND_RAISE_PRECISION = 0,
151 FROUND_IGNORE_PRECISION = 8,
152};
153
154class XEmitter;
155
156// RIP addressing does not benefit from micro op fusion on Core arch
157struct OpArg
158{
159 OpArg() {} // dummy op arg, used for storage
160 OpArg(u64 _offset, int _scale, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
161 {
162 operandReg = 0;
163 scale = (u8)_scale;
164 offsetOrBaseReg = (u16)rmReg;
165 indexReg = (u16)scaledReg;
166 //if scale == 0 never mind offsetting
167 offset = _offset;
168 }
169 bool operator==(const OpArg &b) const
170 {
171 return operandReg == b.operandReg && scale == b.scale && offsetOrBaseReg == b.offsetOrBaseReg &&
172 indexReg == b.indexReg && offset == b.offset;
173 }
174 void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
175 void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
176 void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
177 void WriteFloatModRM(XEmitter *emit, FloatOp op);
178 void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
179 // This one is public - must be written to
180 u64 offset; // use RIP-relative as much as possible - 64-bit immediates are not available.
181 u16 operandReg;
182
183 void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const;
184 bool IsImm() const {return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64;}
185 bool IsSimpleReg() const {return scale == SCALE_NONE;}
186 bool IsSimpleReg(X64Reg reg) const
187 {
188 if (!IsSimpleReg())
189 return false;
190 return GetSimpleReg() == reg;
191 }
192
193 bool CanDoOpWith(const OpArg &other) const
194 {
195 if (IsSimpleReg()) return true;
196 if (!IsSimpleReg() && !other.IsSimpleReg() && !other.IsImm()) return false;
197 return true;
198 }
199
200 int GetImmBits() const
201 {
202 switch (scale)
203 {
204 case SCALE_IMM8: return 8;
205 case SCALE_IMM16: return 16;
206 case SCALE_IMM32: return 32;
207 case SCALE_IMM64: return 64;
208 default: return -1;
209 }
210 }
211
212 void SetImmBits(int bits) {
213 switch (bits)
214 {
215 case 8: scale = SCALE_IMM8; break;
216 case 16: scale = SCALE_IMM16; break;
217 case 32: scale = SCALE_IMM32; break;
218 case 64: scale = SCALE_IMM64; break;
219 }
220 }
221
222 X64Reg GetSimpleReg() const
223 {
224 if (scale == SCALE_NONE)
225 return (X64Reg)offsetOrBaseReg;
226 else
227 return INVALID_REG;
228 }
229
230 u32 GetImmValue() const {
231 return (u32)offset;
232 }
233
234 // For loops.
235 void IncreaseOffset(int sz) {
236 offset += sz;
237 }
238
239private:
240 u8 scale;
241 u16 offsetOrBaseReg;
242 u16 indexReg;
243};
244
245inline OpArg M(const void *ptr) {return OpArg((u64)ptr, (int)SCALE_RIP);}
246template <typename T>
247inline OpArg M(const T *ptr) {return OpArg((u64)(const void *)ptr, (int)SCALE_RIP);}
248inline OpArg R(X64Reg value) {return OpArg(0, SCALE_NONE, value);}
249inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
250
251inline OpArg MDisp(X64Reg value, int offset)
252{
253 return OpArg((u32)offset, SCALE_ATREG, value);
254}
255
256inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
257{
258 return OpArg(offset, scale, base, scaled);
259}
260
261inline OpArg MScaled(X64Reg scaled, int scale, int offset)
262{
263 if (scale == SCALE_1)
264 return OpArg(offset, SCALE_ATREG, scaled);
265 else
266 return OpArg(offset, scale | 0x20, RAX, scaled);
267}
268
269inline OpArg MRegSum(X64Reg base, X64Reg offset)
270{
271 return MComplex(base, offset, 1, 0);
272}
273
274inline OpArg Imm8 (u8 imm) {return OpArg(imm, SCALE_IMM8);}
275inline OpArg Imm16(u16 imm) {return OpArg(imm, SCALE_IMM16);} //rarely used
276inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
277inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
278inline OpArg UImmAuto(u32 imm) {
279 return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
280}
281inline OpArg SImmAuto(s32 imm) {
282 return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8);
283}
284
285#ifdef _ARCH_64
286inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
287#else
288inline OpArg ImmPtr(const void* imm) {return Imm32((u32)imm);}
289#endif
290
291inline u32 PtrOffset(const void* ptr, const void* base)
292{
293#ifdef _ARCH_64
294 s64 distance = (s64)ptr-(s64)base;
295 if (distance >= 0x80000000LL ||
296 distance < -0x80000000LL)
297 {
298 ASSERT_MSG(0, "pointer offset out of range");
299 return 0;
300 }
301
302 return (u32)distance;
303#else
304 return (u32)ptr-(u32)base;
305#endif
306}
307
308//usage: int a[]; ARRAY_OFFSET(a,10)
309#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0]))
310//usage: struct {int e;} s; STRUCT_OFFSET(s,e)
311#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str)))
312
313struct FixupBranch
314{
315 u8 *ptr;
316 int type; //0 = 8bit 1 = 32bit
317};
318
319enum SSECompare
320{
321 EQ = 0,
322 LT,
323 LE,
324 UNORD,
325 NEQ,
326 NLT,
327 NLE,
328 ORD,
329};
330
331typedef const u8* JumpTarget;
332
333class XEmitter
334{
335 friend struct OpArg; // for Write8 etc
336private:
337 u8 *code;
338 bool flags_locked;
339
340 void CheckFlags();
341
342 void Rex(int w, int r, int x, int b);
343 void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
344 void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
345 void WriteMulDivType(int bits, OpArg src, int ext);
346 void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
347 void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
348 void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
349 void WriteMXCSR(OpArg arg, int ext);
350 void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
351 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
352 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
353 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
354 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
355 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
356 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
357 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
358 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
359 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
360
361 void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
362
363protected:
// Raw emit helpers: store `value` at the write cursor `code`, then advance the cursor by
// the operand size (1, 2, 4 or 8 bytes).
// The 16/32/64-bit variants store through a cast pointer, so the bytes land in host byte
// order, and nothing here checks alignment or remaining buffer space.
364 inline void Write8(u8 value) {*code++ = value;}
365 inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
366 inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
367 inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
368
369public:
// Default construction: no code pointer yet (nullptr), flags unlocked.
370 XEmitter() { code = nullptr; flags_locked = false; }
// Starts with the write cursor at code_ptr, flags unlocked. Not declared `explicit`, so a
// u8* converts implicitly to an XEmitter.
371 XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; }
// Virtual so that derived emitters can be destroyed through an XEmitter pointer.
372 virtual ~XEmitter() {}
373
374 void WriteModRM(int mod, int rm, int reg);
375 void WriteSIB(int scale, int index, int base);
376
377 void SetCodePtr(u8 *ptr);
378 void ReserveCodeSpace(int bytes);
379 const u8 *AlignCode4();
380 const u8 *AlignCode16();
381 const u8 *AlignCodePage();
382 const u8 *GetCodePtr() const;
383 u8 *GetWritableCodePtr();
384
// Set / clear the flags_locked member; nothing else happens here.
// NOTE(review): presumably CheckFlags() consults it to catch emission of flag-modifying
// instructions while locked — confirm in emitter.cpp.
385 void LockFlags() { flags_locked = true; }
386 void UnlockFlags() { flags_locked = false; }
387
388 // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPUs:
389 // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr.,
390 // INC and DEC are slow on Intel Core, but not on AMD. They create a
391 // false flag dependency because they only update a subset of the flags.
392 // XCHG is SLOW and should be avoided.
393
394 // Debug breakpoint
395 void INT3();
396
397 // Do nothing
398 void NOP(size_t count = 1);
399
400 // Save energy in wait-loops on P4 only. Probably not too useful.
401 void PAUSE();
402
403 // Flag control
404 void STC();
405 void CLC();
406 void CMC();
407
408 // These two cannot be executed in 64-bit mode on early Intel 64-bit CPUs, only on Core2 and AMD!
409 void LAHF(); // 3 cycle vector path
410 void SAHF(); // direct path fast
411
412
413 // Stack control
414 void PUSH(X64Reg reg);
415 void POP(X64Reg reg);
416 void PUSH(int bits, const OpArg &reg);
417 void POP(int bits, const OpArg &reg);
418 void PUSHF();
419 void POPF();
420
421 // Flow control
422 void RET();
423 void RET_FAST();
424 void UD2();
425 FixupBranch J(bool force5bytes = false);
426
427 void JMP(const u8 * addr, bool force5Bytes = false);
428 void JMP(OpArg arg);
429 void JMPptr(const OpArg &arg);
430 void JMPself(); //infinite loop!
431#ifdef CALL
432#undef CALL
433#endif
434 void CALL(const void *fnptr);
435 void CALLptr(OpArg arg);
436
437 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
438 //void J_CC(CCFlags conditionCode, JumpTarget target);
439 void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
440
441 void SetJumpTarget(const FixupBranch &branch);
442
443 void SETcc(CCFlags flag, OpArg dest);
444 // Note: CMOV brings small if any benefit on current cpus.
445 void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);
446
447 // Fences
448 void LFENCE();
449 void MFENCE();
450 void SFENCE();
451
452 // Bit scan
453 void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
454 void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
455
456 // Cache control
// Locality hint passed as the first argument of PREFETCH().
// Enumerators take the implicit values 0..3 in declaration order.
// NOTE(review): that order looks like the hint encoding of the PREFETCHh instruction in the
// Intel SDM — confirm before reordering.
457 enum PrefetchLevel
458 {
459 PF_NTA, //Non-temporal (data used once and only once)
460 PF_T0, //All cache levels
461 PF_T1, //Levels 2+ (aliased to T0 on AMD)
462 PF_T2, //Levels 3+ (aliased to T0 on AMD)
463 };
464 void PREFETCH(PrefetchLevel level, OpArg arg);
465 void MOVNTI(int bits, OpArg dest, X64Reg src);
466 void MOVNTDQ(OpArg arg, X64Reg regOp);
467 void MOVNTPS(OpArg arg, X64Reg regOp);
468 void MOVNTPD(OpArg arg, X64Reg regOp);
469
470 // Multiplication / division
471 void MUL(int bits, OpArg src); //UNSIGNED
472 void IMUL(int bits, OpArg src); //SIGNED
473 void IMUL(int bits, X64Reg regOp, OpArg src);
474 void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
475 void DIV(int bits, OpArg src);
476 void IDIV(int bits, OpArg src);
477
478 // Shift
479 void ROL(int bits, OpArg dest, OpArg shift);
480 void ROR(int bits, OpArg dest, OpArg shift);
481 void RCL(int bits, OpArg dest, OpArg shift);
482 void RCR(int bits, OpArg dest, OpArg shift);
483 void SHL(int bits, OpArg dest, OpArg shift);
484 void SHR(int bits, OpArg dest, OpArg shift);
485 void SAR(int bits, OpArg dest, OpArg shift);
486
487 // Bit Test
488 void BT(int bits, OpArg dest, OpArg index);
489 void BTS(int bits, OpArg dest, OpArg index);
490 void BTR(int bits, OpArg dest, OpArg index);
491 void BTC(int bits, OpArg dest, OpArg index);
492
493 // Double-Precision Shift
494 void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
495 void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);
496
497 // Extend EAX into EDX in various ways
// CDQ and CQO are shorthands for CWD(32) and CWD(64); CWD() alone defaults to bits = 16.
// CWDE and CDQE are shorthands for CBW(16) and CBW(32); CBW() alone defaults to bits = 8.
// NOTE(review): per the Intel mnemonics, CWD/CDQ/CQO sign-extend the accumulator into
// (E/R)DX, while CBW/CWDE/CDQE widen within the accumulator and leave EDX alone — so `bits`
// looks like the operand width for CWD but the source width for CBW. Confirm in emitter.cpp.
498 void CWD(int bits = 16);
499 inline void CDQ() {CWD(32);}
500 inline void CQO() {CWD(64);}
501 void CBW(int bits = 8);
502 inline void CWDE() {CBW(16);}
503 inline void CDQE() {CBW(32);}
504
505 // Load effective address
506 void LEA(int bits, X64Reg dest, OpArg src);
507
508 // Integer arithmetic
509 void NEG (int bits, OpArg src);
510 void ADD (int bits, const OpArg &a1, const OpArg &a2);
511 void ADC (int bits, const OpArg &a1, const OpArg &a2);
512 void SUB (int bits, const OpArg &a1, const OpArg &a2);
513 void SBB (int bits, const OpArg &a1, const OpArg &a2);
514 void AND (int bits, const OpArg &a1, const OpArg &a2);
515 void CMP (int bits, const OpArg &a1, const OpArg &a2);
516
517 // Bit operations
518 void NOT (int bits, OpArg src);
519 void OR (int bits, const OpArg &a1, const OpArg &a2);
520 void XOR (int bits, const OpArg &a1, const OpArg &a2);
521 void MOV (int bits, const OpArg &a1, const OpArg &a2);
522 void TEST(int bits, const OpArg &a1, const OpArg &a2);
523
524 // Are these useful at all? Consider removing.
525 void XCHG(int bits, const OpArg &a1, const OpArg &a2);
526 void XCHG_AHAL();
527
528 // Byte swapping (32 and 64-bit only).
529 void BSWAP(int bits, X64Reg reg);
530
531 // Sign/zero extension
532 void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary
533 void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);
534
535 // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.
536 void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
537
538 // Available only on AMD >= Phenom or Intel >= Haswell
539 void LZCNT(int bits, X64Reg dest, OpArg src);
540 // Note: this one is actually part of BMI1
541 void TZCNT(int bits, X64Reg dest, OpArg src);
542
543 // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
544 void STMXCSR(OpArg memloc);
545 void LDMXCSR(OpArg memloc);
546
547 // Prefixes
548 void LOCK();
549 void REP();
550 void REPNE();
551 void FSOverride();
552 void GSOverride();
553
554 // x87
// Bit masks for a 16-bit status word. The masks are disjoint and together cover 0xFFFF.
// Every entry is a single bit except x87_TopOfStack, which is a 3-bit field (bits 11-13).
// NOTE(review): presumably meant for testing AX after FNSTSW_AX() — confirm at call sites.
555 enum x87StatusWordBits {
556 x87_InvalidOperation = 0x1,
557 x87_DenormalizedOperand = 0x2,
558 x87_DivisionByZero = 0x4,
559 x87_Overflow = 0x8,
560 x87_Underflow = 0x10,
561 x87_Precision = 0x20,
562 x87_StackFault = 0x40,
563 x87_ErrorSummary = 0x80,
564 x87_C0 = 0x100,
565 x87_C1 = 0x200,
566 x87_C2 = 0x400,
567 x87_TopOfStack = 0x2000 | 0x1000 | 0x800, // bits 11, 12 and 13 together
568 x87_C3 = 0x4000,
569 x87_FPUBusy = 0x8000,
570 };
571
572 void FLD(int bits, OpArg src);
573 void FST(int bits, OpArg dest);
574 void FSTP(int bits, OpArg dest);
575 void FNSTSW_AX();
576 void FWAIT();
577
578 // SSE/SSE2: Floating point arithmetic
579 void ADDSS(X64Reg regOp, OpArg arg);
580 void ADDSD(X64Reg regOp, OpArg arg);
581 void SUBSS(X64Reg regOp, OpArg arg);
582 void SUBSD(X64Reg regOp, OpArg arg);
583 void MULSS(X64Reg regOp, OpArg arg);
584 void MULSD(X64Reg regOp, OpArg arg);
585 void DIVSS(X64Reg regOp, OpArg arg);
586 void DIVSD(X64Reg regOp, OpArg arg);
587 void MINSS(X64Reg regOp, OpArg arg);
588 void MINSD(X64Reg regOp, OpArg arg);
589 void MAXSS(X64Reg regOp, OpArg arg);
590 void MAXSD(X64Reg regOp, OpArg arg);
591 void SQRTSS(X64Reg regOp, OpArg arg);
592 void SQRTSD(X64Reg regOp, OpArg arg);
593 void RSQRTSS(X64Reg regOp, OpArg arg);
594
595 // SSE/SSE2: Floating point bitwise (yes)
// NOTE(review): despite the heading, the declarations below are scalar compares. `compare`
// is the predicate selector; per the Intel SDM the result is an all-ones / all-zeros mask,
// which may be what "bitwise" alludes to — confirm.
596 void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
597 void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
598
// Convenience wrappers: each forwards to CMPSS with one fixed CMP_* predicate.
// There is no NLE wrapper in this list, although SSECompare lists NLE.
599 inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
600 inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
601 inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
602 inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
603 inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
604 inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
605 inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
606
607 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
608 void ADDPS(X64Reg regOp, OpArg arg);
609 void ADDPD(X64Reg regOp, OpArg arg);
610 void SUBPS(X64Reg regOp, OpArg arg);
611 void SUBPD(X64Reg regOp, OpArg arg);
612 void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
613 void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
614 void MULPS(X64Reg regOp, OpArg arg);
615 void MULPD(X64Reg regOp, OpArg arg);
616 void DIVPS(X64Reg regOp, OpArg arg);
617 void DIVPD(X64Reg regOp, OpArg arg);
618 void MINPS(X64Reg regOp, OpArg arg);
619 void MINPD(X64Reg regOp, OpArg arg);
620 void MAXPS(X64Reg regOp, OpArg arg);
621 void MAXPD(X64Reg regOp, OpArg arg);
622 void SQRTPS(X64Reg regOp, OpArg arg);
623 void SQRTPD(X64Reg regOp, OpArg arg);
624 void RCPPS(X64Reg regOp, OpArg arg);
625 void RSQRTPS(X64Reg regOp, OpArg arg);
626
627 // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
628 void ANDPS(X64Reg regOp, OpArg arg);
629 void ANDPD(X64Reg regOp, OpArg arg);
630 void ANDNPS(X64Reg regOp, OpArg arg);
631 void ANDNPD(X64Reg regOp, OpArg arg);
632 void ORPS(X64Reg regOp, OpArg arg);
633 void ORPD(X64Reg regOp, OpArg arg);
634 void XORPS(X64Reg regOp, OpArg arg);
635 void XORPD(X64Reg regOp, OpArg arg);
636
637 // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
638 void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
639 void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
640
641 // SSE/SSE2: Useful alternative to shuffle in some cases.
642 void MOVDDUP(X64Reg regOp, OpArg arg);
643
644 // TODO: Actually implement
645#if 0
646 // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
647 void ADDSUBPS(X64Reg dest, OpArg src);
648 void ADDSUBPD(X64Reg dest, OpArg src);
649 void HADDPD(X64Reg dest, OpArg src);
650 void HSUBPS(X64Reg dest, OpArg src);
651 void HSUBPD(X64Reg dest, OpArg src);
652
653 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
654 void DPPD(X64Reg dest, OpArg src, u8 arg);
655
656 // These are probably useful for VFPU emulation.
657 void INSERTPS(X64Reg dest, OpArg src, u8 arg);
658 void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
659#endif
660
661 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
662 void HADDPS(X64Reg dest, OpArg src);
663
664 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
665 void DPPS(X64Reg dest, OpArg src, u8 arg);
666
667 void UNPCKLPS(X64Reg dest, OpArg src);
668 void UNPCKHPS(X64Reg dest, OpArg src);
669 void UNPCKLPD(X64Reg dest, OpArg src);
670 void UNPCKHPD(X64Reg dest, OpArg src);
671
672 // SSE/SSE2: Compares.
673 void COMISS(X64Reg regOp, OpArg arg);
674 void COMISD(X64Reg regOp, OpArg arg);
675 void UCOMISS(X64Reg regOp, OpArg arg);
676 void UCOMISD(X64Reg regOp, OpArg arg);
677
678 // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
679 void MOVAPS(X64Reg regOp, OpArg arg);
680 void MOVAPD(X64Reg regOp, OpArg arg);
681 void MOVAPS(OpArg arg, X64Reg regOp);
682 void MOVAPD(OpArg arg, X64Reg regOp);
683
684 void MOVUPS(X64Reg regOp, OpArg arg);
685 void MOVUPD(X64Reg regOp, OpArg arg);
686 void MOVUPS(OpArg arg, X64Reg regOp);
687 void MOVUPD(OpArg arg, X64Reg regOp);
688
689 void MOVDQA(X64Reg regOp, OpArg arg);
690 void MOVDQA(OpArg arg, X64Reg regOp);
691 void MOVDQU(X64Reg regOp, OpArg arg);
692 void MOVDQU(OpArg arg, X64Reg regOp);
693
694 void MOVSS(X64Reg regOp, OpArg arg);
695 void MOVSD(X64Reg regOp, OpArg arg);
696 void MOVSS(OpArg arg, X64Reg regOp);
697 void MOVSD(OpArg arg, X64Reg regOp);
698
699 void MOVLPS(X64Reg regOp, OpArg arg);
700 void MOVLPD(X64Reg regOp, OpArg arg);
701 void MOVLPS(OpArg arg, X64Reg regOp);
702 void MOVLPD(OpArg arg, X64Reg regOp);
703
704 void MOVHPS(X64Reg regOp, OpArg arg);
705 void MOVHPD(X64Reg regOp, OpArg arg);
706 void MOVHPS(OpArg arg, X64Reg regOp);
707 void MOVHPD(OpArg arg, X64Reg regOp);
708
709 void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
710 void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
711
712 void MOVD_xmm(X64Reg dest, const OpArg &arg);
713 void MOVQ_xmm(X64Reg dest, OpArg arg);
714 void MOVD_xmm(const OpArg &arg, X64Reg src);
715 void MOVQ_xmm(OpArg arg, X64Reg src);
716
717 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
718 void MOVMSKPS(X64Reg dest, OpArg arg);
719 void MOVMSKPD(X64Reg dest, OpArg arg);
720
721 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
722 void MASKMOVDQU(X64Reg dest, X64Reg src);
723 void LDDQU(X64Reg dest, OpArg src);
724
725 // SSE/SSE2: Data type conversions.
726 void CVTPS2PD(X64Reg dest, OpArg src);
727 void CVTPD2PS(X64Reg dest, OpArg src);
728 void CVTSS2SD(X64Reg dest, OpArg src);
729 void CVTSI2SS(X64Reg dest, OpArg src);
730 void CVTSD2SS(X64Reg dest, OpArg src);
731 void CVTSI2SD(X64Reg dest, OpArg src);
732 void CVTDQ2PD(X64Reg regOp, OpArg arg);
733 void CVTPD2DQ(X64Reg regOp, OpArg arg);
734 void CVTDQ2PS(X64Reg regOp, OpArg arg);
735 void CVTPS2DQ(X64Reg regOp, OpArg arg);
736
737 void CVTTPS2DQ(X64Reg regOp, OpArg arg);
738 void CVTTPD2DQ(X64Reg regOp, OpArg arg);
739
740 // Destinations are X64 regs (rax, rbx, ...) for these instructions.
741 void CVTSS2SI(X64Reg xregdest, OpArg src);
742 void CVTSD2SI(X64Reg xregdest, OpArg src);
743 void CVTTSS2SI(X64Reg xregdest, OpArg arg);
744 void CVTTSD2SI(X64Reg xregdest, OpArg arg);
745
746 // SSE2: Packed integer instructions
747 void PACKSSDW(X64Reg dest, OpArg arg);
748 void PACKSSWB(X64Reg dest, OpArg arg);
749 void PACKUSDW(X64Reg dest, OpArg arg);
750 void PACKUSWB(X64Reg dest, OpArg arg);
751
752 void PUNPCKLBW(X64Reg dest, const OpArg &arg);
753 void PUNPCKLWD(X64Reg dest, const OpArg &arg);
754 void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
755 void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
756
757 void PTEST(X64Reg dest, OpArg arg);
758 void PAND(X64Reg dest, OpArg arg);
759 void PANDN(X64Reg dest, OpArg arg);
760 void PXOR(X64Reg dest, OpArg arg);
761 void POR(X64Reg dest, OpArg arg);
762
763 void PADDB(X64Reg dest, OpArg arg);
764 void PADDW(X64Reg dest, OpArg arg);
765 void PADDD(X64Reg dest, OpArg arg);
766 void PADDQ(X64Reg dest, OpArg arg);
767
768 void PADDSB(X64Reg dest, OpArg arg);
769 void PADDSW(X64Reg dest, OpArg arg);
770 void PADDUSB(X64Reg dest, OpArg arg);
771 void PADDUSW(X64Reg dest, OpArg arg);
772
773 void PSUBB(X64Reg dest, OpArg arg);
774 void PSUBW(X64Reg dest, OpArg arg);
775 void PSUBD(X64Reg dest, OpArg arg);
776 void PSUBQ(X64Reg dest, OpArg arg);
777
778 void PSUBSB(X64Reg dest, OpArg arg);
779 void PSUBSW(X64Reg dest, OpArg arg);
780 void PSUBUSB(X64Reg dest, OpArg arg);
781 void PSUBUSW(X64Reg dest, OpArg arg);
782
783 void PAVGB(X64Reg dest, OpArg arg);
784 void PAVGW(X64Reg dest, OpArg arg);
785
786 void PCMPEQB(X64Reg dest, OpArg arg);
787 void PCMPEQW(X64Reg dest, OpArg arg);
788 void PCMPEQD(X64Reg dest, OpArg arg);
789
790 void PCMPGTB(X64Reg dest, OpArg arg);
791 void PCMPGTW(X64Reg dest, OpArg arg);
792 void PCMPGTD(X64Reg dest, OpArg arg);
793
794 void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
795 void PINSRW(X64Reg dest, OpArg arg, u8 subreg);
796
797 void PMADDWD(X64Reg dest, OpArg arg);
798 void PSADBW(X64Reg dest, OpArg arg);
799
800 void PMAXSW(X64Reg dest, OpArg arg);
801 void PMAXUB(X64Reg dest, OpArg arg);
802 void PMINSW(X64Reg dest, OpArg arg);
803 void PMINUB(X64Reg dest, OpArg arg);
804 // SSE4: More MAX/MIN instructions.
805 void PMINSB(X64Reg dest, OpArg arg);
806 void PMINSD(X64Reg dest, OpArg arg);
807 void PMINUW(X64Reg dest, OpArg arg);
808 void PMINUD(X64Reg dest, OpArg arg);
809 void PMAXSB(X64Reg dest, OpArg arg);
810 void PMAXSD(X64Reg dest, OpArg arg);
811 void PMAXUW(X64Reg dest, OpArg arg);
812 void PMAXUD(X64Reg dest, OpArg arg);
813
814 void PMOVMSKB(X64Reg dest, OpArg arg);
815 void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
816 void PSHUFB(X64Reg dest, OpArg arg);
817
818 void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
819 void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
820
821 void PSRLW(X64Reg reg, int shift);
822 void PSRLD(X64Reg reg, int shift);
823 void PSRLQ(X64Reg reg, int shift);
824 void PSRLQ(X64Reg reg, OpArg arg);
825 void PSRLDQ(X64Reg reg, int shift);
826
827 void PSLLW(X64Reg reg, int shift);
828 void PSLLD(X64Reg reg, int shift);
829 void PSLLQ(X64Reg reg, int shift);
830 void PSLLDQ(X64Reg reg, int shift);
831
832 void PSRAW(X64Reg reg, int shift);
833 void PSRAD(X64Reg reg, int shift);
834
835 // SSE4: data type conversions
836 void PMOVSXBW(X64Reg dest, OpArg arg);
837 void PMOVSXBD(X64Reg dest, OpArg arg);
838 void PMOVSXBQ(X64Reg dest, OpArg arg);
839 void PMOVSXWD(X64Reg dest, OpArg arg);
840 void PMOVSXWQ(X64Reg dest, OpArg arg);
841 void PMOVSXDQ(X64Reg dest, OpArg arg);
842 void PMOVZXBW(X64Reg dest, OpArg arg);
843 void PMOVZXBD(X64Reg dest, OpArg arg);
844 void PMOVZXBQ(X64Reg dest, OpArg arg);
845 void PMOVZXWD(X64Reg dest, OpArg arg);
846 void PMOVZXWQ(X64Reg dest, OpArg arg);
847 void PMOVZXDQ(X64Reg dest, OpArg arg);
848
849 // SSE4: variable blend instructions (xmm0 implicit argument)
850 void PBLENDVB(X64Reg dest, OpArg arg);
851 void BLENDVPS(X64Reg dest, OpArg arg);
852 void BLENDVPD(X64Reg dest, OpArg arg);
853 void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
854 void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
855
856 // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
857 void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
858 void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
859 void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
860 void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
861
// Fixed-mode wrappers around ROUNDSS / ROUNDSD / ROUNDPS / ROUNDPD.
// The NEAR, FLOOR, CEIL and ZERO variants pass FROUND_NEAREST, FROUND_FLOOR, FROUND_CEIL and
// FROUND_ZERO respectively as the `mode` argument; dest and arg are forwarded unchanged.
862 inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
863 inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
864 inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
865 inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
866
867 inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
868 inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
869 inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
870 inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
871
872 inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
873 inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
874 inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
875 inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
876
877 inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
878 inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
879 inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
880 inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
881
882 // AVX
883 void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
884 void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
885 void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
886 void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
887 void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
888 void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
889 void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
890 void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
891 void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
892 void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
893 void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
894 void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
895
896 void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
897 void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
898 void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
899 void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
900 void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
901 void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
902 void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
903 void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
904
905 void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
906 void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
907 void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
908 void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
909
910 // FMA3
911 void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
912 void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
913 void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
914 void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
915 void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
916 void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
917 void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
918 void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
919 void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
920 void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
921 void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
922 void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
923 void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
924 void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
925 void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
926 void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
927 void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
928 void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
929 void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
930 void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
931 void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
932 void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
933 void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
934 void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
935 void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
936 void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
937 void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
938 void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
939 void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
940 void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
941 void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
942 void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
943 void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
944 void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
945 void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
946 void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
947 void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
948 void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
949 void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
950 void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
951 void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
952 void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
953 void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
954 void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
955 void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
956 void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
957 void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
958 void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
959 void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
960 void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
961 void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
962 void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
963 void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
964 void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
965 void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
966 void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
967 void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
968 void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
969 void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
970 void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
971
972 // VEX GPR instructions
973 void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
974 void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
975 void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
976 void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
977 void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
978 void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
979 void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
980 void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
981 void BLSR(int bits, X64Reg regOp, OpArg arg);
982 void BLSMSK(int bits, X64Reg regOp, OpArg arg);
983 void BLSI(int bits, X64Reg regOp, OpArg arg);
984 void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
985 void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
986
987 void RDTSC();
988
989 // Utility functions
990 // The difference between this and CALL is that this aligns the stack
991 // where appropriate.
992 void ABI_CallFunction(const void *func);
// Typed convenience overload for pointers to zero-argument functions returning T:
// casts the pointer to const void* and forwards to ABI_CallFunction(const void *).
993 template <typename T>
994 void ABI_CallFunction(T (*func)()) {
995 ABI_CallFunction((const void *)func);
996 }
997
// Overload for a target given as a byte pointer (e.g. an address inside emitted code):
// casts it to const void* and forwards to ABI_CallFunction(const void *).
998 void ABI_CallFunction(const u8 *func) {
999 ABI_CallFunction((const void *)func);
1000 }
1001 void ABI_CallFunctionC16(const void *func, u16 param1);
1002 void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);
1003
1004
1005 // These only support u32 parameters, but that's enough for a lot of uses.
1006 // These will destroy the 1 or 2 first "parameter regs".
1007 void ABI_CallFunctionC(const void *func, u32 param1);
1008 void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
1009 void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
1010 void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
1011 void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
1012 void ABI_CallFunctionP(const void *func, void *param1);
1013 void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
1014 void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
1015 void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
1016 void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
1017 void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
1018 void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
1019 void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
1020
1021 // Pass a register as a parameter.
1022 void ABI_CallFunctionR(const void *func, X64Reg reg1);
1023 void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);
1024
// Typed convenience overload for pointers to one-argument functions: casts `func` to
// const void* and forwards to ABI_CallFunctionC(const void *, u32).
// param1 is always passed as a u32, whatever the callee's declared parameter type T1 is.
1025 template <typename Tr, typename T1>
1026 void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
1027 ABI_CallFunctionC((const void *)func, param1);
1028 }
1029
1030 // A function that doesn't have any control over what it will do to regs,
1031 // such as the dispatcher, should be surrounded by these.
1032 void ABI_PushAllCalleeSavedRegsAndAdjustStack();
1033 void ABI_PopAllCalleeSavedRegsAndAdjustStack();
1034
1035 // A function that doesn't know anything about its surroundings should
1036 // be surrounded by these to establish a safe environment, where it can roam free.
1037 // An example is a backpatch injected function.
1038 void ABI_PushAllCallerSavedRegsAndAdjustStack();
1039 void ABI_PopAllCallerSavedRegsAndAdjustStack();
1040
1041 unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize);
1042 void ABI_AlignStack(unsigned int frameSize);
1043 void ABI_RestoreStack(unsigned int frameSize);
1044
1045 // Sets up a __cdecl function.
1046 // Only x64 really needs the parameter count.
1047 void ABI_EmitPrologue(int maxCallParams);
1048 void ABI_EmitEpilogue(int maxCallParams);
1049
// Number of XMM registers assumed for the build target: 8 when _M_IX86 is defined,
// 16 otherwise. The value is fixed at compile time, not probed from the CPU.
1050 #ifdef _M_IX86
1051 inline int ABI_GetNumXMMRegs() { return 8; }
1052 #else
1053 inline int ABI_GetNumXMMRegs() { return 16; }
1054 #endif
1055}; // class XEmitter
1056
1057
1058// Everything that needs to generate X86 code should inherit from this.
1059// You get memory management for free, plus, you can use all the MOV etc functions without
1060// having to prefix them with gen-> or something similar.
1061
// Concrete code block for x86: CodeBlock instantiated with XEmitter, adding only a
// PoisonMemory() override.
1062class XCodeBlock : public CodeBlock<XEmitter> {
1063public:
// Overrides a virtual declared in CodeBlock; the definition is not in this header.
// NOTE(review): presumably fills the block's memory with a trapping opcode — confirm in emitter.cpp.
1064 void PoisonMemory() override;
1065};
1066
1067} // namespace