summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt30
-rw-r--r--src/common/bit_set.h33
-rw-r--r--src/common/common_paths.h1
-rw-r--r--src/common/file_util.cpp4
-rw-r--r--src/common/hash.cpp8
-rw-r--r--src/common/hash.h5
-rw-r--r--src/common/logging/backend.cpp2
-rw-r--r--src/common/logging/log.h2
-rw-r--r--src/common/profiler.cpp101
-rw-r--r--src/common/profiler_reporting.h83
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/scm_rev.h1
-rw-r--r--src/common/synchronized_wrapper.h43
-rw-r--r--src/common/x64/abi.cpp350
-rw-r--r--src/common/x64/abi.h58
-rw-r--r--src/common/x64/cpu_detect.cpp12
-rw-r--r--src/common/x64/emitter.cpp2583
-rw-r--r--src/common/x64/emitter.h1206
18 files changed, 92 insertions, 4432 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index a7a4a688c..8a6170257 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,4 +1,27 @@
1# Generate cpp with Git revision from template 1# Generate cpp with Git revision from template
2# Also if this is a CI build, add the build name (ie: Nightly, Bleeding Edge) to the scm_rev file as well
3set(REPO_NAME "")
4if ($ENV{CI})
5 if ($ENV{TRAVIS})
6 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
7 elseif($ENV{APPVEYOR})
8 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
9 endif()
10 # regex capture the string nightly or bleeding-edge into CMAKE_MATCH_1
11 string(REGEX MATCH "citra-emu/citra-?(.*)" OUTVAR ${BUILD_REPOSITORY})
12 if (${CMAKE_MATCH_COUNT} GREATER 0)
13 # capitalize the first letter of each word in the repo name.
14 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
15 foreach(WORD ${REPO_NAME_LIST})
16 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
17 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
18 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
19 # this leaves a trailing space on the last word, but we actually want that
20 # because of how its styled in the title bar.
21 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER} ")
22 endforeach()
23 endif()
24endif()
2configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 25configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY)
3 26
4set(SRCS 27set(SRCS
@@ -12,7 +35,6 @@ set(SRCS
12 memory_util.cpp 35 memory_util.cpp
13 microprofile.cpp 36 microprofile.cpp
14 misc.cpp 37 misc.cpp
15 profiler.cpp
16 scm_rev.cpp 38 scm_rev.cpp
17 string_util.cpp 39 string_util.cpp
18 symbols.cpp 40 symbols.cpp
@@ -45,7 +67,6 @@ set(HEADERS
45 microprofile.h 67 microprofile.h
46 microprofileui.h 68 microprofileui.h
47 platform.h 69 platform.h
48 profiler_reporting.h
49 quaternion.h 70 quaternion.h
50 scm_rev.h 71 scm_rev.h
51 scope_exit.h 72 scope_exit.h
@@ -61,14 +82,11 @@ set(HEADERS
61 82
62if(ARCHITECTURE_x86_64) 83if(ARCHITECTURE_x86_64)
63 set(SRCS ${SRCS} 84 set(SRCS ${SRCS}
64 x64/abi.cpp
65 x64/cpu_detect.cpp 85 x64/cpu_detect.cpp
66 x64/emitter.cpp) 86 )
67 87
68 set(HEADERS ${HEADERS} 88 set(HEADERS ${HEADERS}
69 x64/abi.h
70 x64/cpu_detect.h 89 x64/cpu_detect.h
71 x64/emitter.h
72 x64/xbyak_abi.h 90 x64/xbyak_abi.h
73 x64/xbyak_util.h 91 x64/xbyak_util.h
74 ) 92 )
diff --git a/src/common/bit_set.h b/src/common/bit_set.h
index 3059d0cb0..9c2e6b28c 100644
--- a/src/common/bit_set.h
+++ b/src/common/bit_set.h
@@ -121,22 +121,19 @@ public:
121 class Iterator { 121 class Iterator {
122 public: 122 public:
123 Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {} 123 Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
124 Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {} 124 Iterator(IntTy val) : m_val(val), m_bit(0) {}
125 Iterator& operator=(Iterator other) { 125 Iterator& operator=(Iterator other) {
126 new (this) Iterator(other); 126 new (this) Iterator(other);
127 return *this; 127 return *this;
128 } 128 }
129 int operator*() { 129 int operator*() {
130 return m_bit; 130 return m_bit + ComputeLsb();
131 } 131 }
132 Iterator& operator++() { 132 Iterator& operator++() {
133 if (m_val == 0) { 133 int lsb = ComputeLsb();
134 m_bit = -1; 134 m_val >>= lsb + 1;
135 } else { 135 m_bit += lsb + 1;
136 int bit = LeastSignificantSetBit(m_val); 136 m_has_lsb = false;
137 m_val &= ~(1 << bit);
138 m_bit = bit;
139 }
140 return *this; 137 return *this;
141 } 138 }
142 Iterator operator++(int _) { 139 Iterator operator++(int _) {
@@ -145,15 +142,24 @@ public:
145 return other; 142 return other;
146 } 143 }
147 bool operator==(Iterator other) const { 144 bool operator==(Iterator other) const {
148 return m_bit == other.m_bit; 145 return m_val == other.m_val;
149 } 146 }
150 bool operator!=(Iterator other) const { 147 bool operator!=(Iterator other) const {
151 return m_bit != other.m_bit; 148 return m_val != other.m_val;
152 } 149 }
153 150
154 private: 151 private:
152 int ComputeLsb() {
153 if (!m_has_lsb) {
154 m_lsb = LeastSignificantSetBit(m_val);
155 m_has_lsb = true;
156 }
157 return m_lsb;
158 }
155 IntTy m_val; 159 IntTy m_val;
156 int m_bit; 160 int m_bit;
161 int m_lsb = -1;
162 bool m_has_lsb = false;
157 }; 163 };
158 164
159 BitSet() : m_val(0) {} 165 BitSet() : m_val(0) {}
@@ -221,11 +227,10 @@ public:
221 } 227 }
222 228
223 Iterator begin() const { 229 Iterator begin() const {
224 Iterator it(m_val, 0); 230 return Iterator(m_val);
225 return ++it;
226 } 231 }
227 Iterator end() const { 232 Iterator end() const {
228 return Iterator(m_val, -1); 233 return Iterator(0);
229 } 234 }
230 235
231 IntTy m_val; 236 IntTy m_val;
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index b56105306..d5b510cdb 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -45,3 +45,4 @@
45 45
46// Sys files 46// Sys files
47#define SHARED_FONT "shared_font.bin" 47#define SHARED_FONT "shared_font.bin"
48#define AES_KEYS "aes_keys.txt"
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 1a1f5d9b5..df234c225 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -303,7 +303,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
303 // copy loop 303 // copy loop
304 while (!feof(input)) { 304 while (!feof(input)) {
305 // read input 305 // read input
306 int rnum = fread(buffer, sizeof(char), BSIZE, input); 306 size_t rnum = fread(buffer, sizeof(char), BSIZE, input);
307 if (rnum != BSIZE) { 307 if (rnum != BSIZE) {
308 if (ferror(input) != 0) { 308 if (ferror(input) != 0) {
309 LOG_ERROR(Common_Filesystem, "failed reading from source, %s --> %s: %s", 309 LOG_ERROR(Common_Filesystem, "failed reading from source, %s --> %s: %s",
@@ -313,7 +313,7 @@ bool Copy(const std::string& srcFilename, const std::string& destFilename) {
313 } 313 }
314 314
315 // write output 315 // write output
316 int wnum = fwrite(buffer, sizeof(char), rnum, output); 316 size_t wnum = fwrite(buffer, sizeof(char), rnum, output);
317 if (wnum != rnum) { 317 if (wnum != rnum) {
318 LOG_ERROR(Common_Filesystem, "failed writing to output, %s --> %s: %s", 318 LOG_ERROR(Common_Filesystem, "failed writing to output, %s --> %s: %s",
319 srcFilename.c_str(), destFilename.c_str(), GetLastErrorMsg()); 319 srcFilename.c_str(), destFilename.c_str(), GetLastErrorMsg());
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
index 2309320bb..f3d390dc5 100644
--- a/src/common/hash.cpp
+++ b/src/common/hash.cpp
@@ -16,7 +16,7 @@ namespace Common {
16 16
17// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do 17// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
18// the conversion here 18// the conversion here
19static FORCE_INLINE u64 getblock64(const u64* p, int i) { 19static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
20 return p[i]; 20 return p[i];
21} 21}
22 22
@@ -34,9 +34,9 @@ static FORCE_INLINE u64 fmix64(u64 k) {
34// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit 34// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
35// platforms (MurmurHash3_x64_128). It was taken from: 35// platforms (MurmurHash3_x64_128). It was taken from:
36// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 36// https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
37void MurmurHash3_128(const void* key, int len, u32 seed, void* out) { 37void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
38 const u8* data = (const u8*)key; 38 const u8* data = (const u8*)key;
39 const int nblocks = len / 16; 39 const size_t nblocks = len / 16;
40 40
41 u64 h1 = seed; 41 u64 h1 = seed;
42 u64 h2 = seed; 42 u64 h2 = seed;
@@ -48,7 +48,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out) {
48 48
49 const u64* blocks = (const u64*)(data); 49 const u64* blocks = (const u64*)(data);
50 50
51 for (int i = 0; i < nblocks; i++) { 51 for (size_t i = 0; i < nblocks; i++) {
52 u64 k1 = getblock64(blocks, i * 2 + 0); 52 u64 k1 = getblock64(blocks, i * 2 + 0);
53 u64 k2 = getblock64(blocks, i * 2 + 1); 53 u64 k2 = getblock64(blocks, i * 2 + 1);
54 54
diff --git a/src/common/hash.h b/src/common/hash.h
index a3850be68..ee2560dad 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -4,11 +4,12 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
7#include "common/common_types.h" 8#include "common/common_types.h"
8 9
9namespace Common { 10namespace Common {
10 11
11void MurmurHash3_128(const void* key, int len, u32 seed, void* out); 12void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
12 13
13/** 14/**
14 * Computes a 64-bit hash over the specified block of data 15 * Computes a 64-bit hash over the specified block of data
@@ -16,7 +17,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out);
16 * @param len Length of data (in bytes) to compute hash over 17 * @param len Length of data (in bytes) to compute hash over
17 * @returns 64-bit hash value that was computed over the data block 18 * @returns 64-bit hash value that was computed over the data block
18 */ 19 */
19static inline u64 ComputeHash64(const void* data, int len) { 20static inline u64 ComputeHash64(const void* data, size_t len) {
20 u64 res[2]; 21 u64 res[2];
21 MurmurHash3_128(data, len, 0, res); 22 MurmurHash3_128(data, len, 0, res);
22 return res[0]; 23 return res[0];
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 2ef3e6b05..737e1d57f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -55,6 +55,7 @@ namespace Log {
55 SUB(Service, DSP) \ 55 SUB(Service, DSP) \
56 SUB(Service, DLP) \ 56 SUB(Service, DLP) \
57 SUB(Service, HID) \ 57 SUB(Service, HID) \
58 SUB(Service, HTTP) \
58 SUB(Service, SOC) \ 59 SUB(Service, SOC) \
59 SUB(Service, IR) \ 60 SUB(Service, IR) \
60 SUB(Service, Y2R) \ 61 SUB(Service, Y2R) \
@@ -62,6 +63,7 @@ namespace Log {
62 SUB(HW, Memory) \ 63 SUB(HW, Memory) \
63 SUB(HW, LCD) \ 64 SUB(HW, LCD) \
64 SUB(HW, GPU) \ 65 SUB(HW, GPU) \
66 SUB(HW, AES) \
65 CLS(Frontend) \ 67 CLS(Frontend) \
66 CLS(Render) \ 68 CLS(Render) \
67 SUB(Render, Software) \ 69 SUB(Render, Software) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 4330ef879..4b0f8ff03 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -72,6 +72,7 @@ enum class Class : ClassType {
72 Service_DSP, ///< The DSP (DSP control) service 72 Service_DSP, ///< The DSP (DSP control) service
73 Service_DLP, ///< The DLP (Download Play) service 73 Service_DLP, ///< The DLP (Download Play) service
74 Service_HID, ///< The HID (Human interface device) service 74 Service_HID, ///< The HID (Human interface device) service
75 Service_HTTP, ///< The HTTP service
75 Service_SOC, ///< The SOC (Socket) service 76 Service_SOC, ///< The SOC (Socket) service
76 Service_IR, ///< The IR service 77 Service_IR, ///< The IR service
77 Service_Y2R, ///< The Y2R (YUV to RGB conversion) service 78 Service_Y2R, ///< The Y2R (YUV to RGB conversion) service
@@ -79,6 +80,7 @@ enum class Class : ClassType {
79 HW_Memory, ///< Memory-map and address translation 80 HW_Memory, ///< Memory-map and address translation
80 HW_LCD, ///< LCD register emulation 81 HW_LCD, ///< LCD register emulation
81 HW_GPU, ///< GPU control emulation 82 HW_GPU, ///< GPU control emulation
83 HW_AES, ///< AES engine emulation
82 Frontend, ///< Emulator UI 84 Frontend, ///< Emulator UI
83 Render, ///< Emulator video output and hardware acceleration 85 Render, ///< Emulator video output and hardware acceleration
84 Render_Software, ///< Software renderer backend 86 Render_Software, ///< Software renderer backend
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
deleted file mode 100644
index b40e7205d..000000000
--- a/src/common/profiler.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <vector>
8#include "common/assert.h"
9#include "common/profiler_reporting.h"
10#include "common/synchronized_wrapper.h"
11
12namespace Common {
13namespace Profiling {
14
15ProfilingManager::ProfilingManager()
16 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {}
17
18void ProfilingManager::BeginFrame() {
19 this_frame_start = Clock::now();
20}
21
22void ProfilingManager::FinishFrame() {
23 Clock::time_point now = Clock::now();
24
25 results.interframe_time = now - last_frame_end;
26 results.frame_time = now - this_frame_start;
27
28 last_frame_end = now;
29}
30
31TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
32 : max_window_size(window_size), window_size(0) {
33 interframe_times.resize(window_size, Duration::zero());
34 frame_times.resize(window_size, Duration::zero());
35}
36
37void TimingResultsAggregator::Clear() {
38 window_size = cursor = 0;
39}
40
41void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
42 interframe_times[cursor] = frame_result.interframe_time;
43 frame_times[cursor] = frame_result.frame_time;
44
45 ++cursor;
46 if (cursor == max_window_size)
47 cursor = 0;
48 if (window_size < max_window_size)
49 ++window_size;
50}
51
52static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
53 AggregatedDuration result;
54 result.avg = Duration::zero();
55 result.min = result.max = (len == 0 ? Duration::zero() : v[0]);
56
57 for (size_t i = 0; i < len; ++i) {
58 Duration value = v[i];
59 result.avg += value;
60 result.min = std::min(result.min, value);
61 result.max = std::max(result.max, value);
62 }
63 if (len != 0)
64 result.avg /= len;
65
66 return result;
67}
68
69static float tof(Common::Profiling::Duration dur) {
70 using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
71 return std::chrono::duration_cast<FloatMs>(dur).count();
72}
73
74AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
75 AggregatedFrameResult result;
76
77 result.interframe_time = AggregateField(interframe_times, window_size);
78 result.frame_time = AggregateField(frame_times, window_size);
79
80 if (result.interframe_time.avg != Duration::zero()) {
81 result.fps = 1000.0f / tof(result.interframe_time.avg);
82 } else {
83 result.fps = 0.0f;
84 }
85
86 return result;
87}
88
89ProfilingManager& GetProfilingManager() {
90 // Takes advantage of "magic" static initialization for race-free initialization.
91 static ProfilingManager manager;
92 return manager;
93}
94
95SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
96 static SynchronizedWrapper<TimingResultsAggregator> aggregator(30);
97 return SynchronizedRef<TimingResultsAggregator>(aggregator);
98}
99
100} // namespace Profiling
101} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
deleted file mode 100644
index e9ce6d41c..000000000
--- a/src/common/profiler_reporting.h
+++ /dev/null
@@ -1,83 +0,0 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <cstddef>
9#include <vector>
10#include "common/synchronized_wrapper.h"
11
12namespace Common {
13namespace Profiling {
14
15using Clock = std::chrono::high_resolution_clock;
16using Duration = Clock::duration;
17
18struct ProfilingFrameResult {
19 /// Time since the last delivered frame
20 Duration interframe_time;
21
22 /// Time spent processing a frame, excluding VSync
23 Duration frame_time;
24};
25
26class ProfilingManager final {
27public:
28 ProfilingManager();
29
30 /// This should be called after swapping screen buffers.
31 void BeginFrame();
32 /// This should be called before swapping screen buffers.
33 void FinishFrame();
34
35 /// Get the timing results from the previous frame. This is updated when you call FinishFrame().
36 const ProfilingFrameResult& GetPreviousFrameResults() const {
37 return results;
38 }
39
40private:
41 Clock::time_point last_frame_end;
42 Clock::time_point this_frame_start;
43
44 ProfilingFrameResult results;
45};
46
47struct AggregatedDuration {
48 Duration avg, min, max;
49};
50
51struct AggregatedFrameResult {
52 /// Time since the last delivered frame
53 AggregatedDuration interframe_time;
54
55 /// Time spent processing a frame, excluding VSync
56 AggregatedDuration frame_time;
57
58 float fps;
59};
60
61class TimingResultsAggregator final {
62public:
63 TimingResultsAggregator(size_t window_size);
64
65 void Clear();
66
67 void AddFrame(const ProfilingFrameResult& frame_result);
68
69 AggregatedFrameResult GetAggregatedResults() const;
70
71 size_t max_window_size;
72 size_t window_size;
73 size_t cursor;
74
75 std::vector<Duration> interframe_times;
76 std::vector<Duration> frame_times;
77};
78
79ProfilingManager& GetProfilingManager();
80SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
81
82} // namespace Profiling
83} // namespace Common
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 79b404bb8..0080db5d5 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -7,12 +7,14 @@
7#define GIT_REV "@GIT_REV@" 7#define GIT_REV "@GIT_REV@"
8#define GIT_BRANCH "@GIT_BRANCH@" 8#define GIT_BRANCH "@GIT_BRANCH@"
9#define GIT_DESC "@GIT_DESC@" 9#define GIT_DESC "@GIT_DESC@"
10#define BUILD_NAME "@REPO_NAME@"
10 11
11namespace Common { 12namespace Common {
12 13
13const char g_scm_rev[] = GIT_REV; 14const char g_scm_rev[] = GIT_REV;
14const char g_scm_branch[] = GIT_BRANCH; 15const char g_scm_branch[] = GIT_BRANCH;
15const char g_scm_desc[] = GIT_DESC; 16const char g_scm_desc[] = GIT_DESC;
17const char g_build_name[] = BUILD_NAME;
16 18
17} // namespace 19} // namespace
18 20
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index 0ef190afa..e22389803 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -9,5 +9,6 @@ namespace Common {
9extern const char g_scm_rev[]; 9extern const char g_scm_rev[];
10extern const char g_scm_branch[]; 10extern const char g_scm_branch[];
11extern const char g_scm_desc[]; 11extern const char g_scm_desc[];
12extern const char g_build_name[];
12 13
13} // namespace 14} // namespace
diff --git a/src/common/synchronized_wrapper.h b/src/common/synchronized_wrapper.h
index 04b4f2e51..4a1984c46 100644
--- a/src/common/synchronized_wrapper.h
+++ b/src/common/synchronized_wrapper.h
@@ -9,25 +9,8 @@
9 9
10namespace Common { 10namespace Common {
11 11
12/**
13 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
14 * one forgets to lock a mutex before acessing an object. To access the wrapped object construct a
15 * SyncronizedRef on this wrapper. Inspired by Rust's Mutex type
16 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
17 */
18template <typename T> 12template <typename T>
19class SynchronizedWrapper { 13class SynchronizedWrapper;
20public:
21 template <typename... Args>
22 SynchronizedWrapper(Args&&... args) : data(std::forward<Args>(args)...) {}
23
24private:
25 template <typename U>
26 friend class SynchronizedRef;
27
28 std::mutex mutex;
29 T data;
30};
31 14
32/** 15/**
33 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This 16 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
@@ -75,4 +58,28 @@ private:
75 SynchronizedWrapper<T>* wrapper; 58 SynchronizedWrapper<T>* wrapper;
76}; 59};
77 60
61/**
62 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
63 * one forgets to lock a mutex before acessing an object. To access the wrapped object construct a
64 * SyncronizedRef on this wrapper. Inspired by Rust's Mutex type
65 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
66 */
67template <typename T>
68class SynchronizedWrapper {
69public:
70 template <typename... Args>
71 SynchronizedWrapper(Args&&... args) : data(std::forward<Args>(args)...) {}
72
73 SynchronizedRef<T> Lock() {
74 return {*this};
75 }
76
77private:
78 template <typename U>
79 friend class SynchronizedRef;
80
81 std::mutex mutex;
82 T data;
83};
84
78} // namespace Common 85} // namespace Common
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp
deleted file mode 100644
index 504b9c940..000000000
--- a/src/common/x64/abi.cpp
+++ /dev/null
@@ -1,350 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include "abi.h"
19#include "emitter.h"
20
21using namespace Gen;
22
23// Shared code between Win64 and Unix64
24
25void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
26 size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
27 size_t shadow = 0;
28#if defined(_WIN32)
29 shadow = 0x20;
30#endif
31
32 int count = (mask & ABI_ALL_GPRS).Count();
33 rsp_alignment -= count * 8;
34 size_t subtraction = 0;
35 int fpr_count = (mask & ABI_ALL_FPRS).Count();
36 if (fpr_count) {
37 // If we have any XMMs to save, we must align the stack here.
38 subtraction = rsp_alignment & 0xf;
39 }
40 subtraction += 16 * fpr_count;
41 size_t xmm_base_subtraction = subtraction;
42 subtraction += needed_frame_size;
43 subtraction += shadow;
44 // Final alignment.
45 rsp_alignment -= subtraction;
46 subtraction += rsp_alignment & 0xf;
47
48 *shadowp = shadow;
49 *subtractionp = subtraction;
50 *xmm_offsetp = subtraction - xmm_base_subtraction;
51}
52
53size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
54 size_t needed_frame_size) {
55 size_t shadow, subtraction, xmm_offset;
56 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
57 &xmm_offset);
58
59 for (int r : mask& ABI_ALL_GPRS)
60 PUSH((X64Reg)r);
61
62 if (subtraction)
63 SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
64
65 for (int x : mask& ABI_ALL_FPRS) {
66 MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
67 xmm_offset += 16;
68 }
69
70 return shadow;
71}
72
73void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
74 size_t needed_frame_size) {
75 size_t shadow, subtraction, xmm_offset;
76 ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction,
77 &xmm_offset);
78
79 for (int x : mask& ABI_ALL_FPRS) {
80 MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset));
81 xmm_offset += 16;
82 }
83
84 if (subtraction)
85 ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
86
87 for (int r = 15; r >= 0; r--) {
88 if (mask[r])
89 POP((X64Reg)r);
90 }
91}
92
93// Common functions
94void XEmitter::ABI_CallFunction(const void* func) {
95 u64 distance = u64(func) - (u64(code) + 5);
96 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
97 // Far call
98 MOV(64, R(RAX), ImmPtr(func));
99 CALLptr(R(RAX));
100 } else {
101 CALL(func);
102 }
103}
104
105void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) {
106 MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
107 u64 distance = u64(func) - (u64(code) + 5);
108 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
109 // Far call
110 MOV(64, R(RAX), ImmPtr(func));
111 CALLptr(R(RAX));
112 } else {
113 CALL(func);
114 }
115}
116
117void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) {
118 MOV(32, R(ABI_PARAM1), Imm32(param1));
119 MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
120 u64 distance = u64(func) - (u64(code) + 5);
121 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
122 // Far call
123 MOV(64, R(RAX), ImmPtr(func));
124 CALLptr(R(RAX));
125 } else {
126 CALL(func);
127 }
128}
129
130void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) {
131 MOV(32, R(ABI_PARAM1), Imm32(param1));
132 u64 distance = u64(func) - (u64(code) + 5);
133 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
134 // Far call
135 MOV(64, R(RAX), ImmPtr(func));
136 CALLptr(R(RAX));
137 } else {
138 CALL(func);
139 }
140}
141
142void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) {
143 MOV(32, R(ABI_PARAM1), Imm32(param1));
144 MOV(32, R(ABI_PARAM2), Imm32(param2));
145 u64 distance = u64(func) - (u64(code) + 5);
146 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
147 // Far call
148 MOV(64, R(RAX), ImmPtr(func));
149 CALLptr(R(RAX));
150 } else {
151 CALL(func);
152 }
153}
154
155void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) {
156 MOV(32, R(ABI_PARAM1), Imm32(param1));
157 MOV(32, R(ABI_PARAM2), Imm32(param2));
158 MOV(32, R(ABI_PARAM3), Imm32(param3));
159 u64 distance = u64(func) - (u64(code) + 5);
160 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
161 // Far call
162 MOV(64, R(RAX), ImmPtr(func));
163 CALLptr(R(RAX));
164 } else {
165 CALL(func);
166 }
167}
168
169void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) {
170 MOV(32, R(ABI_PARAM1), Imm32(param1));
171 MOV(32, R(ABI_PARAM2), Imm32(param2));
172 MOV(64, R(ABI_PARAM3), ImmPtr(param3));
173 u64 distance = u64(func) - (u64(code) + 5);
174 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
175 // Far call
176 MOV(64, R(RAX), ImmPtr(func));
177 CALLptr(R(RAX));
178 } else {
179 CALL(func);
180 }
181}
182
183void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3,
184 void* param4) {
185 MOV(32, R(ABI_PARAM1), Imm32(param1));
186 MOV(32, R(ABI_PARAM2), Imm32(param2));
187 MOV(32, R(ABI_PARAM3), Imm32(param3));
188 MOV(64, R(ABI_PARAM4), ImmPtr(param4));
189 u64 distance = u64(func) - (u64(code) + 5);
190 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
191 // Far call
192 MOV(64, R(RAX), ImmPtr(func));
193 CALLptr(R(RAX));
194 } else {
195 CALL(func);
196 }
197}
198
199void XEmitter::ABI_CallFunctionP(const void* func, void* param1) {
200 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
201 u64 distance = u64(func) - (u64(code) + 5);
202 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
203 // Far call
204 MOV(64, R(RAX), ImmPtr(func));
205 CALLptr(R(RAX));
206 } else {
207 CALL(func);
208 }
209}
210
211void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) {
212 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
213 if (!arg2.IsSimpleReg(ABI_PARAM2))
214 MOV(32, R(ABI_PARAM2), arg2);
215 u64 distance = u64(func) - (u64(code) + 5);
216 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
217 // Far call
218 MOV(64, R(RAX), ImmPtr(func));
219 CALLptr(R(RAX));
220 } else {
221 CALL(func);
222 }
223}
224
225void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2,
226 const Gen::OpArg& arg3) {
227 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
228 if (!arg2.IsSimpleReg(ABI_PARAM2))
229 MOV(32, R(ABI_PARAM2), arg2);
230 if (!arg3.IsSimpleReg(ABI_PARAM3))
231 MOV(32, R(ABI_PARAM3), arg3);
232 u64 distance = u64(func) - (u64(code) + 5);
233 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
234 // Far call
235 MOV(64, R(RAX), ImmPtr(func));
236 CALLptr(R(RAX));
237 } else {
238 CALL(func);
239 }
240}
241
242void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) {
243 MOV(64, R(ABI_PARAM1), ImmPtr(param1));
244 MOV(64, R(ABI_PARAM2), ImmPtr(param2));
245 MOV(32, R(ABI_PARAM3), Imm32(param3));
246 u64 distance = u64(func) - (u64(code) + 5);
247 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
248 // Far call
249 MOV(64, R(RAX), ImmPtr(func));
250 CALLptr(R(RAX));
251 } else {
252 CALL(func);
253 }
254}
255
256// Pass a register as a parameter.
257void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) {
258 if (reg1 != ABI_PARAM1)
259 MOV(32, R(ABI_PARAM1), R(reg1));
260 u64 distance = u64(func) - (u64(code) + 5);
261 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
262 // Far call
263 MOV(64, R(RAX), ImmPtr(func));
264 CALLptr(R(RAX));
265 } else {
266 CALL(func);
267 }
268}
269
270// Pass two registers as parameters.
271void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) {
272 if (reg2 != ABI_PARAM1) {
273 if (reg1 != ABI_PARAM1)
274 MOV(64, R(ABI_PARAM1), R(reg1));
275 if (reg2 != ABI_PARAM2)
276 MOV(64, R(ABI_PARAM2), R(reg2));
277 } else {
278 if (reg2 != ABI_PARAM2)
279 MOV(64, R(ABI_PARAM2), R(reg2));
280 if (reg1 != ABI_PARAM1)
281 MOV(64, R(ABI_PARAM1), R(reg1));
282 }
283 u64 distance = u64(func) - (u64(code) + 5);
284 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
285 // Far call
286 MOV(64, R(RAX), ImmPtr(func));
287 CALLptr(R(RAX));
288 } else {
289 CALL(func);
290 }
291}
292
293void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) {
294 if (!arg1.IsSimpleReg(ABI_PARAM1))
295 MOV(32, R(ABI_PARAM1), arg1);
296 MOV(32, R(ABI_PARAM2), Imm32(param2));
297 u64 distance = u64(func) - (u64(code) + 5);
298 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
299 // Far call
300 MOV(64, R(RAX), ImmPtr(func));
301 CALLptr(R(RAX));
302 } else {
303 CALL(func);
304 }
305}
306
307void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2,
308 u32 param3) {
309 if (!arg1.IsSimpleReg(ABI_PARAM1))
310 MOV(32, R(ABI_PARAM1), arg1);
311 MOV(32, R(ABI_PARAM2), Imm32(param2));
312 MOV(64, R(ABI_PARAM3), Imm64(param3));
313 u64 distance = u64(func) - (u64(code) + 5);
314 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
315 // Far call
316 MOV(64, R(RAX), ImmPtr(func));
317 CALLptr(R(RAX));
318 } else {
319 CALL(func);
320 }
321}
322
323void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) {
324 if (!arg1.IsSimpleReg(ABI_PARAM1))
325 MOV(32, R(ABI_PARAM1), arg1);
326 u64 distance = u64(func) - (u64(code) + 5);
327 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
328 // Far call
329 MOV(64, R(RAX), ImmPtr(func));
330 CALLptr(R(RAX));
331 } else {
332 CALL(func);
333 }
334}
335
336void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1,
337 const Gen::OpArg& arg2) {
338 if (!arg1.IsSimpleReg(ABI_PARAM1))
339 MOV(32, R(ABI_PARAM1), arg1);
340 if (!arg2.IsSimpleReg(ABI_PARAM2))
341 MOV(32, R(ABI_PARAM2), arg2);
342 u64 distance = u64(func) - (u64(code) + 5);
343 if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) {
344 // Far call
345 MOV(64, R(RAX), ImmPtr(func));
346 CALLptr(R(RAX));
347 } else {
348 CALL(func);
349 }
350} \ No newline at end of file
diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h
deleted file mode 100644
index eaaf81d89..000000000
--- a/src/common/x64/abi.h
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2008 Dolphin Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_set.h"
8#include "emitter.h"
9
10// x64 ABI:s, and helpers to help follow them when JIT-ing code.
11// All convensions return values in EAX (+ possibly EDX).
12
13// Windows 64-bit
14// * 4-reg "fastcall" variant, very new-skool stack handling
15// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself
16// calls_
17// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
18// Scratch: RAX RCX RDX R8 R9 R10 R11
19// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
20// Parameters: RCX RDX R8 R9, further MOV-ed
21
22// Linux 64-bit
23// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
24// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
25// Callee-save: RBX RBP R12 R13 R14 R15
26// Parameters: RDI RSI RDX RCX R8 R9
27
28#define ABI_ALL_FPRS BitSet32(0xffff0000)
29#define ABI_ALL_GPRS BitSet32(0x0000ffff)
30
31#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
32
33#define ABI_PARAM1 RCX
34#define ABI_PARAM2 RDX
35#define ABI_PARAM3 R8
36#define ABI_PARAM4 R9
37
38// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
39#define ABI_ALL_CALLER_SAVED \
40 (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
41 XMM4 + 16, XMM5 + 16})
42#else // 64-bit Unix / OS X
43
44#define ABI_PARAM1 RDI
45#define ABI_PARAM2 RSI
46#define ABI_PARAM3 RDX
47#define ABI_PARAM4 RCX
48#define ABI_PARAM5 R8
49#define ABI_PARAM6 R9
50
51// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
52// don't actually clobber them.
53#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
54#endif // WIN32
55
56#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
57
58#define ABI_RETURN RAX
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 370ae2c80..2cb3ab9cc 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -8,9 +8,9 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "cpu_detect.h" 9#include "cpu_detect.h"
10 10
11namespace Common { 11#ifdef _MSC_VER
12 12#include <intrin.h>
13#ifndef _MSC_VER 13#else
14 14
15#if defined(__DragonFly__) || defined(__FreeBSD__) 15#if defined(__DragonFly__) || defined(__FreeBSD__)
16// clang-format off 16// clang-format off
@@ -37,13 +37,15 @@ static inline void __cpuid(int info[4], int function_id) {
37} 37}
38 38
39#define _XCR_XFEATURE_ENABLED_MASK 0 39#define _XCR_XFEATURE_ENABLED_MASK 0
40static u64 _xgetbv(u32 index) { 40static inline u64 _xgetbv(u32 index) {
41 u32 eax, edx; 41 u32 eax, edx;
42 __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); 42 __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
43 return ((u64)edx << 32) | eax; 43 return ((u64)edx << 32) | eax;
44} 44}
45 45
46#endif // ifndef _MSC_VER 46#endif // _MSC_VER
47
48namespace Common {
47 49
48// Detects the various CPU features 50// Detects the various CPU features
49static CPUCaps Detect() { 51static CPUCaps Detect() {
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
deleted file mode 100644
index f5930abec..000000000
--- a/src/common/x64/emitter.cpp
+++ /dev/null
@@ -1,2583 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#include <cinttypes>
19#include <cstring>
20#include "abi.h"
21#include "common/assert.h"
22#include "common/logging/log.h"
23#include "common/memory_util.h"
24#include "cpu_detect.h"
25#include "emitter.h"
26
27namespace Gen {
28
29struct NormalOpDef {
30 u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext;
31};
32
33// 0xCC is code for invalid combination of immediates
34static const NormalOpDef normalops[11] = {
35 {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD
36 {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC
37
38 {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB
39 {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB
40
41 {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND
42 {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR
43
44 {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR
45 {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV
46
47 {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from)
48 {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP
49
50 {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG
51};
52
53enum NormalSSEOps {
54 sseCMP = 0xC2,
55 sseADD = 0x58, // ADD
56 sseSUB = 0x5C, // SUB
57 sseAND = 0x54, // AND
58 sseANDN = 0x55, // ANDN
59 sseOR = 0x56,
60 sseXOR = 0x57,
61 sseMUL = 0x59, // MUL
62 sseDIV = 0x5E, // DIV
63 sseMIN = 0x5D, // MIN
64 sseMAX = 0x5F, // MAX
65 sseCOMIS = 0x2F, // COMIS
66 sseUCOMIS = 0x2E, // UCOMIS
67 sseSQRT = 0x51, // SQRT
68 sseRSQRT = 0x52, // RSQRT (NO DOUBLE PRECISION!!!)
69 sseRCP = 0x53, // RCP
70 sseMOVAPfromRM = 0x28, // MOVAP from RM
71 sseMOVAPtoRM = 0x29, // MOVAP to RM
72 sseMOVUPfromRM = 0x10, // MOVUP from RM
73 sseMOVUPtoRM = 0x11, // MOVUP to RM
74 sseMOVLPfromRM = 0x12,
75 sseMOVLPtoRM = 0x13,
76 sseMOVHPfromRM = 0x16,
77 sseMOVHPtoRM = 0x17,
78 sseMOVHLPS = 0x12,
79 sseMOVLHPS = 0x16,
80 sseMOVDQfromRM = 0x6F,
81 sseMOVDQtoRM = 0x7F,
82 sseMASKMOVDQU = 0xF7,
83 sseLDDQU = 0xF0,
84 sseSHUF = 0xC6,
85 sseMOVNTDQ = 0xE7,
86 sseMOVNTP = 0x2B,
87 sseHADD = 0x7C,
88};
89
90void XEmitter::SetCodePtr(u8* ptr) {
91 code = ptr;
92}
93
94const u8* XEmitter::GetCodePtr() const {
95 return code;
96}
97
98u8* XEmitter::GetWritableCodePtr() {
99 return code;
100}
101
102void XEmitter::Write8(u8 value) {
103 *code++ = value;
104}
105
106void XEmitter::Write16(u16 value) {
107 std::memcpy(code, &value, sizeof(u16));
108 code += sizeof(u16);
109}
110
111void XEmitter::Write32(u32 value) {
112 std::memcpy(code, &value, sizeof(u32));
113 code += sizeof(u32);
114}
115
116void XEmitter::Write64(u64 value) {
117 std::memcpy(code, &value, sizeof(u64));
118 code += sizeof(u64);
119}
120
121void XEmitter::ReserveCodeSpace(int bytes) {
122 for (int i = 0; i < bytes; i++)
123 *code++ = 0xCC;
124}
125
126const u8* XEmitter::AlignCode4() {
127 int c = int((u64)code & 3);
128 if (c)
129 ReserveCodeSpace(4 - c);
130 return code;
131}
132
133const u8* XEmitter::AlignCode16() {
134 int c = int((u64)code & 15);
135 if (c)
136 ReserveCodeSpace(16 - c);
137 return code;
138}
139
140const u8* XEmitter::AlignCodePage() {
141 int c = int((u64)code & 4095);
142 if (c)
143 ReserveCodeSpace(4096 - c);
144 return code;
145}
146
147// This operation modifies flags; check to see the flags are locked.
148// If the flags are locked, we should immediately and loudly fail before
149// causing a subtle JIT bug.
150void XEmitter::CheckFlags() {
151 ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!");
152}
153
154void XEmitter::WriteModRM(int mod, int reg, int rm) {
155 Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7)));
156}
157
158void XEmitter::WriteSIB(int scale, int index, int base) {
159 Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7)));
160}
161
162void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const {
163 if (customOp == -1)
164 customOp = operandReg;
165#ifdef ARCHITECTURE_x86_64
166 u8 op = 0x40;
167 // REX.W (whether operation is a 64-bit operation)
168 if (opBits == 64)
169 op |= 8;
170 // REX.R (whether ModR/M reg field refers to R8-R15.
171 if (customOp & 8)
172 op |= 4;
173 // REX.X (whether ModR/M SIB index field refers to R8-R15)
174 if (indexReg & 8)
175 op |= 2;
176 // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15)
177 if (offsetOrBaseReg & 8)
178 op |= 1;
179 // Write REX if wr have REX bits to write, or if the operation accesses
180 // SIL, DIL, BPL, or SPL.
181 if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) ||
182 (opBits == 8 && (customOp & 0x10c) == 4)) {
183 emit->Write8(op);
184 // Check the operation doesn't access AH, BH, CH, or DH.
185 DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0);
186 DEBUG_ASSERT((customOp & 0x100) == 0);
187 }
188#else
189 DEBUG_ASSERT(opBits != 64);
190 DEBUG_ASSERT((customOp & 8) == 0 || customOp == -1);
191 DEBUG_ASSERT((indexReg & 8) == 0);
192 DEBUG_ASSERT((offsetOrBaseReg & 8) == 0);
193 DEBUG_ASSERT(opBits != 8 || (customOp & 0x10c) != 4 || customOp == -1);
194 DEBUG_ASSERT(scale == SCALE_ATREG || bits != 8 || (offsetOrBaseReg & 0x10c) != 4);
195#endif
196}
197
198void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
199 int W) const {
200 int R = !(regOp1 & 8);
201 int X = !(indexReg & 8);
202 int B = !(offsetOrBaseReg & 8);
203
204 int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
205
206 // do we need any VEX fields that only appear in the three-byte form?
207 if (X == 1 && B == 1 && W == 0 && mmmmm == 1) {
208 u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp;
209 emit->Write8(0xC5);
210 emit->Write8(RvvvvLpp);
211 } else {
212 u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm;
213 u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp;
214 emit->Write8(0xC4);
215 emit->Write8(RXBmmmmm);
216 emit->Write8(WvvvvLpp);
217 }
218}
219
220void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg,
221 bool warn_64bit_offset) const {
222 if (_operandReg == INVALID_REG)
223 _operandReg = (X64Reg)this->operandReg;
224 int mod = 0;
225 int ireg = indexReg;
226 bool SIB = false;
227 int _offsetOrBaseReg = this->offsetOrBaseReg;
228
229 if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address
230 {
231 // Oh, RIP addressing.
232 _offsetOrBaseReg = 5;
233 emit->WriteModRM(0, _operandReg, _offsetOrBaseReg);
234// TODO : add some checks
235#ifdef ARCHITECTURE_x86_64
236 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes;
237 s64 distance = (s64)offset - (s64)ripAddr;
238 ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset,
239 "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr,
240 offset);
241 s32 offs = (s32)distance;
242 emit->Write32((u32)offs);
243#else
244 emit->Write32((u32)offset);
245#endif
246 return;
247 }
248
249 if (scale == 0) {
250 // Oh, no memory, Just a reg.
251 mod = 3; // 11
252 } else if (scale >= 1) {
253 // Ah good, no scaling.
254 if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) {
255 // Okay, we're good. No SIB necessary.
256 int ioff = (int)offset;
257 if (ioff == 0) {
258 mod = 0;
259 } else if (ioff < -128 || ioff > 127) {
260 mod = 2; // 32-bit displacement
261 } else {
262 mod = 1; // 8-bit displacement
263 }
264 } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) {
265 SIB = true;
266 mod = 0;
267 _offsetOrBaseReg = 5;
268 } else // if (scale != SCALE_ATREG)
269 {
270 if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :(
271 {
272 // So we have to fake it with SIB encoding :(
273 SIB = true;
274 }
275
276 if (scale >= SCALE_1 && scale < SCALE_ATREG) {
277 SIB = true;
278 }
279
280 if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) {
281 SIB = true;
282 ireg = _offsetOrBaseReg;
283 }
284
285 // Okay, we're fine. Just disp encoding.
286 // We need displacement. Which size?
287 int ioff = (int)(s64)offset;
288 if (ioff < -128 || ioff > 127) {
289 mod = 2; // 32-bit displacement
290 } else {
291 mod = 1; // 8-bit displacement
292 }
293 }
294 }
295
296 // Okay. Time to do the actual writing
297 // ModRM byte:
298 int oreg = _offsetOrBaseReg;
299 if (SIB)
300 oreg = 4;
301
302 // TODO(ector): WTF is this if about? I don't remember writing it :-)
303 // if (RIP)
304 // oreg = 5;
305
306 emit->WriteModRM(mod, _operandReg & 7, oreg & 7);
307
308 if (SIB) {
309 // SIB byte
310 int ss;
311 switch (scale) {
312 case SCALE_NONE:
313 _offsetOrBaseReg = 4;
314 ss = 0;
315 break; // RSP
316 case SCALE_1:
317 ss = 0;
318 break;
319 case SCALE_2:
320 ss = 1;
321 break;
322 case SCALE_4:
323 ss = 2;
324 break;
325 case SCALE_8:
326 ss = 3;
327 break;
328 case SCALE_NOBASE_2:
329 ss = 1;
330 break;
331 case SCALE_NOBASE_4:
332 ss = 2;
333 break;
334 case SCALE_NOBASE_8:
335 ss = 3;
336 break;
337 case SCALE_ATREG:
338 ss = 0;
339 break;
340 default:
341 ASSERT_MSG(0, "Invalid scale for SIB byte");
342 ss = 0;
343 break;
344 }
345 emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7)));
346 }
347
348 if (mod == 1) // 8-bit disp
349 {
350 emit->Write8((u8)(s8)(s32)offset);
351 } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp
352 {
353 emit->Write32((u32)offset);
354 }
355}
356
357// W = operand extended width (1 if 64-bit)
358// R = register# upper bit
359// X = scale amnt upper bit
360// B = base register# upper bit
361void XEmitter::Rex(int w, int r, int x, int b) {
362 w = w ? 1 : 0;
363 r = r ? 1 : 0;
364 x = x ? 1 : 0;
365 b = b ? 1 : 0;
366 u8 rx = (u8)(0x40 | (w << 3) | (r << 2) | (x << 1) | (b));
367 if (rx != 0x40)
368 Write8(rx);
369}
370
371void XEmitter::JMP(const u8* addr, bool force5Bytes) {
372 u64 fn = (u64)addr;
373 if (!force5Bytes) {
374 s64 distance = (s64)(fn - ((u64)code + 2));
375 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
376 "Jump target too far away, needs force5Bytes = true");
377 // 8 bits will do
378 Write8(0xEB);
379 Write8((u8)(s8)distance);
380 } else {
381 s64 distance = (s64)(fn - ((u64)code + 5));
382
383 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
384 "Jump target too far away, needs indirect register");
385 Write8(0xE9);
386 Write32((u32)(s32)distance);
387 }
388}
389
390void XEmitter::JMPptr(const OpArg& arg2) {
391 OpArg arg = arg2;
392 if (arg.IsImm())
393 ASSERT_MSG(0, "JMPptr - Imm argument");
394 arg.operandReg = 4;
395 arg.WriteRex(this, 0, 0);
396 Write8(0xFF);
397 arg.WriteRest(this);
398}
399
400// Can be used to trap other processors, before overwriting their code
401// not used in dolphin
402void XEmitter::JMPself() {
403 Write8(0xEB);
404 Write8(0xFE);
405}
406
407void XEmitter::CALLptr(OpArg arg) {
408 if (arg.IsImm())
409 ASSERT_MSG(0, "CALLptr - Imm argument");
410 arg.operandReg = 2;
411 arg.WriteRex(this, 0, 0);
412 Write8(0xFF);
413 arg.WriteRest(this);
414}
415
416void XEmitter::CALL(const void* fnptr) {
417 u64 distance = u64(fnptr) - (u64(code) + 5);
418 ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL,
419 "CALL out of range (%p calls %p)", code, fnptr);
420 Write8(0xE8);
421 Write32(u32(distance));
422}
423
424FixupBranch XEmitter::CALL() {
425 FixupBranch branch;
426 branch.type = 1;
427 branch.ptr = code + 5;
428
429 Write8(0xE8);
430 Write32(0);
431
432 return branch;
433}
434
435FixupBranch XEmitter::J(bool force5bytes) {
436 FixupBranch branch;
437 branch.type = force5bytes ? 1 : 0;
438 branch.ptr = code + (force5bytes ? 5 : 2);
439 if (!force5bytes) {
440 // 8 bits will do
441 Write8(0xEB);
442 Write8(0);
443 } else {
444 Write8(0xE9);
445 Write32(0);
446 }
447 return branch;
448}
449
450FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) {
451 FixupBranch branch;
452 branch.type = force5bytes ? 1 : 0;
453 branch.ptr = code + (force5bytes ? 6 : 2);
454 if (!force5bytes) {
455 // 8 bits will do
456 Write8(0x70 + conditionCode);
457 Write8(0);
458 } else {
459 Write8(0x0F);
460 Write8(0x80 + conditionCode);
461 Write32(0);
462 }
463 return branch;
464}
465
466void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) {
467 u64 fn = (u64)addr;
468 s64 distance = (s64)(fn - ((u64)code + 2));
469 if (distance < -0x80 || distance >= 0x80 || force5bytes) {
470 distance = (s64)(fn - ((u64)code + 6));
471 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
472 "Jump target too far away, needs indirect register");
473 Write8(0x0F);
474 Write8(0x80 + conditionCode);
475 Write32((u32)(s32)distance);
476 } else {
477 Write8(0x70 + conditionCode);
478 Write8((u8)(s8)distance);
479 }
480}
481
482void XEmitter::SetJumpTarget(const FixupBranch& branch) {
483 if (branch.type == 0) {
484 s64 distance = (s64)(code - branch.ptr);
485 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
486 "Jump target too far away, needs force5Bytes = true");
487 branch.ptr[-1] = (u8)(s8)distance;
488 } else if (branch.type == 1) {
489 s64 distance = (s64)(code - branch.ptr);
490 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
491 "Jump target too far away, needs indirect register");
492 ((s32*)branch.ptr)[-1] = (s32)distance;
493 }
494}
495
496void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) {
497 if (branch.type == 0) {
498 s64 distance = (s64)(target - branch.ptr);
499 ASSERT_MSG(distance >= -0x80 && distance < 0x80,
500 "Jump target too far away, needs force5Bytes = true");
501 branch.ptr[-1] = (u8)(s8)distance;
502 } else if (branch.type == 1) {
503 s64 distance = (s64)(target - branch.ptr);
504 ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL,
505 "Jump target too far away, needs indirect register");
506 ((s32*)branch.ptr)[-1] = (s32)distance;
507 }
508}
509
510// Single byte opcodes
511// There is no PUSHAD/POPAD in 64-bit mode.
512void XEmitter::INT3() {
513 Write8(0xCC);
514}
515void XEmitter::RET() {
516 Write8(0xC3);
517}
518void XEmitter::RET_FAST() {
519 Write8(0xF3);
520 Write8(0xC3);
521} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a
522 // ret
523
524// The first sign of decadence: optimized NOPs.
525void XEmitter::NOP(size_t size) {
526 DEBUG_ASSERT((int)size > 0);
527 while (true) {
528 switch (size) {
529 case 0:
530 return;
531 case 1:
532 Write8(0x90);
533 return;
534 case 2:
535 Write8(0x66);
536 Write8(0x90);
537 return;
538 case 3:
539 Write8(0x0F);
540 Write8(0x1F);
541 Write8(0x00);
542 return;
543 case 4:
544 Write8(0x0F);
545 Write8(0x1F);
546 Write8(0x40);
547 Write8(0x00);
548 return;
549 case 5:
550 Write8(0x0F);
551 Write8(0x1F);
552 Write8(0x44);
553 Write8(0x00);
554 Write8(0x00);
555 return;
556 case 6:
557 Write8(0x66);
558 Write8(0x0F);
559 Write8(0x1F);
560 Write8(0x44);
561 Write8(0x00);
562 Write8(0x00);
563 return;
564 case 7:
565 Write8(0x0F);
566 Write8(0x1F);
567 Write8(0x80);
568 Write8(0x00);
569 Write8(0x00);
570 Write8(0x00);
571 Write8(0x00);
572 return;
573 case 8:
574 Write8(0x0F);
575 Write8(0x1F);
576 Write8(0x84);
577 Write8(0x00);
578 Write8(0x00);
579 Write8(0x00);
580 Write8(0x00);
581 Write8(0x00);
582 return;
583 case 9:
584 Write8(0x66);
585 Write8(0x0F);
586 Write8(0x1F);
587 Write8(0x84);
588 Write8(0x00);
589 Write8(0x00);
590 Write8(0x00);
591 Write8(0x00);
592 Write8(0x00);
593 return;
594 case 10:
595 Write8(0x66);
596 Write8(0x66);
597 Write8(0x0F);
598 Write8(0x1F);
599 Write8(0x84);
600 Write8(0x00);
601 Write8(0x00);
602 Write8(0x00);
603 Write8(0x00);
604 Write8(0x00);
605 return;
606 default:
607 // Even though x86 instructions are allowed to be up to 15 bytes long,
608 // AMD advises against using NOPs longer than 11 bytes because they
609 // carry a performance penalty on CPUs older than AMD family 16h.
610 Write8(0x66);
611 Write8(0x66);
612 Write8(0x66);
613 Write8(0x0F);
614 Write8(0x1F);
615 Write8(0x84);
616 Write8(0x00);
617 Write8(0x00);
618 Write8(0x00);
619 Write8(0x00);
620 Write8(0x00);
621 size -= 11;
622 continue;
623 }
624 }
625}
626
627void XEmitter::PAUSE() {
628 Write8(0xF3);
629 NOP();
630} // use in tight spinloops for energy saving on some cpu
631void XEmitter::CLC() {
632 CheckFlags();
633 Write8(0xF8);
634} // clear carry
635void XEmitter::CMC() {
636 CheckFlags();
637 Write8(0xF5);
638} // flip carry
639void XEmitter::STC() {
640 CheckFlags();
641 Write8(0xF9);
642} // set carry
643
644// TODO: xchg ah, al ???
645void XEmitter::XCHG_AHAL() {
646 Write8(0x86);
647 Write8(0xe0);
648 // alt. 86 c4
649}
650
651// These two can not be executed on early Intel 64-bit CPU:s, only on AMD!
652void XEmitter::LAHF() {
653 Write8(0x9F);
654}
655void XEmitter::SAHF() {
656 CheckFlags();
657 Write8(0x9E);
658}
659
660void XEmitter::PUSHF() {
661 Write8(0x9C);
662}
663void XEmitter::POPF() {
664 CheckFlags();
665 Write8(0x9D);
666}
667
668void XEmitter::LFENCE() {
669 Write8(0x0F);
670 Write8(0xAE);
671 Write8(0xE8);
672}
673void XEmitter::MFENCE() {
674 Write8(0x0F);
675 Write8(0xAE);
676 Write8(0xF0);
677}
678void XEmitter::SFENCE() {
679 Write8(0x0F);
680 Write8(0xAE);
681 Write8(0xF8);
682}
683
684void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) {
685 if (bits == 16)
686 Write8(0x66);
687 Rex(bits == 64, 0, 0, (int)reg >> 3);
688 Write8(byte + ((int)reg & 7));
689}
690
691void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) {
692 if (bits == 16)
693 Write8(0x66);
694 Rex(bits == 64, 0, 0, (int)reg >> 3);
695 Write8(byte1);
696 Write8(byte2 + ((int)reg & 7));
697}
698
699void XEmitter::CWD(int bits) {
700 if (bits == 16)
701 Write8(0x66);
702 Rex(bits == 64, 0, 0, 0);
703 Write8(0x99);
704}
705
706void XEmitter::CBW(int bits) {
707 if (bits == 8)
708 Write8(0x66);
709 Rex(bits == 32, 0, 0, 0);
710 Write8(0x98);
711}
712
713// Simple opcodes
714
715// push/pop do not need wide to be 64-bit
716void XEmitter::PUSH(X64Reg reg) {
717 WriteSimple1Byte(32, 0x50, reg);
718}
719void XEmitter::POP(X64Reg reg) {
720 WriteSimple1Byte(32, 0x58, reg);
721}
722
723void XEmitter::PUSH(int bits, const OpArg& reg) {
724 if (reg.IsSimpleReg())
725 PUSH(reg.GetSimpleReg());
726 else if (reg.IsImm()) {
727 switch (reg.GetImmBits()) {
728 case 8:
729 Write8(0x6A);
730 Write8((u8)(s8)reg.offset);
731 break;
732 case 16:
733 Write8(0x66);
734 Write8(0x68);
735 Write16((u16)(s16)(s32)reg.offset);
736 break;
737 case 32:
738 Write8(0x68);
739 Write32((u32)reg.offset);
740 break;
741 default:
742 ASSERT_MSG(0, "PUSH - Bad imm bits");
743 break;
744 }
745 } else {
746 if (bits == 16)
747 Write8(0x66);
748 reg.WriteRex(this, bits, bits);
749 Write8(0xFF);
750 reg.WriteRest(this, 0, (X64Reg)6);
751 }
752}
753
754void XEmitter::POP(int /*bits*/, const OpArg& reg) {
755 if (reg.IsSimpleReg())
756 POP(reg.GetSimpleReg());
757 else
758 ASSERT_MSG(0, "POP - Unsupported encoding");
759}
760
761void XEmitter::BSWAP(int bits, X64Reg reg) {
762 if (bits >= 32) {
763 WriteSimple2Byte(bits, 0x0F, 0xC8, reg);
764 } else if (bits == 16) {
765 ROL(16, R(reg), Imm8(8));
766 } else if (bits == 8) {
767 // Do nothing - can't bswap a single byte...
768 } else {
769 ASSERT_MSG(0, "BSWAP - Wrong number of bits");
770 }
771}
772
773// Undefined opcode - reserved
774// If we ever need a way to always cause a non-breakpoint hard exception...
775void XEmitter::UD2() {
776 Write8(0x0F);
777 Write8(0x0B);
778}
779
780void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) {
781 ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument");
782 arg.operandReg = (u8)level;
783 arg.WriteRex(this, 0, 0);
784 Write8(0x0F);
785 Write8(0x18);
786 arg.WriteRest(this);
787}
788
789void XEmitter::SETcc(CCFlags flag, OpArg dest) {
790 ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument");
791 dest.operandReg = 0;
792 dest.WriteRex(this, 0, 8);
793 Write8(0x0F);
794 Write8(0x90 + (u8)flag);
795 dest.WriteRest(this);
796}
797
798void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) {
799 ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument");
800 ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported");
801 if (bits == 16)
802 Write8(0x66);
803 src.operandReg = dest;
804 src.WriteRex(this, bits, bits);
805 Write8(0x0F);
806 Write8(0x40 + (u8)flag);
807 src.WriteRest(this);
808}
809
810void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) {
811 ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument");
812 CheckFlags();
813 src.operandReg = ext;
814 if (bits == 16)
815 Write8(0x66);
816 src.WriteRex(this, bits, bits, 0);
817 if (bits == 8) {
818 Write8(0xF6);
819 } else {
820 Write8(0xF7);
821 }
822 src.WriteRest(this);
823}
824
825void XEmitter::MUL(int bits, const OpArg& src) {
826 WriteMulDivType(bits, src, 4);
827}
828void XEmitter::DIV(int bits, const OpArg& src) {
829 WriteMulDivType(bits, src, 6);
830}
831void XEmitter::IMUL(int bits, const OpArg& src) {
832 WriteMulDivType(bits, src, 5);
833}
834void XEmitter::IDIV(int bits, const OpArg& src) {
835 WriteMulDivType(bits, src, 7);
836}
837void XEmitter::NEG(int bits, const OpArg& src) {
838 WriteMulDivType(bits, src, 3);
839}
840void XEmitter::NOT(int bits, const OpArg& src) {
841 WriteMulDivType(bits, src, 2);
842}
843
844void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) {
845 ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument");
846 CheckFlags();
847 src.operandReg = (u8)dest;
848 if (bits == 16)
849 Write8(0x66);
850 if (rep)
851 Write8(0xF3);
852 src.WriteRex(this, bits, bits);
853 Write8(0x0F);
854 Write8(byte2);
855 src.WriteRest(this);
856}
857
858void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) {
859 if (bits <= 16)
860 ASSERT_MSG(0, "MOVNTI - bits<=16");
861 WriteBitSearchType(bits, src, dest, 0xC3);
862}
863
864void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {
865 WriteBitSearchType(bits, dest, src, 0xBC);
866} // Bottom bit to top bit
867void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {
868 WriteBitSearchType(bits, dest, src, 0xBD);
869} // Top bit to bottom bit
870
871void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) {
872 CheckFlags();
873 if (!Common::GetCPUCaps().bmi1)
874 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
875 WriteBitSearchType(bits, dest, src, 0xBC, true);
876}
877void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) {
878 CheckFlags();
879 if (!Common::GetCPUCaps().lzcnt)
880 ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer.");
881 WriteBitSearchType(bits, dest, src, 0xBD, true);
882}
883
884void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) {
885 ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument");
886 if (dbits == sbits) {
887 MOV(dbits, R(dest), src);
888 return;
889 }
890 src.operandReg = (u8)dest;
891 if (dbits == 16)
892 Write8(0x66);
893 src.WriteRex(this, dbits, sbits);
894 if (sbits == 8) {
895 Write8(0x0F);
896 Write8(0xBE);
897 } else if (sbits == 16) {
898 Write8(0x0F);
899 Write8(0xBF);
900 } else if (sbits == 32 && dbits == 64) {
901 Write8(0x63);
902 } else {
903 Crash();
904 }
905 src.WriteRest(this);
906}
907
908void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) {
909 ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument");
910 if (dbits == sbits) {
911 MOV(dbits, R(dest), src);
912 return;
913 }
914 src.operandReg = (u8)dest;
915 if (dbits == 16)
916 Write8(0x66);
917 // the 32bit result is automatically zero extended to 64bit
918 src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits);
919 if (sbits == 8) {
920 Write8(0x0F);
921 Write8(0xB6);
922 } else if (sbits == 16) {
923 Write8(0x0F);
924 Write8(0xB7);
925 } else if (sbits == 32 && dbits == 64) {
926 Write8(0x8B);
927 } else {
928 ASSERT_MSG(0, "MOVZX - Invalid size");
929 }
930 src.WriteRest(this);
931}
932
933void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) {
934 ASSERT_MSG(Common::GetCPUCaps().movbe,
935 "Generating MOVBE on a system that does not support it.");
936 if (bits == 8) {
937 MOV(bits, dest, src);
938 return;
939 }
940
941 if (bits == 16)
942 Write8(0x66);
943
944 if (dest.IsSimpleReg()) {
945 ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem");
946 src.WriteRex(this, bits, bits, dest.GetSimpleReg());
947 Write8(0x0F);
948 Write8(0x38);
949 Write8(0xF0);
950 src.WriteRest(this, 0, dest.GetSimpleReg());
951 } else if (src.IsSimpleReg()) {
952 ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem");
953 dest.WriteRex(this, bits, bits, src.GetSimpleReg());
954 Write8(0x0F);
955 Write8(0x38);
956 Write8(0xF1);
957 dest.WriteRest(this, 0, src.GetSimpleReg());
958 } else {
959 ASSERT_MSG(0, "MOVBE: Not loading or storing to mem");
960 }
961}
962
963void XEmitter::LEA(int bits, X64Reg dest, OpArg src) {
964 ASSERT_MSG(!src.IsImm(), "LEA - Imm argument");
965 src.operandReg = (u8)dest;
966 if (bits == 16)
967 Write8(0x66); // TODO: performance warning
968 src.WriteRex(this, bits, bits);
969 Write8(0x8D);
970 src.WriteRest(this, 0, INVALID_REG, bits == 64);
971}
972
973// shift can be either imm8 or cl
974void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) {
975 CheckFlags();
976 bool writeImm = false;
977 if (dest.IsImm()) {
978 ASSERT_MSG(0, "WriteShift - can't shift imms");
979 }
980 if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
981 (shift.IsImm() && shift.GetImmBits() != 8)) {
982 ASSERT_MSG(0, "WriteShift - illegal argument");
983 }
984 dest.operandReg = ext;
985 if (bits == 16)
986 Write8(0x66);
987 dest.WriteRex(this, bits, bits, 0);
988 if (shift.GetImmBits() == 8) {
989 // ok an imm
990 u8 imm = (u8)shift.offset;
991 if (imm == 1) {
992 Write8(bits == 8 ? 0xD0 : 0xD1);
993 } else {
994 writeImm = true;
995 Write8(bits == 8 ? 0xC0 : 0xC1);
996 }
997 } else {
998 Write8(bits == 8 ? 0xD2 : 0xD3);
999 }
1000 dest.WriteRest(this, writeImm ? 1 : 0);
1001 if (writeImm)
1002 Write8((u8)shift.offset);
1003}
1004
1005// large rotates and shift are slower on intel than amd
1006// intel likes to rotate by 1, and the op is smaller too
1007void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {
1008 WriteShift(bits, dest, shift, 0);
1009}
1010void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {
1011 WriteShift(bits, dest, shift, 1);
1012}
1013void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {
1014 WriteShift(bits, dest, shift, 2);
1015}
1016void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {
1017 WriteShift(bits, dest, shift, 3);
1018}
1019void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {
1020 WriteShift(bits, dest, shift, 4);
1021}
1022void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {
1023 WriteShift(bits, dest, shift, 5);
1024}
1025void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {
1026 WriteShift(bits, dest, shift, 7);
1027}
1028
// index can be either imm8 or register, don't use memory destination because it's slow
// Emits the bit-test family (BT/BTS/BTR/BTC); 'ext' selects the operation.
// Immediate form: 0F BA /ext ib. Register form: 0F A3/AB/B3/BB (0x83 + 8*ext).
void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) {
    CheckFlags();
    if (dest.IsImm()) {
        ASSERT_MSG(0, "WriteBitTest - can't test imms");
    }
    if ((index.IsImm() && index.GetImmBits() != 8)) {
        ASSERT_MSG(0, "WriteBitTest - illegal argument");
    }
    if (bits == 16)
        Write8(0x66); // operand-size override for the 16-bit form
    if (index.IsImm()) {
        dest.WriteRex(this, bits, bits);
        Write8(0x0F);
        Write8(0xBA);
        dest.WriteRest(this, 1, (X64Reg)ext); // ext rides in the ModRM reg field; 1 = trailing imm8
        Write8((u8)index.offset);
    } else {
        X64Reg operand = index.GetSimpleReg();
        dest.WriteRex(this, bits, bits, operand);
        Write8(0x0F);
        Write8(0x83 + 8 * ext); // BT=A3, BTS=AB, BTR=B3, BTC=BB
        // NOTE(review): extrabytes=1 although no immediate follows in the
        // register form — verify this doesn't skew RIP-relative fixups.
        dest.WriteRest(this, 1, operand);
    }
}
1054
// Bit-test wrappers; the last argument is the group extension:
// BT=4 (test only), BTS=5 (test+set), BTR=6 (test+reset), BTC=7 (test+complement).
void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {
    WriteBitTest(bits, dest, index, 4);
}
void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {
    WriteBitTest(bits, dest, index, 5);
}
void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {
    WriteBitTest(bits, dest, index, 6);
}
void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {
    WriteBitTest(bits, dest, index, 7);
}
1067
// shift can be either imm8 or cl
// Double-precision shift right: 0F AC (imm8 count) / 0F AD (count in CL).
// 'src' supplies the bits shifted in and must be a plain register.
void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
    CheckFlags();
    if (dest.IsImm()) {
        ASSERT_MSG(0, "SHRD - can't use imms as destination");
    }
    if (!src.IsSimpleReg()) {
        ASSERT_MSG(0, "SHRD - must use simple register as source");
    }
    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
        (shift.IsImm() && shift.GetImmBits() != 8)) {
        ASSERT_MSG(0, "SHRD - illegal shift");
    }
    if (bits == 16)
        Write8(0x66);
    X64Reg operand = src.GetSimpleReg();
    dest.WriteRex(this, bits, bits, operand);
    if (shift.GetImmBits() == 8) {
        // SHRD r/m, reg, imm8
        Write8(0x0F);
        Write8(0xAC);
        dest.WriteRest(this, 1, operand);
        Write8((u8)shift.offset);
    } else {
        // SHRD r/m, reg, CL
        Write8(0x0F);
        Write8(0xAD);
        dest.WriteRest(this, 0, operand);
    }
}
1096
// Double-precision shift left: 0F A4 (imm8 count) / 0F A5 (count in CL).
// Mirrors SHRD above; shift count must be imm8 or the CL register.
void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) {
    CheckFlags();
    if (dest.IsImm()) {
        ASSERT_MSG(0, "SHLD - can't use imms as destination");
    }
    if (!src.IsSimpleReg()) {
        ASSERT_MSG(0, "SHLD - must use simple register as source");
    }
    if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) ||
        (shift.IsImm() && shift.GetImmBits() != 8)) {
        ASSERT_MSG(0, "SHLD - illegal shift");
    }
    if (bits == 16)
        Write8(0x66);
    X64Reg operand = src.GetSimpleReg();
    dest.WriteRex(this, bits, bits, operand);
    if (shift.GetImmBits() == 8) {
        // SHLD r/m, reg, imm8
        Write8(0x0F);
        Write8(0xA4);
        dest.WriteRest(this, 1, operand);
        Write8((u8)shift.offset);
    } else {
        // SHLD r/m, reg, CL
        Write8(0x0F);
        Write8(0xA5);
        dest.WriteRest(this, 0, operand);
    }
}
1124
// Emits a single-byte-opcode instruction with this OpArg as the r/m operand
// and _operandReg in the ModRM reg field (16-bit forms get the 0x66 prefix).
void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) {
    if (bits == 16)
        emit->Write8(0x66);

    this->operandReg = (u8)_operandReg;
    WriteRex(emit, bits, bits);
    emit->Write8(op);
    WriteRest(emit);
}
1134
// operand can either be immediate or register
// Encodes "*this <op> operand" where *this is the r/m side. For immediate
// sources it picks the shortest legal form in priority order: AL/EAX
// shortcuts, MOV's short B0/B8 reg-imm forms, sign-extended imm8, then the
// generic /ext immediate form from the normalops table (0xCC in the table
// marks an encoding the op doesn't have). immToWrite defers the immediate
// so WriteRest can account for it when computing RIP-relative displacements.
void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
                          int bits) const {
    X64Reg _operandReg;
    if (IsImm()) {
        ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order");
    }

    if (bits == 16)
        emit->Write8(0x66);

    int immToWrite = 0;

    if (operand.IsImm()) {
        WriteRex(emit, bits, bits);

        if (!toRM) {
            ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)");
        }

        if (operand.scale == SCALE_IMM8 && bits == 8) {
            // op al, imm8
            if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) {
                emit->Write8(normalops[op].eaximm8);
                emit->Write8((u8)operand.offset);
                return;
            }
            // mov reg, imm8
            if (!scale && op == nrmMOV) {
                emit->Write8(0xB0 + (offsetOrBaseReg & 7));
                emit->Write8((u8)operand.offset);
                return;
            }
            // op r/m8, imm8
            emit->Write8(normalops[op].imm8);
            immToWrite = 8;
        } else if ((operand.scale == SCALE_IMM16 && bits == 16) ||
                   (operand.scale == SCALE_IMM32 && bits == 32) ||
                   (operand.scale == SCALE_IMM32 && bits == 64)) {
            // Try to save immediate size if we can, but first check to see
            // if the instruction supports simm8.
            // op r/m, imm8
            if (normalops[op].simm8 != 0xCC &&
                ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) ||
                 (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) {
                emit->Write8(normalops[op].simm8);
                immToWrite = 8;
            } else {
                // mov reg, imm
                if (!scale && op == nrmMOV && bits != 64) {
                    emit->Write8(0xB8 + (offsetOrBaseReg & 7));
                    if (bits == 16)
                        emit->Write16((u16)operand.offset);
                    else
                        emit->Write32((u32)operand.offset);
                    return;
                }
                // op eax, imm
                if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) {
                    emit->Write8(normalops[op].eaximm32);
                    if (bits == 16)
                        emit->Write16((u16)operand.offset);
                    else
                        emit->Write32((u32)operand.offset);
                    return;
                }
                // op r/m, imm
                emit->Write8(normalops[op].imm32);
                immToWrite = bits == 16 ? 16 : 32;
            }
        } else if ((operand.scale == SCALE_IMM8 && bits == 16) ||
                   (operand.scale == SCALE_IMM8 && bits == 32) ||
                   (operand.scale == SCALE_IMM8 && bits == 64)) {
            // op r/m, imm8 (caller already promised the value fits in 8 bits)
            emit->Write8(normalops[op].simm8);
            immToWrite = 8;
        } else if (operand.scale == SCALE_IMM64 && bits == 64) {
            if (scale) {
                ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination");
            }
            // mov reg64, imm64 — the only instruction that takes a full imm64
            else if (op == nrmMOV) {
                emit->Write8(0xB8 + (offsetOrBaseReg & 7));
                emit->Write64((u64)operand.offset);
                return;
            }
            ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm");
        } else {
            ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
        }
        _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM
    } else {
        // Register operand: direction is chosen by 'toRM'.
        _operandReg = (X64Reg)operand.offsetOrBaseReg;
        WriteRex(emit, bits, bits, _operandReg);
        // op r/m, reg
        if (toRM) {
            emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32);
        }
        // op reg, r/m
        else {
            emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32);
        }
    }
    // ModRM/SIB/displacement, then the deferred immediate (if any).
    WriteRest(emit, immToWrite >> 3, _operandReg);
    switch (immToWrite) {
    case 0:
        break;
    case 8:
        emit->Write8((u8)operand.offset);
        break;
    case 16:
        emit->Write16((u16)operand.offset);
        break;
    case 32:
        emit->Write32((u32)operand.offset);
        break;
    default:
        ASSERT_MSG(0, "WriteNormalOp - Unhandled case");
    }
}
1255
1256void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1,
1257 const OpArg& a2) {
1258 if (a1.IsImm()) {
1259 // Booh! Can't write to an imm
1260 ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm");
1261 return;
1262 }
1263 if (a2.IsImm()) {
1264 a1.WriteNormalOp(emit, true, op, a2, bits);
1265 } else {
1266 if (a1.IsSimpleReg()) {
1267 a2.WriteNormalOp(emit, false, op, a1, bits);
1268 } else {
1269 ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(),
1270 "WriteNormalOp - a1 and a2 cannot both be memory");
1271 a1.WriteNormalOp(emit, true, op, a2, bits);
1272 }
1273 }
1274}
1275
// Two-operand integer ALU instructions. All forward to WriteNormalOp with
// the matching normalops table entry; ops that modify EFLAGS call
// CheckFlags() first (MOV and XCHG don't touch flags).
void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmADD, a1, a2);
}
void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmADC, a1, a2);
}
void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmSUB, a1, a2);
}
void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmSBB, a1, a2);
}
void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmAND, a1, a2);
}
void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmOR, a1, a2);
}
void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmXOR, a1, a2);
}
void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) {
    // A reg-to-same-reg MOV is a no-op; flag it as a likely JIT bug.
    if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
        LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
    WriteNormalOp(this, bits, nrmMOV, a1, a2);
}
void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmTEST, a1, a2);
}
void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    WriteNormalOp(this, bits, nrmCMP, a1, a2);
}
void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {
    WriteNormalOp(this, bits, nrmXCHG, a1, a2);
}
1320
// Three-operand IMUL: regOp = a1 * a2, where a2 must be an immediate.
// Uses 6B (imm8, sign-extended) when the value fits, otherwise 69 with a
// full imm16/imm32. No 8-bit form exists.
void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) {
    CheckFlags();
    if (bits == 8) {
        ASSERT_MSG(0, "IMUL - illegal bit size!");
        return;
    }

    if (a1.IsImm()) {
        ASSERT_MSG(0, "IMUL - second arg cannot be imm!");
        return;
    }

    if (!a2.IsImm()) {
        ASSERT_MSG(0, "IMUL - third arg must be imm!");
        return;
    }

    if (bits == 16)
        Write8(0x66);
    a1.WriteRex(this, bits, bits, regOp);

    // Prefer the short sign-extended imm8 form whenever the value fits.
    if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) ||
        (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) {
        Write8(0x6B);
        a1.WriteRest(this, 1, regOp);
        Write8((u8)a2.offset);
    } else {
        Write8(0x69);
        if (a2.GetImmBits() == 16 && bits == 16) {
            a1.WriteRest(this, 2, regOp);
            Write16((u16)a2.offset);
        } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) {
            a1.WriteRest(this, 4, regOp);
            Write32((u32)a2.offset);
        } else {
            ASSERT_MSG(0, "IMUL - unhandled case!");
        }
    }
}
1360
// Two-operand IMUL (0F AF): regOp *= a. An immediate source is rerouted to
// the three-operand form with regOp as both destination and multiplicand.
void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) {
    CheckFlags();
    if (bits == 8) {
        ASSERT_MSG(0, "IMUL - illegal bit size!");
        return;
    }

    if (a.IsImm()) {
        IMUL(bits, regOp, R(regOp), a);
        return;
    }

    if (bits == 16)
        Write8(0x66);
    a.WriteRex(this, bits, bits, regOp);
    Write8(0x0F);
    Write8(0xAF);
    a.WriteRest(this, 0, regOp);
}
1380
// Emits a legacy SSE instruction: optional mandatory prefix (66/F2/F3),
// REX if needed, 0F escape, then a one- or two-byte opcode (a two-byte op
// like 0x3817 writes the high byte first). extrabytes = trailing immediate
// size, needed for correct RIP-relative displacements.
void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) {
    if (opPrefix)
        Write8(opPrefix);
    arg.operandReg = regOp;
    arg.WriteRex(this, 0, 0);
    Write8(0x0F);
    if (op > 0xFF)
        Write8((op >> 8) & 0xFF);
    Write8(op & 0xFF);
    arg.WriteRest(this, extrabytes);
}
1392
// Two-operand AVX convenience overload: no second source register (vvvv
// field left unused via INVALID_REG).
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
    WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
}
1396
1397static int GetVEXmmmmm(u16 op) {
1398 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1399 if ((op >> 8) == 0x3A)
1400 return 3;
1401 if ((op >> 8) == 0x38)
1402 return 2;
1403
1404 return 1;
1405}
1406
1407static int GetVEXpp(u8 opPrefix) {
1408 if (opPrefix == 0x66)
1409 return 1;
1410 if (opPrefix == 0xF3)
1411 return 2;
1412 if (opPrefix == 0xF2)
1413 return 3;
1414
1415 return 0;
1416}
1417
// Emits a VEX-encoded (AVX) instruction: the legacy prefix and 0F escape(s)
// are folded into the VEX prefix's pp/mmmmm fields, regOp2 goes in vvvv.
// Asserts if the host CPU lacks AVX.
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                          int extrabytes) {
    if (!Common::GetCPUCaps().avx)
        ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
    int mmmmm = GetVEXmmmmm(op);
    int pp = GetVEXpp(opPrefix);
    // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size
    // here
    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm);
    Write8(op & 0xFF); // only the low opcode byte remains after VEX folding
    arg.WriteRest(this, extrabytes, regOp1);
}
1430
// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
// 'size' selects operand width (sets VEX.W for 64-bit).
void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
                          const OpArg& arg, int extrabytes) {
    if (size != 32 && size != 64)
        ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
    int mmmmm = GetVEXmmmmm(op);
    int pp = GetVEXpp(opPrefix);
    arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
    Write8(op & 0xFF);
    arg.WriteRest(this, extrabytes, regOp1);
}
1442
// BMI1/BMI2 wrappers around WriteVEXOp: verify CPU support first, and call
// CheckFlags() since BMI instructions write EFLAGS.
void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
                           const OpArg& arg, int extrabytes) {
    CheckFlags();
    if (!Common::GetCPUCaps().bmi1)
        ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}

void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2,
                           const OpArg& arg, int extrabytes) {
    CheckFlags();
    if (!Common::GetCPUCaps().bmi2)
        ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
    WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
1458
// MOVD (66 0F 6E load / 66 0F 7E store): moves 32 bits between an XMM
// register and a GPR/memory operand.
void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x6E, dest, arg, 0);
}
void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) {
    WriteSSEOp(0x66, 0x7E, src, arg, 0);
}

// MOVQ load form: moves 64 bits into an XMM register.
void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
#ifdef ARCHITECTURE_x86_64
    // Alternate encoding
    // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
    // (66 REX.W 0F 6E — MOVD with REX.W moves a full quadword)
    arg.operandReg = dest;
    Write8(0x66);
    arg.WriteRex(this, 64, 0);
    Write8(0x0f);
    Write8(0x6E);
    arg.WriteRest(this, 0);
#else
    // 32-bit hosts have no REX; use the F3 0F 7E MOVQ encoding instead.
    arg.operandReg = dest;
    Write8(0xF3);
    Write8(0x0f);
    Write8(0x7E);
    arg.WriteRest(this, 0);
#endif
}
1484
1485void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
1486 if (src > 7 || arg.IsSimpleReg()) {
1487 // Alternate encoding
1488 // This does not display correctly in MSVC's debugger, it thinks it's a MOVD
1489 arg.operandReg = src;
1490 Write8(0x66);
1491 arg.WriteRex(this, 64, 0);
1492 Write8(0x0f);
1493 Write8(0x7E);
1494 arg.WriteRest(this, 0);
1495 } else {
1496 arg.operandReg = src;
1497 arg.WriteRex(this, 0, 0);
1498 Write8(0x66);
1499 Write8(0x0f);
1500 Write8(0xD6);
1501 arg.WriteRest(this, 0);
1502 }
1503}
1504
// LDMXCSR/STMXCSR (0F AE /2, /3): load/store the SSE control/status
// register. The operand must be a 32-bit memory location.
void XEmitter::WriteMXCSR(OpArg arg, int ext) {
    if (arg.IsImm() || arg.IsSimpleReg())
        ASSERT_MSG(0, "MXCSR - invalid operand");

    arg.operandReg = ext; // /ext selects load (2) vs store (3)
    arg.WriteRex(this, 0, 0);
    Write8(0x0F);
    Write8(0xAE);
    arg.WriteRest(this);
}

void XEmitter::STMXCSR(const OpArg& memloc) {
    WriteMXCSR(memloc, 3);
}
void XEmitter::LDMXCSR(const OpArg& memloc) {
    WriteMXCSR(memloc, 2);
}
1522
// Non-temporal (cache-bypassing) stores.
void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);
}
void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x00, sseMOVNTP, regOp, arg);
}
void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVNTP, regOp, arg);
}

// Scalar floating-point arithmetic. Prefix selects the type:
// F3 = scalar single (SS), F2 = scalar double (SD).
void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseADD, regOp, arg);
}
void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseADD, regOp, arg);
}
void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseSUB, regOp, arg);
}
void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseSUB, regOp, arg);
}
// CMPSS/CMPSD take a trailing imm8 selecting the predicate (eq, lt, ...).
void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {
    WriteSSEOp(0xF3, sseCMP, regOp, arg, 1);
    Write8(compare);
}
void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {
    WriteSSEOp(0xF2, sseCMP, regOp, arg, 1);
    Write8(compare);
}
void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseMUL, regOp, arg);
}
void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseMUL, regOp, arg);
}
void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseDIV, regOp, arg);
}
void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseDIV, regOp, arg);
}
void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseMIN, regOp, arg);
}
void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseMIN, regOp, arg);
}
void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseMAX, regOp, arg);
}
void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseMAX, regOp, arg);
}
void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseSQRT, regOp, arg);
}
void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseSQRT, regOp, arg);
}
// Approximate reciprocal / reciprocal-sqrt — single precision only.
void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseRCP, regOp, arg);
}
void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseRSQRT, regOp, arg);
}
1589
// Packed floating-point arithmetic. Prefix selects the type:
// none = packed single (PS), 66 = packed double (PD).
void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseADD, regOp, arg);
}
void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseADD, regOp, arg);
}
void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseSUB, regOp, arg);
}
void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseSUB, regOp, arg);
}
// CMPPS/CMPPD take a trailing imm8 selecting the predicate.
void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {
    WriteSSEOp(0x00, sseCMP, regOp, arg, 1);
    Write8(compare);
}
void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {
    WriteSSEOp(0x66, sseCMP, regOp, arg, 1);
    Write8(compare);
}
void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseAND, regOp, arg);
}
void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseAND, regOp, arg);
}
void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseANDN, regOp, arg);
}
void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseANDN, regOp, arg);
}
void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseOR, regOp, arg);
}
void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseOR, regOp, arg);
}
void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseXOR, regOp, arg);
}
void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseXOR, regOp, arg);
}
void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMUL, regOp, arg);
}
void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMUL, regOp, arg);
}
void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseDIV, regOp, arg);
}
void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseDIV, regOp, arg);
}
void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMIN, regOp, arg);
}
void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMIN, regOp, arg);
}
void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMAX, regOp, arg);
}
void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMAX, regOp, arg);
}
void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseSQRT, regOp, arg);
}
void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseSQRT, regOp, arg);
}
void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseRCP, regOp, arg);
}
void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseRSQRT, regOp, arg);
}
// SHUFPS/SHUFPD take a trailing imm8 lane selector.
void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {
    WriteSSEOp(0x00, sseSHUF, regOp, arg, 1);
    Write8(shuffle);
}
void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
    WriteSSEOp(0x66, sseSHUF, regOp, arg, 1);
    Write8(shuffle);
}

// Horizontal add (SSE3 encoding; caller is responsible for CPU support).
void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseHADD, regOp, arg);
}

// Scalar compares setting EFLAGS. COMIS* signal on QNaN (ordered),
// UCOMIS* do not (unordered).
void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseCOMIS, regOp, arg);
}
void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseCOMIS, regOp, arg);
}
void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseUCOMIS, regOp, arg);
}
void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseUCOMIS, regOp, arg);
}
1695
// SSE move family. Each op comes in a load (xmm <- r/m) and store
// (r/m <- xmm) form, selected by the from/to opcode table entry.
// MOVAP* require 16-byte alignment; MOVUP* do not.
void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);
}
void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);
}
void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);
}
void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);
}

void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);
}
void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);
}
void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);
}
void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);
}

// Integer vector moves: MOVDQA (aligned) / MOVDQU (unaligned).
void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);
}
void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);
}
void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);
}
void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);
}

// Scalar moves (F3 = single, F2 = double) reuse the MOVUP* opcode slots.
void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);
}
void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);
}
void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);
}
void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);
}

// Low/high 64-bit half moves.
void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg);
}
void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg);
}
void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg);
}
void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg);
}

void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg);
}
void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg);
}
void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg);
}
void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) {
    WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg);
}

// Register-to-register half swaps (register operands only).
void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {
    WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));
}
void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {
    WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));
}
1780
// Floating-point/integer conversions. CVTT* variants truncate toward zero;
// the plain CVT* variants round per the current MXCSR rounding mode.
void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, 0x5A, regOp, arg);
}
void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, 0x5A, regOp, arg);
}

void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, 0x5A, regOp, arg);
}
void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0x5A, regOp, arg);
}
void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, 0x2D, regOp, arg);
}
void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0x2D, regOp, arg);
}
void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, 0x2A, regOp, arg);
}
void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0x2A, regOp, arg);
}

void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0xE6, regOp, arg);
}
void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x00, 0x5B, regOp, arg);
}
void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, 0xE6, regOp, arg);
}
void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, 0x5B, regOp, arg);
}

// Truncating conversions (round toward zero regardless of MXCSR).
void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF2, 0x2C, regOp, arg);
}
void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0x2C, regOp, arg);
}
void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0xF3, 0x5B, regOp, arg);
}
void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {
    WriteSSEOp(0x66, 0xE6, regOp, arg);
}
1832
// Byte-masked store of src to [RDI], mask in dest's sign bits.
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {
    WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));
}

// Extract packed sign bits into a GPR bitmask.
void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x00, 0x50, dest, arg);
}
void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x50, dest, arg);
}

void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0xF2, sseLDDQU, dest, arg);
} // For integer data only
1847
// THESE TWO ARE UNTESTED.
// Interleave low/high packed elements from dest and arg.
void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x00, 0x14, dest, arg);
}
void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x00, 0x15, dest, arg);
}

void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x14, dest, arg);
}
void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x15, dest, arg);
}

// Duplicate the low double into both lanes. Uses the SSE3 instruction when
// available, otherwise a MOVSD load + UNPCKLPD self-interleave.
void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) {
    if (Common::GetCPUCaps().sse3) {
        WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup
    } else {
        // Simulate this instruction with SSE2 instructions
        if (!arg.IsSimpleReg(regOp)) // skip the load if source == destination
            MOVSD(regOp, arg);
        UNPCKLPD(regOp, R(regOp));
    }
}
1873
// There are a few more left

// Also some integer instructions are missing
// Pack with saturation (signed: SS, unsigned: US) to the narrower element.
void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x6B, dest, arg);
}
void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x63, dest, arg);
}
void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x67, dest, arg);
}

// Interleave low halves at byte/word/dword/qword granularity.
void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x60, dest, arg);
}
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x61, dest, arg);
}
void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x62, dest, arg);
}
void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) {
    WriteSSEOp(0x66, 0x6C, dest, arg);
}
1899
// Packed shift-by-immediate group (66 0F 71/72/73). The shift count is an
// imm8; the operation is selected by the value passed in the ModRM reg
// field ((X64Reg)2 = SRL, 3 = SRLDQ, 4 = SRA, 6 = SLL, 7 = SLLDQ), with the
// target register encoded as the r/m operand.
void XEmitter::PSRLW(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg));
    Write8(shift);
}

void XEmitter::PSRLD(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg));
    Write8(shift);
}

void XEmitter::PSRLQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg));
    Write8(shift);
}

// Variable shift: count comes from the low qword of arg (66 0F D3).
void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) {
    WriteSSEOp(0x66, 0xd3, reg, arg);
}

// Whole-register byte shift.
void XEmitter::PSRLDQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)3, R(reg));
    Write8(shift);
}

void XEmitter::PSLLW(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg));
    Write8(shift);
}

void XEmitter::PSLLD(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg));
    Write8(shift);
}

void XEmitter::PSLLQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg));
    Write8(shift);
}

void XEmitter::PSLLDQ(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x73, (X64Reg)7, R(reg));
    Write8(shift);
}

// Arithmetic (sign-preserving) right shifts; no qword form exists in SSE2.
void XEmitter::PSRAW(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg));
    Write8(shift);
}

void XEmitter::PSRAD(X64Reg reg, int shift) {
    WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg));
    Write8(shift);
}
1953
// Gated SSE op writers: identical to WriteSSEOp but assert that the host
// CPU reports the required feature before emitting.
void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
    if (!Common::GetCPUCaps().ssse3)
        ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}

void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) {
    if (!Common::GetCPUCaps().sse4_1)
        ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
    WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
1965
// SSSE3/SSE4.1 integer ops. Two-byte opcodes like 0x38xx encode the
// secondary escape byte in the high byte (handled by WriteSSEOp).
void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {
    WriteSSSE3Op(0x66, 0x3800, dest, arg);
}
void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3817, dest, arg);
}
void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x382b, dest, arg);
}
// Dot product; the imm8 mask selects input lanes and result broadcast.
void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {
    WriteSSE41Op(0x66, 0x3A40, dest, arg, 1);
    Write8(mask);
}

// Packed integer min/max (SSE4.1 fills the gaps SSE2 left: signed
// byte/dword and unsigned word/dword variants).
void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3838, dest, arg);
}
void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3839, dest, arg);
}
void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383a, dest, arg);
}
void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383b, dest, arg);
}
void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383c, dest, arg);
}
void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383d, dest, arg);
}
void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383e, dest, arg);
}
void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x383f, dest, arg);
}
2004
// SSE4.1 packed widening moves: PMOVSX* sign-extend, PMOVZX* zero-extend
// from the narrower source elements (opcodes 0F 38 20-25 / 30-35).
void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3820, dest, arg);
}
void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3821, dest, arg);
}
void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3822, dest, arg);
}
void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3823, dest, arg);
}
void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3824, dest, arg);
}
void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3825, dest, arg);
}
void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3830, dest, arg);
}
void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3831, dest, arg);
}
void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3832, dest, arg);
}
void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3833, dest, arg);
}
void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3834, dest, arg);
}
void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {
    WriteSSE41Op(0x66, 0x3835, dest, arg);
}
2041
2042void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {
2043 WriteSSE41Op(0x66, 0x3810, dest, arg);
2044}
2045void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {
2046 WriteSSE41Op(0x66, 0x3814, dest, arg);
2047}
2048void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {
2049 WriteSSE41Op(0x66, 0x3815, dest, arg);
2050}
2051void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) {
2052 WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1);
2053 Write8(blend);
2054}
2055void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) {
2056 WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1);
2057 Write8(blend);
2058}
2059
2060void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {
2061 WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1);
2062 Write8(mode);
2063}
2064void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {
2065 WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1);
2066 Write8(mode);
2067}
2068void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {
2069 WriteSSE41Op(0x66, 0x3A08, dest, arg, 1);
2070 Write8(mode);
2071}
2072void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {
2073 WriteSSE41Op(0x66, 0x3A09, dest, arg, 1);
2074 Write8(mode);
2075}
2076
// --- SSE2 packed-integer encoders (66-prefixed 0F map) ---
// Single-byte opcodes straight from the two-byte opcode map; WriteSSEOp emits
// prefix, 0F, opcode, then the ModRM/SIB/displacement for 'arg'.
2077void XEmitter::PAND(X64Reg dest, const OpArg& arg) {
2078 WriteSSEOp(0x66, 0xDB, dest, arg);
2079}
2080void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {
2081 WriteSSEOp(0x66, 0xDF, dest, arg);
2082}
2083void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {
2084 WriteSSEOp(0x66, 0xEF, dest, arg);
2085}
2086void XEmitter::POR(X64Reg dest, const OpArg& arg) {
2087 WriteSSEOp(0x66, 0xEB, dest, arg);
2088}
2089
// Wrapping adds per element width.
2090void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {
2091 WriteSSEOp(0x66, 0xFC, dest, arg);
2092}
2093void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {
2094 WriteSSEOp(0x66, 0xFD, dest, arg);
2095}
2096void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {
2097 WriteSSEOp(0x66, 0xFE, dest, arg);
2098}
2099void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {
2100 WriteSSEOp(0x66, 0xD4, dest, arg);
2101}
2102
// Saturating adds (signed S / unsigned US).
2103void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {
2104 WriteSSEOp(0x66, 0xEC, dest, arg);
2105}
2106void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {
2107 WriteSSEOp(0x66, 0xED, dest, arg);
2108}
2109void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {
2110 WriteSSEOp(0x66, 0xDC, dest, arg);
2111}
2112void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {
2113 WriteSSEOp(0x66, 0xDD, dest, arg);
2114}
2115
// Wrapping subtracts per element width.
2116void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {
2117 WriteSSEOp(0x66, 0xF8, dest, arg);
2118}
2119void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {
2120 WriteSSEOp(0x66, 0xF9, dest, arg);
2121}
2122void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {
2123 WriteSSEOp(0x66, 0xFA, dest, arg);
2124}
2125void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {
2126 WriteSSEOp(0x66, 0xFB, dest, arg);
2127}
2128
// Saturating subtracts.
2129void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {
2130 WriteSSEOp(0x66, 0xE8, dest, arg);
2131}
2132void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {
2133 WriteSSEOp(0x66, 0xE9, dest, arg);
2134}
2135void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {
2136 WriteSSEOp(0x66, 0xD8, dest, arg);
2137}
2138void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {
2139 WriteSSEOp(0x66, 0xD9, dest, arg);
2140}
2141
// Unsigned rounded averages.
2142void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {
2143 WriteSSEOp(0x66, 0xE0, dest, arg);
2144}
2145void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {
2146 WriteSSEOp(0x66, 0xE3, dest, arg);
2147}
2148
// Element-wise compares producing all-ones/all-zeros masks.
2149void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {
2150 WriteSSEOp(0x66, 0x74, dest, arg);
2151}
2152void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {
2153 WriteSSEOp(0x66, 0x75, dest, arg);
2154}
2155void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {
2156 WriteSSEOp(0x66, 0x76, dest, arg);
2157}
2158
2159void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {
2160 WriteSSEOp(0x66, 0x64, dest, arg);
2161}
2162void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {
2163 WriteSSEOp(0x66, 0x65, dest, arg);
2164}
2165void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {
2166 WriteSSEOp(0x66, 0x66, dest, arg);
2167}
2168
// Word extract/insert; 'subreg' selects the word lane via the trailing immediate.
2169void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2170 WriteSSEOp(0x66, 0xC5, dest, arg, 1);
2171 Write8(subreg);
2172}
2173void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {
2174 WriteSSEOp(0x66, 0xC4, dest, arg, 1);
2175 Write8(subreg);
2176}
2177
2178void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {
2179 WriteSSEOp(0x66, 0xF5, dest, arg);
2180}
2181void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {
2182 WriteSSEOp(0x66, 0xF6, dest, arg);
2183}
2184
// SSE2-era min/max (signed word, unsigned byte only).
2185void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {
2186 WriteSSEOp(0x66, 0xEE, dest, arg);
2187}
2188void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {
2189 WriteSSEOp(0x66, 0xDE, dest, arg);
2190}
2191void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {
2192 WriteSSEOp(0x66, 0xEA, dest, arg);
2193}
2194void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {
2195 WriteSSEOp(0x66, 0xDA, dest, arg);
2196}
2197
2198void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {
2199 WriteSSEOp(0x66, 0xD7, dest, arg);
2200}
// Shuffles share opcode 0x70; the mandatory prefix (66/F2/F3) selects the
// dword/low-word/high-word variant. 'shuffle' is the lane-selector immediate.
2201void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2202 WriteSSEOp(0x66, 0x70, regOp, arg, 1);
2203 Write8(shuffle);
2204}
2205void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2206 WriteSSEOp(0xF2, 0x70, regOp, arg, 1);
2207 Write8(shuffle);
2208}
2209void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {
2210 WriteSSEOp(0xF3, 0x70, regOp, arg, 1);
2211 Write8(shuffle);
2212}
2213
2214// VEX
// --- VEX (AVX) three-operand encoders: dest = regOp1, first source = regOp2,
// second source = arg. sseADD/sseSUB/etc. are opcode constants declared
// elsewhere in this file (not visible in this chunk).
2215void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2216 WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);
2217}
2218void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2219 WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);
2220}
2221void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2222 WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);
2223}
2224void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2225 WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);
2226}
2227void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2228 WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);
2229}
2230void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2231 WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);
2232}
2233void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2234 WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);
2235}
2236void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2237 WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);
2238}
2239void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2240 WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);
2241}
// Immediate-carrying form: the extra byte count (1) precedes Write8(shuffle).
2242void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {
2243 WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1);
2244 Write8(shuffle);
2245}
2246void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2247 WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);
2248}
2249void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2250 WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);
2251}
2252
// Bitwise float logical ops; prefix 0x00 selects the PS form, 0x66 the PD form.
2253void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2254 WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg);
2255}
2256void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2257 WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg);
2258}
2259void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2260 WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg);
2261}
2262void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2263 WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg);
2264}
2265void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2266 WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg);
2267}
2268void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2269 WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg);
2270}
2271void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2272 WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg);
2273}
2274void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2275 WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg);
2276}
2277
// VEX-encoded packed-integer logical ops.
2278void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2279 WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg);
2280}
2281void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2282 WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg);
2283}
2284void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2285 WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg);
2286}
2287void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2288 WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
2289}
2290
// --- FMA3 encoders (VEX 0F 38 map, opcodes 0x96-0x9F / 0xA6-0xAF / 0xB6-0xBF
// for the 132/213/231 operand orders). PS/SS forms take the default call;
// PD/SD forms additionally pass 1 as the last argument.
// NOTE(review): per the declarations in emitter.h that last parameter is named
// 'extrabytes' (immediate-byte count for RIP-relative fixup), yet no immediate
// follows here — it looks like it is being used to select VEX.W=1 for the
// double-precision forms. Confirm against WriteAVXOp/WriteVEXOp (not visible
// in this chunk) that the W bit is actually threaded through.
2291void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2292 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg);
2293}
2294void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2295 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg);
2296}
2297void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2298 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg);
2299}
2300void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2301 WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1);
2302}
2303void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2304 WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1);
2305}
2306void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2307 WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1);
2308}
2309void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2310 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg);
2311}
2312void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2313 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg);
2314}
2315void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2316 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg);
2317}
2318void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2319 WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1);
2320}
2321void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2322 WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1);
2323}
2324void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2325 WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1);
2326}
// Fused multiply-subtract family (0x9A/0x9B, 0xAA/0xAB, 0xBA/0xBB).
2327void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2328 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg);
2329}
2330void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2331 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg);
2332}
2333void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2334 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg);
2335}
2336void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2337 WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1);
2338}
2339void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2340 WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1);
2341}
2342void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2343 WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1);
2344}
2345void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2346 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg);
2347}
2348void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2349 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg);
2350}
2351void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2352 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg);
2353}
2354void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2355 WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1);
2356}
2357void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2358 WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1);
2359}
2360void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2361 WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1);
2362}
// Negated multiply-add family (0x9C/0x9D, 0xAC/0xAD, 0xBC/0xBD).
2363void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2364 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg);
2365}
2366void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2367 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg);
2368}
2369void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2370 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg);
2371}
2372void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2373 WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1);
2374}
2375void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2376 WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1);
2377}
2378void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2379 WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1);
2380}
2381void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2382 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg);
2383}
2384void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2385 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg);
2386}
2387void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2388 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg);
2389}
2390void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2391 WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1);
2392}
2393void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2394 WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1);
2395}
2396void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2397 WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1);
2398}
// Negated multiply-subtract family (0x9E/0x9F, 0xAE/0xAF, 0xBE/0xBF).
2399void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2400 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg);
2401}
2402void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2403 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg);
2404}
2405void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2406 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg);
2407}
2408void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2409 WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1);
2410}
2411void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2412 WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1);
2413}
2414void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2415 WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1);
2416}
2417void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2418 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg);
2419}
2420void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2421 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg);
2422}
2423void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2424 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg);
2425}
2426void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2427 WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1);
2428}
2429void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2430 WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1);
2431}
2432void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2433 WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1);
2434}
// Alternating add/sub families (0x96/0x97, 0xA6/0xA7, 0xB6/0xB7) - packed only.
2435void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2436 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg);
2437}
2438void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2439 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg);
2440}
2441void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2442 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg);
2443}
2444void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2445 WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1);
2446}
2447void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2448 WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1);
2449}
2450void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2451 WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1);
2452}
2453void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2454 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg);
2455}
2456void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2457 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg);
2458}
2459void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2460 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg);
2461}
2462void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2463 WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1);
2464}
2465void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2466 WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1);
2467}
2468void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2469 WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1);
2470}
2471
// --- BMI1/BMI2 encoders. 'bits' selects 32- vs 64-bit operand size; the
// mandatory-prefix byte (00/66/F2/F3) distinguishes the shift variants that
// share opcode 0x38F7.
2472void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2473 WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);
2474}
2475void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2476 WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);
2477}
2478void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2479 WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);
2480}
// RORX has no vvvv source register (INVALID_REG) and a rotate immediate.
2481void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {
2482 WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1);
2483 Write8(rotate);
2484}
2485void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2486 WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);
2487}
2488void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2489 WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);
2490}
// Note the swapped register order: MULX's high-result register goes in ModRM.reg.
2491void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2492 WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);
2493}
2494void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2495 WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);
2496}
// BLSR/BLSMSK/BLSI share opcode 0x38F3 and are selected by an opcode extension
// (1/2/3) smuggled through the X64Reg parameter into the ModRM reg field.
2497void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {
2498 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);
2499}
2500void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {
2501 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);
2502}
2503void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {
2504 WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);
2505}
2506void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {
2507 WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);
2508}
2509void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {
2510 WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);
2511}
2512
2513// Prefixes
// Raw legacy-prefix emitters: each writes a single prefix byte that applies to
// the NEXT instruction emitted by the caller.
2514
2515void XEmitter::LOCK() {
2516 Write8(0xF0);
2517}
2518void XEmitter::REP() {
2519 Write8(0xF3);
2520}
2521void XEmitter::REPNE() {
2522 Write8(0xF2);
2523}
// Segment-override prefixes (FS/GS), typically used for TLS-style accesses.
2524void XEmitter::FSOverride() {
2525 Write8(0x64);
2526}
2527void XEmitter::GSOverride() {
2528 Write8(0x65);
2529}
2530
// x87 wait instruction (checks for pending FP exceptions).
2531void XEmitter::FWAIT() {
2532 Write8(0x9B);
2533}
2534
2535// TODO: make this more generic
// Emits an x87 memory load/store. The memory-format bits are OR'd into the
// base opcode 0xD9: 0 -> 0xD9 (32-bit real), 4 -> 0xDD (64-bit real),
// 2 -> 0xDB (whose 80-bit forms need a different ModRM /reg sub-opcode,
// supplied as op_80b). x87 uses the ModRM reg field as an opcode extension,
// which is why 'op' is forced into the operandReg slot of WriteRest.
2536void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) {
2537 int mf = 0;
2538 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID),
2539 "WriteFloatLoadStore: 80 bits not supported for this instruction");
2540 switch (bits) {
2541 case 32:
2542 mf = 0;
2543 break;
2544 case 64:
2545 mf = 4;
2546 break;
2547 case 80:
2548 mf = 2;
2549 break;
2550 default:
2551 ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)");
2552 }
2553 Write8(0xd9 | mf);
2554 // x87 instructions use the reg field of the ModR/M byte as opcode:
2555 if (bits == 80)
2556 op = op_80b;
2557 arg.WriteRest(this, 0, (X64Reg)op);
2558}
2559
// x87 load/store wrappers. FST has no 80-bit memory form, hence floatINVALID
// (asserted against inside WriteFloatLoadStore).
2560void XEmitter::FLD(int bits, const OpArg& src) {
2561 WriteFloatLoadStore(bits, floatLD, floatLD80, src);
2562}
2563void XEmitter::FST(int bits, const OpArg& dest) {
2564 WriteFloatLoadStore(bits, floatST, floatINVALID, dest);
2565}
2566void XEmitter::FSTP(int bits, const OpArg& dest) {
2567 WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);
2568}
// Store x87 status word into AX (fixed two-byte encoding DF E0).
2569void XEmitter::FNSTSW_AX() {
2570 Write8(0xDF);
2571 Write8(0xE0);
2572}
2573
// Read time-stamp counter into EDX:EAX (clobbers both).
2574void XEmitter::RDTSC() {
2575 Write8(0x0F);
2576 Write8(0x31);
2577}
2578
// Fill the whole code region with INT3 so any stale jump into freed/reset
// code traps immediately instead of executing garbage.
2579void XCodeBlock::PoisonMemory() {
2580 // x86/64: 0xCC = breakpoint
2581 memset(region, 0xCC, region_size);
2582}
// Closes the enclosing namespace (opened above the start of this chunk).
2583}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
deleted file mode 100644
index 7d7cdde16..000000000
--- a/src/common/x64/emitter.h
+++ /dev/null
@@ -1,1206 +0,0 @@
1// Copyright (C) 2003 Dolphin Project.
2
3// This program is free software: you can redistribute it and/or modify
4// it under the terms of the GNU General Public License as published by
5// the Free Software Foundation, version 2.0 or later versions.
6
7// This program is distributed in the hope that it will be useful,
8// but WITHOUT ANY WARRANTY; without even the implied warranty of
9// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10// GNU General Public License 2.0 for more details.
11
12// A copy of the GPL 2.0 should have been included with the program.
13// If not, see http://www.gnu.org/licenses/
14
15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/
17
18#pragma once
19
20#include <cstddef>
21#include "common/assert.h"
22#include "common/bit_set.h"
23#include "common/code_block.h"
24#include "common/common_types.h"
25
26#if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64)
27#define _ARCH_64
28#endif
29
30#ifdef _ARCH_64
31#define PTRBITS 64
32#else
33#define PTRBITS 32
34#endif
35
36namespace Gen {
37
// Register encodings. Note this is one flat enum: the 32-bit, 64-bit, 8-bit,
// 16-bit, XMM and YMM names all alias the same numeric encodings (EAX == RAX
// == AL == XMM0 == 0) — the operand size is carried separately by the
// instruction being emitted, not by the register value. AH/CH/DH/BH get
// 0x100-biased values so the emitter can tell them apart from the REX-only
// SPL/BPL/SIL/DIL encodings.
38enum X64Reg {
39 EAX = 0,
40 EBX = 3,
41 ECX = 1,
42 EDX = 2,
43 ESI = 6,
44 EDI = 7,
45 EBP = 5,
46 ESP = 4,
47
48 RAX = 0,
49 RBX = 3,
50 RCX = 1,
51 RDX = 2,
52 RSI = 6,
53 RDI = 7,
54 RBP = 5,
55 RSP = 4,
56 R8 = 8,
57 R9 = 9,
58 R10 = 10,
59 R11 = 11,
60 R12 = 12,
61 R13 = 13,
62 R14 = 14,
63 R15 = 15,
64
65 AL = 0,
66 BL = 3,
67 CL = 1,
68 DL = 2,
69 SIL = 6,
70 DIL = 7,
71 BPL = 5,
72 SPL = 4,
73 AH = 0x104,
74 BH = 0x107,
75 CH = 0x105,
76 DH = 0x106,
77
78 AX = 0,
79 BX = 3,
80 CX = 1,
81 DX = 2,
82 SI = 6,
83 DI = 7,
84 BP = 5,
85 SP = 4,
86
87 XMM0 = 0,
88 XMM1,
89 XMM2,
90 XMM3,
91 XMM4,
92 XMM5,
93 XMM6,
94 XMM7,
95 XMM8,
96 XMM9,
97 XMM10,
98 XMM11,
99 XMM12,
100 XMM13,
101 XMM14,
102 XMM15,
103
104 YMM0 = 0,
105 YMM1,
106 YMM2,
107 YMM3,
108 YMM4,
109 YMM5,
110 YMM6,
111 YMM7,
112 YMM8,
113 YMM9,
114 YMM10,
115 YMM11,
116 YMM12,
117 YMM13,
118 YMM14,
119 YMM15,
120
// Sentinel; also abused as "no vvvv register" in VEX encodings.
121 INVALID_REG = 0xFFFFFFFF
122};
123
// x86 condition codes as used in Jcc/SETcc/CMOVcc (the 4-bit 'tttn' field).
// Many mnemonics alias the same encoding (e.g. B/C/NAE are all 2).
124enum CCFlags {
125 CC_O = 0,
126 CC_NO = 1,
127 CC_B = 2,
128 CC_C = 2,
129 CC_NAE = 2,
130 CC_NB = 3,
131 CC_NC = 3,
132 CC_AE = 3,
133 CC_Z = 4,
134 CC_E = 4,
135 CC_NZ = 5,
136 CC_NE = 5,
137 CC_BE = 6,
138 CC_NA = 6,
139 CC_NBE = 7,
140 CC_A = 7,
141 CC_S = 8,
142 CC_NS = 9,
143 CC_P = 0xA,
144 CC_PE = 0xA,
145 CC_NP = 0xB,
146 CC_PO = 0xB,
147 CC_L = 0xC,
148 CC_NGE = 0xC,
149 CC_NL = 0xD,
150 CC_GE = 0xD,
151 CC_LE = 0xE,
152 CC_NG = 0xE,
153 CC_NLE = 0xF,
154 CC_G = 0xF
155};
156
157enum {
158 NUMGPRs = 16,
159 NUMXMMs = 16,
160};
161
// OpArg::scale values. 1/2/4/8 are real SIB scales; the remaining values are
// tags: ATREG = plain [reg], NOBASE_* = scaled index with no base register,
// RIP = RIP-relative, IMM* = the OpArg is an immediate of that width.
162enum {
163 SCALE_NONE = 0,
164 SCALE_1 = 1,
165 SCALE_2 = 2,
166 SCALE_4 = 4,
167 SCALE_8 = 8,
168 SCALE_ATREG = 16,
169 // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG
170 SCALE_NOBASE_2 = 34,
171 SCALE_NOBASE_4 = 36,
172 SCALE_NOBASE_8 = 40,
173 SCALE_RIP = 0xFF,
174 SCALE_IMM8 = 0xF0,
175 SCALE_IMM16 = 0xF1,
176 SCALE_IMM32 = 0xF2,
177 SCALE_IMM64 = 0xF3,
178};
179
// Selector for the shared integer-ALU emitter path (WriteNormalOp).
180enum NormalOp {
181 nrmADD,
182 nrmADC,
183 nrmSUB,
184 nrmSBB,
185 nrmAND,
186 nrmOR,
187 nrmXOR,
188 nrmMOV,
189 nrmTEST,
190 nrmCMP,
191 nrmXCHG,
192};
193
// CMPPS/CMPSS-style compare predicate immediates.
194enum {
195 CMP_EQ = 0,
196 CMP_LT = 1,
197 CMP_LE = 2,
198 CMP_UNORD = 3,
199 CMP_NEQ = 4,
200 CMP_NLT = 5,
201 CMP_NLE = 6,
202 CMP_ORD = 7,
203};
204
// x87 ModRM /reg sub-opcodes used by WriteFloatLoadStore; the *80 variants are
// the alternate sub-opcodes required for 80-bit memory operands.
205enum FloatOp {
206 floatLD = 0,
207 floatST = 2,
208 floatSTP = 3,
209 floatLD80 = 5,
210 floatSTP80 = 7,
211
212 floatINVALID = -1,
213};
214
// ROUNDSS/SD/PS/PD immediate bits: low 3 bits pick the rounding mode (4 =
// "use MXCSR"), bit 3 suppresses the precision exception.
215enum FloatRound {
216 FROUND_NEAREST = 0,
217 FROUND_FLOOR = 1,
218 FROUND_CEIL = 2,
219 FROUND_ZERO = 3,
220 FROUND_MXCSR = 4,
221
222 FROUND_RAISE_PRECISION = 0,
223 FROUND_IGNORE_PRECISION = 8,
224};
225
226class XEmitter;
227
228// RIP addressing does not benefit from micro op fusion on Core arch
// Describes one instruction operand: a register, a memory reference
// (base/index/scale/displacement or RIP-relative), or an immediate. Which
// interpretation applies is encoded in 'scale' (see the SCALE_* enum);
// 'offset' doubles as the displacement for memory operands and the value for
// immediates.
229struct OpArg {
230 friend class XEmitter;
231
232 constexpr OpArg() = default; // dummy op arg, used for storage
233 constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX)
234 : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)),
235 indexReg(static_cast<u16>(scaledReg)), offset(offset_) {}
236
237 constexpr bool operator==(const OpArg& b) const {
238 return operandReg == b.operandReg && scale == b.scale &&
239 offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset;
240 }
241
// Low-level encoding helpers used by XEmitter when writing the instruction.
242 void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
243 void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
244 int W = 0) const;
245 void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG,
246 bool warn_64bit_offset = true) const;
247 void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits);
248 void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand,
249 int bits) const;
250
251 constexpr bool IsImm() const {
252 return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 ||
253 scale == SCALE_IMM64;
254 }
255 constexpr bool IsSimpleReg() const {
256 return scale == SCALE_NONE;
257 }
258 constexpr bool IsSimpleReg(X64Reg reg) const {
259 return IsSimpleReg() && GetSimpleReg() == reg;
260 }
261
// Width of the immediate in bits, or -1 if this OpArg is not an immediate.
262 int GetImmBits() const {
263 switch (scale) {
264 case SCALE_IMM8:
265 return 8;
266 case SCALE_IMM16:
267 return 16;
268 case SCALE_IMM32:
269 return 32;
270 case SCALE_IMM64:
271 return 64;
272 default:
273 return -1;
274 }
275 }
276
// Re-tags the immediate width; silently ignores any width other than 8/16/32/64.
277 void SetImmBits(int bits) {
278 switch (bits) {
279 case 8:
280 scale = SCALE_IMM8;
281 break;
282 case 16:
283 scale = SCALE_IMM16;
284 break;
285 case 32:
286 scale = SCALE_IMM32;
287 break;
288 case 64:
289 scale = SCALE_IMM64;
290 break;
291 }
292 }
293
294 constexpr X64Reg GetSimpleReg() const {
295 return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG;
296 }
297
// NOTE: truncates to 32 bits — not meaningful for a SCALE_IMM64 operand.
298 constexpr u32 GetImmValue() const {
299 return static_cast<u32>(offset);
300 }
301
302 // For loops.
303 void IncreaseOffset(int sz) {
304 offset += sz;
305 }
306
307private:
// 'scale' is the SCALE_* tag; 'offsetOrBaseReg' holds the base register (or
// the register for SCALE_NONE); 'indexReg' the SIB index register. 'operandReg'
// is filled in by the emitter while encoding.
308 u8 scale = 0;
309 u16 offsetOrBaseReg = 0;
310 u16 indexReg = 0;
311 u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available.
312 u16 operandReg = 0;
313};
314
// Factory helpers producing the common OpArg shapes.
// M(ptr) -> RIP-relative access to a global; R(reg) -> plain register;
// MatR(reg) -> [reg]; MDisp -> [reg + disp]; MComplex -> [base + index*scale
// + disp]; MScaled -> [index*scale + disp] (no base); Imm* -> immediates.
315template <typename T>
316inline OpArg M(const T* ptr) {
317 return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP));
318}
319constexpr OpArg R(X64Reg value) {
320 return OpArg(0, SCALE_NONE, value);
321}
322constexpr OpArg MatR(X64Reg value) {
323 return OpArg(0, SCALE_ATREG, value);
324}
325
// The u32 cast zero-extends a negative 'offset' into the u64 field; the
// encoder presumably reinterprets it as a signed 32-bit displacement —
// NOTE(review): confirm against OpArg::WriteRest.
326constexpr OpArg MDisp(X64Reg value, int offset) {
327 return OpArg(static_cast<u32>(offset), SCALE_ATREG, value);
328}
329
330constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) {
331 return OpArg(offset, scale, base, scaled);
332}
333
// scale|0x20 produces the SCALE_NOBASE_* tags; a 1x scaled access degenerates
// to a plain [reg] (SCALE_ATREG) since SCALE_NOBASE_1 is unsupported.
334constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) {
335 return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled)
336 : OpArg(offset, scale | 0x20, RAX, scaled);
337}
338
339constexpr OpArg MRegSum(X64Reg base, X64Reg offset) {
340 return MComplex(base, offset, 1, 0);
341}
342
343constexpr OpArg Imm8(u8 imm) {
344 return OpArg(imm, SCALE_IMM8);
345}
346constexpr OpArg Imm16(u16 imm) {
347 return OpArg(imm, SCALE_IMM16);
348} // rarely used
349constexpr OpArg Imm32(u32 imm) {
350 return OpArg(imm, SCALE_IMM32);
351}
352constexpr OpArg Imm64(u64 imm) {
353 return OpArg(imm, SCALE_IMM64);
354}
// Auto-width immediates: values that fit a (sign-extended) imm8 use the short
// form, everything else falls back to imm32.
355constexpr OpArg UImmAuto(u32 imm) {
356 return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8);
357}
358constexpr OpArg SImmAuto(s32 imm) {
359 return OpArg(imm, (imm >= 128 || imm < -128) ? SCALE_IMM32 : SCALE_IMM8);
360}
361
// Pointer-sized immediate for the build target (64-bit on _ARCH_64).
362template <typename T>
363OpArg ImmPtr(const T* imm) {
364#ifdef _ARCH_64
365 return Imm64(reinterpret_cast<u64>(imm));
366#else
367 return Imm32(reinterpret_cast<u32>(imm));
368#endif
369}
370
// Distance between two pointers, asserted to fit in a 32-bit displacement.
371inline u32 PtrOffset(const void* ptr, const void* base) {
372#ifdef _ARCH_64
373 s64 distance = (s64)ptr - (s64)base;
374 if (distance >= 0x80000000LL || distance < -0x80000000LL) {
375 ASSERT_MSG(0, "pointer offset out of range");
376 return 0;
377 }
378
379 return (u32)distance;
380#else
381 return (u32)ptr - (u32)base;
382#endif
383}
384
// (the '&' below is address-of, spaced oddly by the formatter, not bitwise-and)
385// usage: int a[]; ARRAY_OFFSET(a,10)
386#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0]))
387// usage: struct {int e;} s; STRUCT_OFFSET(s,e)
388#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str)))
389
// Records the location of a forward branch whose displacement is patched
// later (by SetJumpTarget-style code elsewhere in the emitter).
390struct FixupBranch {
391 u8* ptr;
392 int type; // 0 = 8bit 1 = 32bit
393};
394
// CMPSS/CMPPS predicate immediates (same values as the anonymous CMP_ enum above).
395enum SSECompare {
396 EQ = 0,
397 LT,
398 LE,
399 UNORD,
400 NEQ,
401 NLT,
402 NLE,
403 ORD,
404};
405
// The x86-64 instruction emitter. One method per mnemonic (or mnemonic
// family); each call encodes the instruction and appends its bytes at the
// current write position ("code"). Also provides ABI helpers for calling
// C functions from generated code with correct stack alignment.
class XEmitter {
    friend struct OpArg; // for Write8 etc
private:
    u8* code;          // current write position in the output buffer
    bool flags_locked; // while set, CheckFlags() rejects flag-clobbering instructions

    void CheckFlags();

    // Internal encoding helpers: prefixes, common ModRM-based instruction
    // shapes, and the VEX-encoded (AVX/BMI) forms.
    void Rex(int w, int r, int x, int b);
    void WriteSimple1Byte(int bits, u8 byte, X64Reg reg);
    void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
    void WriteMulDivType(int bits, OpArg src, int ext);
    void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
    void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
    void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
    void WriteMXCSR(OpArg arg, int ext);
    void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                    int extrabytes = 0);
    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                    int extrabytes = 0);
    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                     int extrabytes = 0);
    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                     int extrabytes = 0);
    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
    void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);

    // Computes the stack-frame layout shared by ABI_PushRegistersAndAdjustStack
    // and ABI_PopRegistersAndAdjustStack (shadow space, RSP subtraction, and
    // the offset at which XMM saves start).
    void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
                                size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);

protected:
    // Raw byte output at the current code pointer; advances `code`.
    void Write8(u8 value);
    void Write16(u16 value);
    void Write32(u32 value);
    void Write64(u64 value);

public:
    // Constructs an emitter with no buffer; call SetCodePtr() before emitting.
    XEmitter() {
        code = nullptr;
        flags_locked = false;
    }
    // Constructs an emitter that writes starting at code_ptr.
    XEmitter(u8* code_ptr) {
        code = code_ptr;
        flags_locked = false;
    }
    virtual ~XEmitter() {}

    void WriteModRM(int mod, int rm, int reg);
    void WriteSIB(int scale, int index, int base);

    // Code-pointer management. AlignCode*() pad with filler up to the given
    // boundary and return the aligned position.
    void SetCodePtr(u8* ptr);
    void ReserveCodeSpace(int bytes);
    const u8* AlignCode4();
    const u8* AlignCode16();
    const u8* AlignCodePage();
    const u8* GetCodePtr() const;
    u8* GetWritableCodePtr();

    // While flags are locked, emitting an EFLAGS-clobbering instruction is an
    // error (enforced by the private CheckFlags()).
    void LockFlags() {
        flags_locked = true;
    }
    void UnlockFlags() {
        flags_locked = false;
    }

    // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPUs:
    // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD,
    // REP SCASD + other string instructions.
    // INC and DEC are slow on Intel Core, but not on AMD. They create a
    // false flag dependency because they only update a subset of the flags.
    // XCHG is SLOW and should be avoided.

    // Debug breakpoint
    void INT3();

    // Do nothing
    void NOP(size_t count = 1);

    // Save energy in wait-loops on P4 only. Probably not too useful.
    void PAUSE();

    // Flag control
    void STC();
    void CLC();
    void CMC();

    // These two can not be executed in 64-bit mode on early Intel 64-bit CPUs, only on Core2 and
    // AMD!
    void LAHF(); // 3 cycle vector path
    void SAHF(); // direct path fast

    // Stack control
    void PUSH(X64Reg reg);
    void POP(X64Reg reg);
    void PUSH(int bits, const OpArg& reg);
    void POP(int bits, const OpArg& reg);
    void PUSHF();
    void POPF();

    // Flow control
    void RET();
    void RET_FAST();
    void UD2();
    // Unconditional jump with a target to be filled in later via SetJumpTarget().
    FixupBranch J(bool force5bytes = false);

    void JMP(const u8* addr, bool force5Bytes = false);
    void JMPptr(const OpArg& arg);
    void JMPself(); // infinite loop!
#ifdef CALL
#undef CALL
#endif
    void CALL(const void* fnptr);
    FixupBranch CALL();
    void CALLptr(OpArg arg);

    // Conditional jumps; the FixupBranch overload leaves the target to be
    // patched later with SetJumpTarget().
    FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
    void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);

    void SetJumpTarget(const FixupBranch& branch);
    void SetJumpTarget(const FixupBranch& branch, const u8* target);

    void SETcc(CCFlags flag, OpArg dest);
    // Note: CMOV brings small if any benefit on current cpus.
    void CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag);

    // Fences
    void LFENCE();
    void MFENCE();
    void SFENCE();

    // Bit scan
    void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
    void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit

    // Cache control
    enum PrefetchLevel {
        PF_NTA, // Non-temporal (data used once and only once)
        PF_T0,  // All cache levels
        PF_T1,  // Levels 2+ (aliased to T0 on AMD)
        PF_T2,  // Levels 3+ (aliased to T0 on AMD)
    };
    void PREFETCH(PrefetchLevel level, OpArg arg);
    void MOVNTI(int bits, const OpArg& dest, X64Reg src);
    void MOVNTDQ(const OpArg& arg, X64Reg regOp);
    void MOVNTPS(const OpArg& arg, X64Reg regOp);
    void MOVNTPD(const OpArg& arg, X64Reg regOp);

    // Multiplication / division
    void MUL(int bits, const OpArg& src); // UNSIGNED
    void IMUL(int bits, const OpArg& src); // SIGNED
    void IMUL(int bits, X64Reg regOp, const OpArg& src);
    void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
    void DIV(int bits, const OpArg& src);
    void IDIV(int bits, const OpArg& src);

    // Shift
    void ROL(int bits, const OpArg& dest, const OpArg& shift);
    void ROR(int bits, const OpArg& dest, const OpArg& shift);
    void RCL(int bits, const OpArg& dest, const OpArg& shift);
    void RCR(int bits, const OpArg& dest, const OpArg& shift);
    void SHL(int bits, const OpArg& dest, const OpArg& shift);
    void SHR(int bits, const OpArg& dest, const OpArg& shift);
    void SAR(int bits, const OpArg& dest, const OpArg& shift);

    // Bit Test
    void BT(int bits, const OpArg& dest, const OpArg& index);
    void BTS(int bits, const OpArg& dest, const OpArg& index);
    void BTR(int bits, const OpArg& dest, const OpArg& index);
    void BTC(int bits, const OpArg& dest, const OpArg& index);

    // Double-Precision Shift
    void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
    void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);

    // Extend EAX into EDX in various ways
    void CWD(int bits = 16);
    void CDQ() {
        CWD(32);
    }
    void CQO() {
        CWD(64);
    }
    void CBW(int bits = 8);
    void CWDE() {
        CBW(16);
    }
    void CDQE() {
        CBW(32);
    }

    // Load effective address
    void LEA(int bits, X64Reg dest, OpArg src);

    // Integer arithmetic
    void NEG(int bits, const OpArg& src);
    void ADD(int bits, const OpArg& a1, const OpArg& a2);
    void ADC(int bits, const OpArg& a1, const OpArg& a2);
    void SUB(int bits, const OpArg& a1, const OpArg& a2);
    void SBB(int bits, const OpArg& a1, const OpArg& a2);
    void AND(int bits, const OpArg& a1, const OpArg& a2);
    void CMP(int bits, const OpArg& a1, const OpArg& a2);

    // Bit operations
    void NOT(int bits, const OpArg& src);
    void OR(int bits, const OpArg& a1, const OpArg& a2);
    void XOR(int bits, const OpArg& a1, const OpArg& a2);
    void MOV(int bits, const OpArg& a1, const OpArg& a2);
    void TEST(int bits, const OpArg& a1, const OpArg& a2);

    // Are these useful at all? Consider removing.
    void XCHG(int bits, const OpArg& a1, const OpArg& a2);
    void XCHG_AHAL();

    // Byte swapping (32 and 64-bit only).
    void BSWAP(int bits, X64Reg reg);

    // Sign/zero extension
    void MOVSX(int dbits, int sbits, X64Reg dest,
               OpArg src); // automatically uses MOVSXD if necessary
    void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src);

    // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe.
    void MOVBE(int dbits, const OpArg& dest, const OpArg& src);

    // Available only on AMD >= Phenom or Intel >= Haswell
    void LZCNT(int bits, X64Reg dest, const OpArg& src);
    // Note: this one is actually part of BMI1
    void TZCNT(int bits, X64Reg dest, const OpArg& src);

    // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
    void STMXCSR(const OpArg& memloc);
    void LDMXCSR(const OpArg& memloc);

    // Prefixes
    void LOCK();
    void REP();
    void REPNE();
    void FSOverride();
    void GSOverride();

    // x87
    enum x87StatusWordBits {
        x87_InvalidOperation = 0x1,
        x87_DenormalizedOperand = 0x2,
        x87_DivisionByZero = 0x4,
        x87_Overflow = 0x8,
        x87_Underflow = 0x10,
        x87_Precision = 0x20,
        x87_StackFault = 0x40,
        x87_ErrorSummary = 0x80,
        x87_C0 = 0x100,
        x87_C1 = 0x200,
        x87_C2 = 0x400,
        x87_TopOfStack = 0x2000 | 0x1000 | 0x800,
        x87_C3 = 0x4000,
        x87_FPUBusy = 0x8000,
    };

    void FLD(int bits, const OpArg& src);
    void FST(int bits, const OpArg& dest);
    void FSTP(int bits, const OpArg& dest);
    void FNSTSW_AX();
    void FWAIT();

    // SSE/SSE2: Floating point arithmetic
    void ADDSS(X64Reg regOp, const OpArg& arg);
    void ADDSD(X64Reg regOp, const OpArg& arg);
    void SUBSS(X64Reg regOp, const OpArg& arg);
    void SUBSD(X64Reg regOp, const OpArg& arg);
    void MULSS(X64Reg regOp, const OpArg& arg);
    void MULSD(X64Reg regOp, const OpArg& arg);
    void DIVSS(X64Reg regOp, const OpArg& arg);
    void DIVSD(X64Reg regOp, const OpArg& arg);
    void MINSS(X64Reg regOp, const OpArg& arg);
    void MINSD(X64Reg regOp, const OpArg& arg);
    void MAXSS(X64Reg regOp, const OpArg& arg);
    void MAXSD(X64Reg regOp, const OpArg& arg);
    void SQRTSS(X64Reg regOp, const OpArg& arg);
    void SQRTSD(X64Reg regOp, const OpArg& arg);
    void RCPSS(X64Reg regOp, const OpArg& arg);
    void RSQRTSS(X64Reg regOp, const OpArg& arg);

    // SSE/SSE2: Floating point bitwise (yes)
    // `compare` is an SSECompare / CMP_* predicate encoded in the imm8.
    void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
    void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);

    void CMPEQSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_EQ);
    }
    void CMPLTSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_LT);
    }
    void CMPLESS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_LE);
    }
    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_UNORD);
    }
    void CMPNEQSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_NEQ);
    }
    void CMPNLTSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_NLT);
    }
    void CMPORDSS(X64Reg regOp, const OpArg& arg) {
        CMPSS(regOp, arg, CMP_ORD);
    }

    // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
    void ADDPS(X64Reg regOp, const OpArg& arg);
    void ADDPD(X64Reg regOp, const OpArg& arg);
    void SUBPS(X64Reg regOp, const OpArg& arg);
    void SUBPD(X64Reg regOp, const OpArg& arg);
    void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
    void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
    void MULPS(X64Reg regOp, const OpArg& arg);
    void MULPD(X64Reg regOp, const OpArg& arg);
    void DIVPS(X64Reg regOp, const OpArg& arg);
    void DIVPD(X64Reg regOp, const OpArg& arg);
    void MINPS(X64Reg regOp, const OpArg& arg);
    void MINPD(X64Reg regOp, const OpArg& arg);
    void MAXPS(X64Reg regOp, const OpArg& arg);
    void MAXPD(X64Reg regOp, const OpArg& arg);
    void SQRTPS(X64Reg regOp, const OpArg& arg);
    void SQRTPD(X64Reg regOp, const OpArg& arg);
    void RCPPS(X64Reg regOp, const OpArg& arg);
    void RSQRTPS(X64Reg regOp, const OpArg& arg);

    // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
    void ANDPS(X64Reg regOp, const OpArg& arg);
    void ANDPD(X64Reg regOp, const OpArg& arg);
    void ANDNPS(X64Reg regOp, const OpArg& arg);
    void ANDNPD(X64Reg regOp, const OpArg& arg);
    void ORPS(X64Reg regOp, const OpArg& arg);
    void ORPD(X64Reg regOp, const OpArg& arg);
    void XORPS(X64Reg regOp, const OpArg& arg);
    void XORPD(X64Reg regOp, const OpArg& arg);

    // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
    void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
    void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);

    // SSE/SSE2: Useful alternative to shuffle in some cases.
    void MOVDDUP(X64Reg regOp, const OpArg& arg);

    // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily
    // on Ivy.
    void HADDPS(X64Reg dest, const OpArg& src);

    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg
    // contains both a read mask and a write "mask".
    void DPPS(X64Reg dest, const OpArg& src, u8 arg);

    void UNPCKLPS(X64Reg dest, const OpArg& src);
    void UNPCKHPS(X64Reg dest, const OpArg& src);
    void UNPCKLPD(X64Reg dest, const OpArg& src);
    void UNPCKHPD(X64Reg dest, const OpArg& src);

    // SSE/SSE2: Compares.
    void COMISS(X64Reg regOp, const OpArg& arg);
    void COMISD(X64Reg regOp, const OpArg& arg);
    void UCOMISS(X64Reg regOp, const OpArg& arg);
    void UCOMISD(X64Reg regOp, const OpArg& arg);

    // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
    void MOVAPS(X64Reg regOp, const OpArg& arg);
    void MOVAPD(X64Reg regOp, const OpArg& arg);
    void MOVAPS(const OpArg& arg, X64Reg regOp);
    void MOVAPD(const OpArg& arg, X64Reg regOp);

    void MOVUPS(X64Reg regOp, const OpArg& arg);
    void MOVUPD(X64Reg regOp, const OpArg& arg);
    void MOVUPS(const OpArg& arg, X64Reg regOp);
    void MOVUPD(const OpArg& arg, X64Reg regOp);

    void MOVDQA(X64Reg regOp, const OpArg& arg);
    void MOVDQA(const OpArg& arg, X64Reg regOp);
    void MOVDQU(X64Reg regOp, const OpArg& arg);
    void MOVDQU(const OpArg& arg, X64Reg regOp);

    void MOVSS(X64Reg regOp, const OpArg& arg);
    void MOVSD(X64Reg regOp, const OpArg& arg);
    void MOVSS(const OpArg& arg, X64Reg regOp);
    void MOVSD(const OpArg& arg, X64Reg regOp);

    void MOVLPS(X64Reg regOp, const OpArg& arg);
    void MOVLPD(X64Reg regOp, const OpArg& arg);
    void MOVLPS(const OpArg& arg, X64Reg regOp);
    void MOVLPD(const OpArg& arg, X64Reg regOp);

    void MOVHPS(X64Reg regOp, const OpArg& arg);
    void MOVHPD(X64Reg regOp, const OpArg& arg);
    void MOVHPS(const OpArg& arg, X64Reg regOp);
    void MOVHPD(const OpArg& arg, X64Reg regOp);

    void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
    void MOVLHPS(X64Reg regOp1, X64Reg regOp2);

    // GPR <-> XMM transfers (32-bit for MOVD, 64-bit for MOVQ).
    void MOVD_xmm(X64Reg dest, const OpArg& arg);
    void MOVQ_xmm(X64Reg dest, OpArg arg);
    void MOVD_xmm(const OpArg& arg, X64Reg src);
    void MOVQ_xmm(OpArg arg, X64Reg src);

    // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in
    // question.
    void MOVMSKPS(X64Reg dest, const OpArg& arg);
    void MOVMSKPD(X64Reg dest, const OpArg& arg);

    // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a
    // weird one.
    void MASKMOVDQU(X64Reg dest, X64Reg src);
    void LDDQU(X64Reg dest, const OpArg& src);

    // SSE/SSE2: Data type conversions.
    void CVTPS2PD(X64Reg dest, const OpArg& src);
    void CVTPD2PS(X64Reg dest, const OpArg& src);
    void CVTSS2SD(X64Reg dest, const OpArg& src);
    void CVTSI2SS(X64Reg dest, const OpArg& src);
    void CVTSD2SS(X64Reg dest, const OpArg& src);
    void CVTSI2SD(X64Reg dest, const OpArg& src);
    void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
    void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
    void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
    void CVTPS2DQ(X64Reg regOp, const OpArg& arg);

    // Truncating (round-toward-zero) conversions.
    void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
    void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);

    // Destinations are X64 regs (rax, rbx, ...) for these instructions.
    void CVTSS2SI(X64Reg xregdest, const OpArg& src);
    void CVTSD2SI(X64Reg xregdest, const OpArg& src);
    void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
    void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);

    // SSE2: Packed integer instructions
    void PACKSSDW(X64Reg dest, const OpArg& arg);
    void PACKSSWB(X64Reg dest, const OpArg& arg);
    void PACKUSDW(X64Reg dest, const OpArg& arg);
    void PACKUSWB(X64Reg dest, const OpArg& arg);

    void PUNPCKLBW(X64Reg dest, const OpArg& arg);
    void PUNPCKLWD(X64Reg dest, const OpArg& arg);
    void PUNPCKLDQ(X64Reg dest, const OpArg& arg);
    void PUNPCKLQDQ(X64Reg dest, const OpArg& arg);

    void PTEST(X64Reg dest, const OpArg& arg);
    void PAND(X64Reg dest, const OpArg& arg);
    void PANDN(X64Reg dest, const OpArg& arg);
    void PXOR(X64Reg dest, const OpArg& arg);
    void POR(X64Reg dest, const OpArg& arg);

    void PADDB(X64Reg dest, const OpArg& arg);
    void PADDW(X64Reg dest, const OpArg& arg);
    void PADDD(X64Reg dest, const OpArg& arg);
    void PADDQ(X64Reg dest, const OpArg& arg);

    void PADDSB(X64Reg dest, const OpArg& arg);
    void PADDSW(X64Reg dest, const OpArg& arg);
    void PADDUSB(X64Reg dest, const OpArg& arg);
    void PADDUSW(X64Reg dest, const OpArg& arg);

    void PSUBB(X64Reg dest, const OpArg& arg);
    void PSUBW(X64Reg dest, const OpArg& arg);
    void PSUBD(X64Reg dest, const OpArg& arg);
    void PSUBQ(X64Reg dest, const OpArg& arg);

    void PSUBSB(X64Reg dest, const OpArg& arg);
    void PSUBSW(X64Reg dest, const OpArg& arg);
    void PSUBUSB(X64Reg dest, const OpArg& arg);
    void PSUBUSW(X64Reg dest, const OpArg& arg);

    void PAVGB(X64Reg dest, const OpArg& arg);
    void PAVGW(X64Reg dest, const OpArg& arg);

    void PCMPEQB(X64Reg dest, const OpArg& arg);
    void PCMPEQW(X64Reg dest, const OpArg& arg);
    void PCMPEQD(X64Reg dest, const OpArg& arg);

    void PCMPGTB(X64Reg dest, const OpArg& arg);
    void PCMPGTW(X64Reg dest, const OpArg& arg);
    void PCMPGTD(X64Reg dest, const OpArg& arg);

    void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
    void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);

    void PMADDWD(X64Reg dest, const OpArg& arg);
    void PSADBW(X64Reg dest, const OpArg& arg);

    void PMAXSW(X64Reg dest, const OpArg& arg);
    void PMAXUB(X64Reg dest, const OpArg& arg);
    void PMINSW(X64Reg dest, const OpArg& arg);
    void PMINUB(X64Reg dest, const OpArg& arg);
    // SSE4: More MAX/MIN instructions.
    void PMINSB(X64Reg dest, const OpArg& arg);
    void PMINSD(X64Reg dest, const OpArg& arg);
    void PMINUW(X64Reg dest, const OpArg& arg);
    void PMINUD(X64Reg dest, const OpArg& arg);
    void PMAXSB(X64Reg dest, const OpArg& arg);
    void PMAXSD(X64Reg dest, const OpArg& arg);
    void PMAXUW(X64Reg dest, const OpArg& arg);
    void PMAXUD(X64Reg dest, const OpArg& arg);

    void PMOVMSKB(X64Reg dest, const OpArg& arg);
    void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
    void PSHUFB(X64Reg dest, const OpArg& arg);

    void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
    void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);

    void PSRLW(X64Reg reg, int shift);
    void PSRLD(X64Reg reg, int shift);
    void PSRLQ(X64Reg reg, int shift);
    void PSRLQ(X64Reg reg, const OpArg& arg);
    void PSRLDQ(X64Reg reg, int shift);

    void PSLLW(X64Reg reg, int shift);
    void PSLLD(X64Reg reg, int shift);
    void PSLLQ(X64Reg reg, int shift);
    void PSLLDQ(X64Reg reg, int shift);

    void PSRAW(X64Reg reg, int shift);
    void PSRAD(X64Reg reg, int shift);

    // SSE4: data type conversions
    void PMOVSXBW(X64Reg dest, const OpArg& arg);
    void PMOVSXBD(X64Reg dest, const OpArg& arg);
    void PMOVSXBQ(X64Reg dest, const OpArg& arg);
    void PMOVSXWD(X64Reg dest, const OpArg& arg);
    void PMOVSXWQ(X64Reg dest, const OpArg& arg);
    void PMOVSXDQ(X64Reg dest, const OpArg& arg);
    void PMOVZXBW(X64Reg dest, const OpArg& arg);
    void PMOVZXBD(X64Reg dest, const OpArg& arg);
    void PMOVZXBQ(X64Reg dest, const OpArg& arg);
    void PMOVZXWD(X64Reg dest, const OpArg& arg);
    void PMOVZXWQ(X64Reg dest, const OpArg& arg);
    void PMOVZXDQ(X64Reg dest, const OpArg& arg);

    // SSE4: variable blend instructions (xmm0 implicit argument)
    void PBLENDVB(X64Reg dest, const OpArg& arg);
    void BLENDVPS(X64Reg dest, const OpArg& arg);
    void BLENDVPD(X64Reg dest, const OpArg& arg);
    void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
    void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);

    // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
    void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
    void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
    void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
    void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);

    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) {
        ROUNDSS(dest, arg, FROUND_NEAREST);
    }
    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) {
        ROUNDSS(dest, arg, FROUND_FLOOR);
    }
    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) {
        ROUNDSS(dest, arg, FROUND_CEIL);
    }
    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) {
        ROUNDSS(dest, arg, FROUND_ZERO);
    }

    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) {
        ROUNDSD(dest, arg, FROUND_NEAREST);
    }
    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) {
        ROUNDSD(dest, arg, FROUND_FLOOR);
    }
    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) {
        ROUNDSD(dest, arg, FROUND_CEIL);
    }
    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) {
        ROUNDSD(dest, arg, FROUND_ZERO);
    }

    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) {
        ROUNDPS(dest, arg, FROUND_NEAREST);
    }
    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) {
        ROUNDPS(dest, arg, FROUND_FLOOR);
    }
    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) {
        ROUNDPS(dest, arg, FROUND_CEIL);
    }
    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) {
        ROUNDPS(dest, arg, FROUND_ZERO);
    }

    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) {
        ROUNDPD(dest, arg, FROUND_NEAREST);
    }
    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) {
        ROUNDPD(dest, arg, FROUND_FLOOR);
    }
    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) {
        ROUNDPD(dest, arg, FROUND_CEIL);
    }
    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) {
        ROUNDPD(dest, arg, FROUND_ZERO);
    }

    // AVX (three-operand forms: regOp1 = regOp2 op arg)
    void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);

    void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);

    void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);

    // FMA3
    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);

    // VEX GPR instructions
    void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
    void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
    void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
    void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
    void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
    void BLSR(int bits, X64Reg regOp, const OpArg& arg);
    void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
    void BLSI(int bits, X64Reg regOp, const OpArg& arg);
    void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);

    void RDTSC();

    // Utility functions
    // The difference between this and CALL is that this aligns the stack
    // where appropriate.
    void ABI_CallFunction(const void* func);
    template <typename T>
    void ABI_CallFunction(T (*func)()) {
        ABI_CallFunction((const void*)func);
    }

    void ABI_CallFunction(const u8* func) {
        ABI_CallFunction((const void*)func);
    }
    void ABI_CallFunctionC16(const void* func, u16 param1);
    void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);

    // These only support u32 parameters, but that's enough for a lot of uses.
    // These will destroy the 1 or 2 first "parameter regs".
    // Suffix key: C = u32 constant, P = pointer, A = OpArg, R = register.
    void ABI_CallFunctionC(const void* func, u32 param1);
    void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
    void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
    void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
    void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
    void ABI_CallFunctionP(const void* func, void* param1);
    void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
    void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
    void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
    void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
    void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
    void ABI_CallFunctionA(const void* func, const OpArg& arg1);
    void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);

    // Pass a register as a parameter.
    void ABI_CallFunctionR(const void* func, X64Reg reg1);
    void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);

    template <typename Tr, typename T1>
    void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
        ABI_CallFunctionC((const void*)func, param1);
    }

    /**
     * Saves specified registers and adjusts the stack to be 16-byte aligned as required by the ABI
     *
     * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs)
     * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8
     * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the
     * stack
     * @return Size of the shadow space, i.e., offset of the frame
     */
    size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                           size_t needed_frame_size = 0);

    /**
     * Restores specified registers and adjusts the stack to its original alignment, i.e., the
     * alignment before
     * the matching PushRegistersAndAdjustStack.
     *
     * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are
     * GPRs)
     * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must
     * be 0 or 8
     * @param needed_frame_size Additional space that was needed
     * @warning Stack must be currently 16-byte aligned
     */
    void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
                                        size_t needed_frame_size = 0);

    // Number of architectural XMM registers: 8 on x86-32, 16 on x86-64.
#ifdef _M_IX86
    static int ABI_GetNumXMMRegs() {
        return 8;
    }
#else
    static int ABI_GetNumXMMRegs() {
        return 16;
    }
#endif
}; // class XEmitter
1196
// Everything that needs to generate X86 code should inherit from this.
// You get memory management for free, plus, you can use all the MOV etc functions without
// having to prefix them with gen-> or something similar.

class XCodeBlock : public CodeBlock<XEmitter> {
public:
    // Overwrites the block's memory with a poison pattern so stale code fails
    // fast if accidentally executed (presumably INT3 bytes — see the .cpp).
    void PoisonMemory() override;
};
1205
1206} // namespace