summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/citra_qt/main.cpp26
-rw-r--r--src/citra_qt/main.h18
-rw-r--r--src/common/common_funcs.h12
-rw-r--r--src/common/file_util.h2
-rw-r--r--src/common/logging/log.h15
-rw-r--r--src/common/x64/emitter.cpp770
-rw-r--r--src/common/x64/emitter.h850
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/arm/skyeye_common/armstate.cpp1
-rw-r--r--src/core/arm/skyeye_common/armsupp.cpp1
-rw-r--r--src/core/hle/config_mem.cpp7
-rw-r--r--src/core/hle/config_mem.h1
-rw-r--r--src/core/hle/function_wrappers.h8
-rw-r--r--src/core/hle/hle.cpp4
-rw-r--r--src/core/hle/kernel/kernel.cpp19
-rw-r--r--src/core/hle/kernel/memory.cpp136
-rw-r--r--src/core/hle/kernel/memory.h35
-rw-r--r--src/core/hle/kernel/process.cpp152
-rw-r--r--src/core/hle/kernel/process.h39
-rw-r--r--src/core/hle/kernel/resource_limit.cpp1
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/core/hle/kernel/vm_manager.cpp118
-rw-r--r--src/core/hle/kernel/vm_manager.h38
-rw-r--r--src/core/hle/service/apt/apt.cpp21
-rw-r--r--src/core/hle/service/gsp_gpu.cpp26
-rw-r--r--src/core/hle/service/gsp_gpu.h11
-rw-r--r--src/core/hle/service/y2r_u.cpp1
-rw-r--r--src/core/hle/shared_page.cpp3
-rw-r--r--src/core/hle/shared_page.h1
-rw-r--r--src/core/hle/svc.cpp158
-rw-r--r--src/core/hw/gpu.cpp69
-rw-r--r--src/core/hw/gpu.h32
-rw-r--r--src/core/mem_map.cpp163
-rw-r--r--src/core/mem_map.h46
-rw-r--r--src/core/memory.cpp40
-rw-r--r--src/core/memory.h22
-rw-r--r--src/core/memory_setup.h3
-rw-r--r--src/core/system.cpp3
-rw-r--r--src/video_core/shader/shader_interpreter.cpp69
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp151
-rw-r--r--src/video_core/shader/shader_jit_x64.h9
41 files changed, 1868 insertions, 1221 deletions
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index a1a4865bd..8bf2a3e13 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -287,6 +287,17 @@ void GMainWindow::ShutdownGame() {
287 render_window->hide(); 287 render_window->hide();
288} 288}
289 289
290void GMainWindow::StoreRecentFile(const QString& filename)
291{
292 QSettings settings;
293 QStringList recent_files = settings.value("recentFiles").toStringList();
294 recent_files.prepend(filename);
295 recent_files.removeDuplicates();
296 settings.setValue("recentFiles", recent_files);
297
298 UpdateRecentFiles();
299}
300
290void GMainWindow::UpdateRecentFiles() { 301void GMainWindow::UpdateRecentFiles() {
291 QSettings settings; 302 QSettings settings;
292 QStringList recent_files = settings.value("recentFiles").toStringList(); 303 QStringList recent_files = settings.value("recentFiles").toStringList();
@@ -297,6 +308,7 @@ void GMainWindow::UpdateRecentFiles() {
297 QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName()); 308 QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName());
298 actions_recent_files[i]->setText(text); 309 actions_recent_files[i]->setText(text);
299 actions_recent_files[i]->setData(recent_files[i]); 310 actions_recent_files[i]->setData(recent_files[i]);
311 actions_recent_files[i]->setToolTip(recent_files[i]);
300 actions_recent_files[i]->setVisible(true); 312 actions_recent_files[i]->setVisible(true);
301 } 313 }
302 314
@@ -319,11 +331,7 @@ void GMainWindow::OnMenuLoadFile() {
319 QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)")); 331 QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));
320 if (filename.size()) { 332 if (filename.size()) {
321 settings.setValue("romsPath", QFileInfo(filename).path()); 333 settings.setValue("romsPath", QFileInfo(filename).path());
322 // Update recent files list 334 StoreRecentFile(filename);
323 QStringList recent_files = settings.value("recentFiles").toStringList();
324 recent_files.prepend(filename);
325 settings.setValue("recentFiles", recent_files);
326 UpdateRecentFiles(); // Update UI
327 335
328 BootGame(filename.toLatin1().data()); 336 BootGame(filename.toLatin1().data());
329 } 337 }
@@ -349,6 +357,7 @@ void GMainWindow::OnMenuRecentFile() {
349 QFileInfo file_info(filename); 357 QFileInfo file_info(filename);
350 if (file_info.exists()) { 358 if (file_info.exists()) {
351 BootGame(filename.toLatin1().data()); 359 BootGame(filename.toLatin1().data());
360 StoreRecentFile(filename); // Put the filename on top of the list
352 } else { 361 } else {
353 // Display an error message and remove the file from the list. 362 // Display an error message and remove the file from the list.
354 QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); 363 QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename));
@@ -357,12 +366,7 @@ void GMainWindow::OnMenuRecentFile() {
357 QStringList recent_files = settings.value("recentFiles").toStringList(); 366 QStringList recent_files = settings.value("recentFiles").toStringList();
358 recent_files.removeOne(filename); 367 recent_files.removeOne(filename);
359 settings.setValue("recentFiles", recent_files); 368 settings.setValue("recentFiles", recent_files);
360 369 UpdateRecentFiles();
361 action->setVisible(false);
362 // Grey out the recent files menu if the list is empty
363 if (ui.menu_recent_files->isEmpty()) {
364 ui.menu_recent_files->setEnabled(false);
365 }
366 } 370 }
367} 371}
368 372
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index 4b260ae8b..6f1292295 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -60,6 +60,24 @@ private:
60 void BootGame(const std::string& filename); 60 void BootGame(const std::string& filename);
61 void ShutdownGame(); 61 void ShutdownGame();
62 62
63 /**
64 * Stores the filename in the recently loaded files list.
65 * The new filename is stored at the beginning of the recently loaded files list.
66 * After inserting the new entry, duplicates are removed meaning that if
67 * this was inserted from \a OnMenuRecentFile(), the entry will be put on top
68 * and removed from its previous position.
69 *
70 * Finally, this function calls \a UpdateRecentFiles() to update the UI.
71 *
72 * @param filename the filename to store
73 */
74 void StoreRecentFile(const QString& filename);
75
76 /**
77 * Updates the recent files menu.
78 * Menu entries are rebuilt from the configuration file.
79 * If there is no entry in the menu, the menu is greyed out.
80 */
63 void UpdateRecentFiles(); 81 void UpdateRecentFiles();
64 82
65 void closeEvent(QCloseEvent* event) override; 83 void closeEvent(QCloseEvent* event) override;
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 88e452a16..ed20c3629 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -45,14 +45,20 @@
45 45
46// GCC 4.8 defines all the rotate functions now 46// GCC 4.8 defines all the rotate functions now
47// Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit 47// Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit
48#ifndef _rotl 48#ifdef _rotl
49inline u32 _rotl(u32 x, int shift) { 49#define rotl _rotl
50#else
51inline u32 rotl(u32 x, int shift) {
50 shift &= 31; 52 shift &= 31;
51 if (!shift) return x; 53 if (!shift) return x;
52 return (x << shift) | (x >> (32 - shift)); 54 return (x << shift) | (x >> (32 - shift));
53} 55}
56#endif
54 57
55inline u32 _rotr(u32 x, int shift) { 58#ifdef _rotr
59#define rotr _rotr
60#else
61inline u32 rotr(u32 x, int shift) {
56 shift &= 31; 62 shift &= 31;
57 if (!shift) return x; 63 if (!shift) return x;
58 return (x >> shift) | (x << (32 - shift)); 64 return (x >> shift) | (x << (32 - shift));
diff --git a/src/common/file_util.h b/src/common/file_util.h
index d0dccdf69..e71a9b2fa 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -244,7 +244,7 @@ private:
244template <typename T> 244template <typename T>
245void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) 245void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode)
246{ 246{
247#ifdef _WIN32 247#ifdef _MSC_VER
248 fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode); 248 fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode);
249#else 249#else
250 fstream.open(filename.c_str(), openmode); 250 fstream.open(filename.c_str(), openmode);
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index e16dde7fc..5fd3bd7f5 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level,
91} // namespace Log 91} // namespace Log
92 92
93#define LOG_GENERIC(log_class, log_level, ...) \ 93#define LOG_GENERIC(log_class, log_level, ...) \
94 ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \ 94 ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__)
95 __FILE__, __LINE__, __func__, __VA_ARGS__)
96 95
97#ifdef _DEBUG 96#ifdef _DEBUG
98#define LOG_TRACE( log_class, ...) LOG_GENERIC(log_class, Trace, __VA_ARGS__) 97#define LOG_TRACE( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace, __VA_ARGS__)
99#else 98#else
100#define LOG_TRACE( log_class, ...) (void(0)) 99#define LOG_TRACE( log_class, ...) (void(0))
101#endif 100#endif
102 101
103#define LOG_DEBUG( log_class, ...) LOG_GENERIC(log_class, Debug, __VA_ARGS__) 102#define LOG_DEBUG( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug, __VA_ARGS__)
104#define LOG_INFO( log_class, ...) LOG_GENERIC(log_class, Info, __VA_ARGS__) 103#define LOG_INFO( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info, __VA_ARGS__)
105#define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning, __VA_ARGS__) 104#define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning, __VA_ARGS__)
106#define LOG_ERROR( log_class, ...) LOG_GENERIC(log_class, Error, __VA_ARGS__) 105#define LOG_ERROR( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error, __VA_ARGS__)
107#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__) 106#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__)
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 4b79acd1f..939df210e 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -15,6 +15,7 @@
15// Official SVN repository and contact information can be found at 15// Official SVN repository and contact information can be found at
16// http://code.google.com/p/dolphin-emu/ 16// http://code.google.com/p/dolphin-emu/
17 17
18#include <cinttypes>
18#include <cstring> 19#include <cstring>
19 20
20#include "common/assert.h" 21#include "common/assert.h"
@@ -25,11 +26,6 @@
25#include "cpu_detect.h" 26#include "cpu_detect.h"
26#include "emitter.h" 27#include "emitter.h"
27 28
28#define PRIx64 "llx"
29
30// Minimize the diff against Dolphin
31#define DYNA_REC JIT
32
33namespace Gen 29namespace Gen
34{ 30{
35 31
@@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr()
113 return code; 109 return code;
114} 110}
115 111
112void XEmitter::Write8(u8 value)
113{
114 *code++ = value;
115}
116
117void XEmitter::Write16(u16 value)
118{
119 std::memcpy(code, &value, sizeof(u16));
120 code += sizeof(u16);
121}
122
123void XEmitter::Write32(u32 value)
124{
125 std::memcpy(code, &value, sizeof(u32));
126 code += sizeof(u32);
127}
128
129void XEmitter::Write64(u64 value)
130{
131 std::memcpy(code, &value, sizeof(u64));
132 code += sizeof(u64);
133}
134
116void XEmitter::ReserveCodeSpace(int bytes) 135void XEmitter::ReserveCodeSpace(int bytes)
117{ 136{
118 for (int i = 0; i < bytes; i++) 137 for (int i = 0; i < bytes; i++)
@@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b)
374 Write8(rx); 393 Write8(rx);
375} 394}
376 395
377void XEmitter::JMP(const u8 *addr, bool force5Bytes) 396void XEmitter::JMP(const u8* addr, bool force5Bytes)
378{ 397{
379 u64 fn = (u64)addr; 398 u64 fn = (u64)addr;
380 if (!force5Bytes) 399 if (!force5Bytes)
@@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes)
398 } 417 }
399} 418}
400 419
401void XEmitter::JMPptr(const OpArg &arg2) 420void XEmitter::JMPptr(const OpArg& arg2)
402{ 421{
403 OpArg arg = arg2; 422 OpArg arg = arg2;
404 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); 423 if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
@@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg)
425 arg.WriteRest(this); 444 arg.WriteRest(this);
426} 445}
427 446
428void XEmitter::CALL(const void *fnptr) 447void XEmitter::CALL(const void* fnptr)
429{ 448{
430 u64 distance = u64(fnptr) - (u64(code) + 5); 449 u64 distance = u64(fnptr) - (u64(code) + 5);
431 ASSERT_MSG( 450 ASSERT_MSG(
@@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
496 } 515 }
497} 516}
498 517
499void XEmitter::SetJumpTarget(const FixupBranch &branch) 518void XEmitter::SetJumpTarget(const FixupBranch& branch)
500{ 519{
501 if (branch.type == 0) 520 if (branch.type == 0)
502 { 521 {
@@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch)
512 } 531 }
513} 532}
514 533
515// INC/DEC considered harmful on newer CPUs due to partial flag set.
516// Use ADD, SUB instead.
517
518/*
519void XEmitter::INC(int bits, OpArg arg)
520{
521 if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
522 arg.operandReg = 0;
523 if (bits == 16) {Write8(0x66);}
524 arg.WriteRex(this, bits, bits);
525 Write8(bits == 8 ? 0xFE : 0xFF);
526 arg.WriteRest(this);
527}
528void XEmitter::DEC(int bits, OpArg arg)
529{
530 if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
531 arg.operandReg = 1;
532 if (bits == 16) {Write8(0x66);}
533 arg.WriteRex(this, bits, bits);
534 Write8(bits == 8 ? 0xFE : 0xFF);
535 arg.WriteRest(this);
536}
537*/
538
539//Single byte opcodes 534//Single byte opcodes
540//There is no PUSHAD/POPAD in 64-bit mode. 535//There is no PUSHAD/POPAD in 64-bit mode.
541void XEmitter::INT3() {Write8(0xCC);} 536void XEmitter::INT3() {Write8(0xCC);}
@@ -667,7 +662,7 @@ void XEmitter::CBW(int bits)
667void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} 662void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
668void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} 663void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
669 664
670void XEmitter::PUSH(int bits, const OpArg &reg) 665void XEmitter::PUSH(int bits, const OpArg& reg)
671{ 666{
672 if (reg.IsSimpleReg()) 667 if (reg.IsSimpleReg())
673 PUSH(reg.GetSimpleReg()); 668 PUSH(reg.GetSimpleReg());
@@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg)
703 } 698 }
704} 699}
705 700
706void XEmitter::POP(int /*bits*/, const OpArg &reg) 701void XEmitter::POP(int /*bits*/, const OpArg& reg)
707{ 702{
708 if (reg.IsSimpleReg()) 703 if (reg.IsSimpleReg())
709 POP(reg.GetSimpleReg()); 704 POP(reg.GetSimpleReg());
@@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
791 src.WriteRest(this); 786 src.WriteRest(this);
792} 787}
793 788
794void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} 789void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);}
795void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} 790void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);}
796void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} 791void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);}
797void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} 792void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);}
798void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} 793void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);}
799void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} 794void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);}
800 795
801void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) 796void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
802{ 797{
@@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo
813 src.WriteRest(this); 808 src.WriteRest(this);
814} 809}
815 810
816void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) 811void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src)
817{ 812{
818 if (bits <= 16) 813 if (bits <= 16)
819 ASSERT_MSG(0, "MOVNTI - bits<=16"); 814 ASSERT_MSG(0, "MOVNTI - bits<=16");
820 WriteBitSearchType(bits, src, dest, 0xC3); 815 WriteBitSearchType(bits, src, dest, 0xC3);
821} 816}
822 817
823void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit 818void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit
824void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit 819void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit
825 820
826void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) 821void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src)
827{ 822{
828 CheckFlags(); 823 CheckFlags();
829 if (!Common::GetCPUCaps().bmi1) 824 if (!Common::GetCPUCaps().bmi1)
830 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); 825 ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
831 WriteBitSearchType(bits, dest, src, 0xBC, true); 826 WriteBitSearchType(bits, dest, src, 0xBC, true);
832} 827}
833void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) 828void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src)
834{ 829{
835 CheckFlags(); 830 CheckFlags();
836 if (!Common::GetCPUCaps().lzcnt) 831 if (!Common::GetCPUCaps().lzcnt)
@@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
950} 945}
951 946
952//shift can be either imm8 or cl 947//shift can be either imm8 or cl
953void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) 948void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)
954{ 949{
955 CheckFlags(); 950 CheckFlags();
956 bool writeImm = false; 951 bool writeImm = false;
@@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
991 986
992// large rotates and shift are slower on intel than amd 987// large rotates and shift are slower on intel than amd
993// intel likes to rotate by 1, and the op is smaller too 988// intel likes to rotate by 1, and the op is smaller too
994void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} 989void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);}
995void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} 990void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);}
996void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} 991void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);}
997void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} 992void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);}
998void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} 993void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);}
999void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} 994void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);}
1000void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} 995void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);}
1001 996
1002// index can be either imm8 or register, don't use memory destination because it's slow 997// index can be either imm8 or register, don't use memory destination because it's slow
1003void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) 998void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext)
1004{ 999{
1005 CheckFlags(); 1000 CheckFlags();
1006 if (dest.IsImm()) 1001 if (dest.IsImm())
@@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
1029 } 1024 }
1030} 1025}
1031 1026
1032void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} 1027void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);}
1033void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} 1028void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);}
1034void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} 1029void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);}
1035void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} 1030void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);}
1036 1031
1037//shift can be either imm8 or cl 1032//shift can be either imm8 or cl
1038void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) 1033void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
1039{ 1034{
1040 CheckFlags(); 1035 CheckFlags();
1041 if (dest.IsImm()) 1036 if (dest.IsImm())
@@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
1067 } 1062 }
1068} 1063}
1069 1064
1070void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) 1065void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
1071{ 1066{
1072 CheckFlags(); 1067 CheckFlags();
1073 if (dest.IsImm()) 1068 if (dest.IsImm())
@@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit
1111} 1106}
1112 1107
1113//operand can either be immediate or register 1108//operand can either be immediate or register
1114void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const 1109void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const
1115{ 1110{
1116 X64Reg _operandReg; 1111 X64Reg _operandReg;
1117 if (IsImm()) 1112 if (IsImm())
@@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o
1257 } 1252 }
1258} 1253}
1259 1254
1260void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) 1255void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2)
1261{ 1256{
1262 if (a1.IsImm()) 1257 if (a1.IsImm())
1263 { 1258 {
@@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg
1283 } 1278 }
1284} 1279}
1285 1280
1286void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} 1281void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
1287void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} 1282void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
1288void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} 1283void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
1289void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} 1284void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
1290void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} 1285void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
1291void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} 1286void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
1292void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} 1287void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
1293void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) 1288void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2)
1294{ 1289{
1295 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) 1290 if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
1296 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); 1291 LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
1297 WriteNormalOp(this, bits, nrmMOV, a1, a2); 1292 WriteNormalOp(this, bits, nrmMOV, a1, a2);
1298} 1293}
1299void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} 1294void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
1300void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} 1295void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
1301void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} 1296void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
1302 1297
1303void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) 1298void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
1304{ 1299{
1305 CheckFlags(); 1300 CheckFlags();
1306 if (bits == 8) 1301 if (bits == 8)
@@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
1353 } 1348 }
1354} 1349}
1355 1350
1356void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) 1351void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)
1357{ 1352{
1358 CheckFlags(); 1353 CheckFlags();
1359 if (bits == 8) 1354 if (bits == 8)
@@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr
1390 arg.WriteRest(this, extrabytes); 1385 arg.WriteRest(this, extrabytes);
1391} 1386}
1392 1387
1393void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) 1388void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
1394{ 1389{
1395 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); 1390 WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
1396} 1391}
@@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op)
1400 // Currently, only 0x38 and 0x3A are used as secondary escape byte. 1395 // Currently, only 0x38 and 0x3A are used as secondary escape byte.
1401 if ((op >> 8) == 0x3A) 1396 if ((op >> 8) == 0x3A)
1402 return 3; 1397 return 3;
1403 else if ((op >> 8) == 0x38) 1398 if ((op >> 8) == 0x38)
1404 return 2; 1399 return 2;
1405 else 1400
1406 return 1; 1401 return 1;
1407} 1402}
1408 1403
1409static int GetVEXpp(u8 opPrefix) 1404static int GetVEXpp(u8 opPrefix)
1410{ 1405{
1411 if (opPrefix == 0x66) 1406 if (opPrefix == 0x66)
1412 return 1; 1407 return 1;
1413 else if (opPrefix == 0xF3) 1408 if (opPrefix == 0xF3)
1414 return 2; 1409 return 2;
1415 else if (opPrefix == 0xF2) 1410 if (opPrefix == 0xF2)
1416 return 3; 1411 return 3;
1417 else 1412
1418 return 0; 1413 return 0;
1419} 1414}
1420 1415
1421void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) 1416void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
1422{ 1417{
1423 if (!Common::GetCPUCaps().avx) 1418 if (!Common::GetCPUCaps().avx)
1424 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); 1419 ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
@@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA
1431} 1426}
1432 1427
1433// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 1428// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
1434void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) 1429void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
1435{ 1430{
1436 if (size != 32 && size != 64) 1431 if (size != 32 && size != 64)
1437 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); 1432 ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
@@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r
1442 arg.WriteRest(this, extrabytes, regOp1); 1437 arg.WriteRest(this, extrabytes, regOp1);
1443} 1438}
1444 1439
1445void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) 1440void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
1446{ 1441{
1447 CheckFlags(); 1442 CheckFlags();
1448 if (!Common::GetCPUCaps().bmi1) 1443 if (!Common::GetCPUCaps().bmi1)
@@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg
1450 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); 1445 WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
1451} 1446}
1452 1447
1453void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) 1448void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
1454{ 1449{
1455 CheckFlags(); 1450 CheckFlags();
1456 if (!Common::GetCPUCaps().bmi2) 1451 if (!Common::GetCPUCaps().bmi2)
@@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)
1517 arg.WriteRest(this); 1512 arg.WriteRest(this);
1518} 1513}
1519 1514
1520void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} 1515void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);}
1521void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} 1516void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);}
1522 1517
1523void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} 1518void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
1524void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} 1519void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
1525void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} 1520void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
1526 1521
1527void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} 1522void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
1528void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} 1523void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
1529void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} 1524void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
1530void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} 1525void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
1531void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} 1526void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
1532void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} 1527void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
1533void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} 1528void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
1534void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} 1529void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
1535void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} 1530void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
1536void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} 1531void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
1537void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} 1532void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
1538void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} 1533void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
1539void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} 1534void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
1540void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} 1535void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
1541void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} 1536void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
1542void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} 1537void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
1543void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} 1538void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);}
1544 1539void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
1545void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} 1540
1546void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} 1541void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
1547void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} 1542void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
1548void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} 1543void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
1549void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} 1544void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
1550void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} 1545void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
1551void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} 1546void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
1552void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} 1547void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
1553void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} 1548void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
1554void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} 1549void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
1555void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} 1550void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
1556void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} 1551void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
1557void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} 1552void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
1558void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} 1553void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
1559void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} 1554void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
1560void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} 1555void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
1561void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} 1556void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
1562void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} 1557void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
1563void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} 1558void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
1564void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} 1559void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
1565void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} 1560void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
1566void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} 1561void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
1567void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} 1562void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
1568void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} 1563void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
1569void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } 1564void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
1570void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} 1565void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
1571void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} 1566void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
1572void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} 1567void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
1573 1568void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
1574void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} 1569
1575 1570void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
1576void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed 1571
1577void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered 1572void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
1578void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered 1573void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
1579void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} 1574void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
1580 1575void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
1581void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} 1576
1582void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} 1577void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
1583void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} 1578void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
1584void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} 1579void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
1585 1580void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
1586void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} 1581
1587void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} 1582void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
1588void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} 1583void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
1589void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} 1584void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
1590 1585void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
1591void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} 1586
1592void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} 1587void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
1593void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} 1588void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
1594void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} 1589void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
1595 1590void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
1596void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} 1591
1597void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} 1592void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
1598void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} 1593void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
1599void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} 1594void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
1600 1595void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
1601void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } 1596
1602void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } 1597void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
1603void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } 1598void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
1604void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } 1599void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
1605 1600void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
1606void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } 1601
1607void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } 1602void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
1608void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } 1603void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
1609void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } 1604void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
1605void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
1610 1606
1611void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} 1607void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
1612void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} 1608void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
1613 1609
1614void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} 1610void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
1615void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} 1611void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
1616 1612
1617void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} 1613void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
1618void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} 1614void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
1619void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} 1615void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
1620void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} 1616void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
1621void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} 1617void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
1622void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} 1618void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
1623 1619
1624void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} 1620void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
1625void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} 1621void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
1626void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} 1622void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
1627void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} 1623void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
1628 1624
1629void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} 1625void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
1630void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} 1626void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
1631void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} 1627void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
1632void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} 1628void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
1633 1629
1634void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} 1630void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
1635 1631
1636void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} 1632void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
1637void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} 1633void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
1638 1634
1639void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only 1635void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
1640 1636
1641// THESE TWO ARE UNTESTED. 1637// THESE TWO ARE UNTESTED.
1642void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} 1638void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
1643void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} 1639void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
1644 1640
1645void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} 1641void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
1646void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} 1642void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
1647 1643
1648void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) 1644void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
1649{ 1645{
1650 if (Common::GetCPUCaps().sse3) 1646 if (Common::GetCPUCaps().sse3)
1651 { 1647 {
@@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
1663//There are a few more left 1659//There are a few more left
1664 1660
1665// Also some integer instructions are missing 1661// Also some integer instructions are missing
1666void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} 1662void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
1667void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} 1663void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
1668void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} 1664void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
1669 1665
1670void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} 1666void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
1671void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} 1667void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
@@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)
1690 Write8(shift); 1686 Write8(shift);
1691} 1687}
1692 1688
1693void XEmitter::PSRLQ(X64Reg reg, OpArg arg) 1689void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg)
1694{ 1690{
1695 WriteSSEOp(0x66, 0xd3, reg, arg); 1691 WriteSSEOp(0x66, 0xd3, reg, arg);
1696} 1692}
@@ -1735,212 +1731,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift)
1735 Write8(shift); 1731 Write8(shift);
1736} 1732}
1737 1733
1738void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) 1734void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
1739{ 1735{
1740 if (!Common::GetCPUCaps().ssse3) 1736 if (!Common::GetCPUCaps().ssse3)
1741 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); 1737 ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
1742 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); 1738 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1743} 1739}
1744 1740
1745void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) 1741void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
1746{ 1742{
1747 if (!Common::GetCPUCaps().sse4_1) 1743 if (!Common::GetCPUCaps().sse4_1)
1748 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); 1744 ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
1749 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); 1745 WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
1750} 1746}
1751 1747
1752void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} 1748void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
1753void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} 1749void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
1754void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} 1750void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
1755void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} 1751void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
1756 1752
1757void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} 1753void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
1758void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} 1754void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
1759void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} 1755void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
1760void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} 1756void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
1761void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} 1757void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
1762void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} 1758void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
1763void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} 1759void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
1764void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} 1760void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
1765 1761
1766void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} 1762void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
1767void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} 1763void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
1768void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} 1764void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
1769void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} 1765void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
1770void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} 1766void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
1771void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} 1767void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
1772void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} 1768void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
1773void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} 1769void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
1774void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} 1770void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
1775void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} 1771void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
1776void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} 1772void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
1777void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} 1773void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
1778 1774
1779void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} 1775void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
1780void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} 1776void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
1781void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} 1777void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
1782void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } 1778void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
1783void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } 1779void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
1784 1780
1785void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} 1781void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
1786void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} 1782void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
1787void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} 1783void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
1788void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} 1784void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
1789 1785
1790void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} 1786void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
1791void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} 1787void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
1792void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} 1788void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
1793void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} 1789void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
1794 1790
1795void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} 1791void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
1796void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} 1792void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
1797void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} 1793void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
1798void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} 1794void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
1799 1795
1800void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} 1796void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
1801void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} 1797void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
1802void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} 1798void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
1803void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} 1799void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
1804 1800
1805void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} 1801void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
1806void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} 1802void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
1807void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} 1803void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
1808void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} 1804void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
1809 1805
1810void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} 1806void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
1811void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} 1807void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
1812void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} 1808void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
1813void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} 1809void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
1814 1810
1815void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} 1811void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
1816void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} 1812void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
1817 1813
1818void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} 1814void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
1819void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} 1815void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
1820void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} 1816void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
1821 1817
1822void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} 1818void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
1823void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} 1819void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
1824void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} 1820void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
1825 1821
1826void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} 1822void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
1827void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} 1823void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
1828 1824
1829void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } 1825void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
1830void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} 1826void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
1831 1827
1832void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } 1828void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
1833void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } 1829void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
1834void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } 1830void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
1835void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } 1831void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
1836 1832
1837void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } 1833void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
1838void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} 1834void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
1839void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} 1835void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
1840void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} 1836void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
1841 1837
1842// VEX 1838// VEX
1843void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} 1839void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
1844void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} 1840void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
1845void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} 1841void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
1846void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} 1842void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
1847void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} 1843void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
1848void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} 1844void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
1849void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} 1845void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
1850void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} 1846void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
1851void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} 1847void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
1852void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} 1848void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
1853void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} 1849void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
1854void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} 1850void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
1855 1851
1856void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } 1852void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
1857void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } 1853void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
1858void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } 1854void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
1859void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } 1855void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
1860void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } 1856void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
1861void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } 1857void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
1862void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } 1858void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
1863void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } 1859void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
1864 1860
1865void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } 1861void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
1866void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } 1862void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
1867void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } 1863void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
1868void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } 1864void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
1869 1865
1870void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } 1866void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
1871void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } 1867void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
1872void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } 1868void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
1873void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } 1869void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
1874void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } 1870void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
1875void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } 1871void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
1876void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } 1872void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
1877void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } 1873void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
1878void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } 1874void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
1879void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } 1875void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
1880void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } 1876void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
1881void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } 1877void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
1882void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } 1878void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
1883void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } 1879void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
1884void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } 1880void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
1885void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } 1881void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
1886void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } 1882void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
1887void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } 1883void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
1888void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } 1884void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
1889void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } 1885void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
1890void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } 1886void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
1891void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } 1887void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
1892void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } 1888void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
1893void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } 1889void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
1894void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } 1890void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
1895void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } 1891void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
1896void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } 1892void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
1897void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } 1893void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
1898void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } 1894void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
1899void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } 1895void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
1900void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } 1896void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
1901void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } 1897void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
1902void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } 1898void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
1903void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } 1899void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
1904void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } 1900void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
1905void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } 1901void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
1906void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } 1902void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
1907void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } 1903void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
1908void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } 1904void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
1909void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } 1905void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
1910void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } 1906void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
1911void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } 1907void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
1912void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } 1908void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
1913void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } 1909void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
1914void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } 1910void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
1915void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } 1911void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
1916void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } 1912void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
1917void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } 1913void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
1918void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } 1914void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
1919void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } 1915void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
1920void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } 1916void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
1921void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } 1917void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
1922void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } 1918void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
1923void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } 1919void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
1924void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } 1920void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
1925void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } 1921void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
1926void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } 1922void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
1927void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } 1923void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
1928void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } 1924void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
1929void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } 1925void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
1930 1926
1931void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} 1927void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
1932void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} 1928void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
1933void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} 1929void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
1934void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} 1930void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
1935void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} 1931void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
1936void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} 1932void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
1937void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} 1933void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
1938void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} 1934void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
1939void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} 1935void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
1940void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} 1936void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
1941void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} 1937void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
1942void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} 1938void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
1943void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} 1939void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
1944 1940
1945// Prefixes 1941// Prefixes
1946 1942
@@ -1956,7 +1952,7 @@ void XEmitter::FWAIT()
1956} 1952}
1957 1953
1958// TODO: make this more generic 1954// TODO: make this more generic
1959void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) 1955void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg)
1960{ 1956{
1961 int mf = 0; 1957 int mf = 0;
1962 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); 1958 ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
@@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a
1974 arg.WriteRest(this, 0, (X64Reg) op); 1970 arg.WriteRest(this, 0, (X64Reg) op);
1975} 1971}
1976 1972
1977void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} 1973void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
1978void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} 1974void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
1979void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} 1975void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
1980void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } 1976void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
1981 1977
1982void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } 1978void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index e9c924126..a49cd2cf1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -328,8 +328,6 @@ enum SSECompare
328 ORD, 328 ORD,
329}; 329};
330 330
331typedef const u8* JumpTarget;
332
333class XEmitter 331class XEmitter
334{ 332{
335 friend struct OpArg; // for Write8 etc 333 friend struct OpArg; // for Write8 etc
@@ -344,27 +342,27 @@ private:
344 void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); 342 void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
345 void WriteMulDivType(int bits, OpArg src, int ext); 343 void WriteMulDivType(int bits, OpArg src, int ext);
346 void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); 344 void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
347 void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); 345 void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
348 void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); 346 void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
349 void WriteMXCSR(OpArg arg, int ext); 347 void WriteMXCSR(OpArg arg, int ext);
350 void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); 348 void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
351 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); 349 void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
352 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); 350 void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
353 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); 351 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
354 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); 352 void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
355 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); 353 void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
356 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); 354 void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
357 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); 355 void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
358 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); 356 void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
359 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); 357 void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
360 358
361 void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); 359 void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
362 360
363protected: 361protected:
364 inline void Write8(u8 value) {*code++ = value;} 362 void Write8(u8 value);
365 inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} 363 void Write16(u16 value);
366 inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} 364 void Write32(u32 value);
367 inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} 365 void Write64(u64 value);
368 366
369public: 367public:
370 XEmitter() { code = nullptr; flags_locked = false; } 368 XEmitter() { code = nullptr; flags_locked = false; }
@@ -413,8 +411,8 @@ public:
413 // Stack control 411 // Stack control
414 void PUSH(X64Reg reg); 412 void PUSH(X64Reg reg);
415 void POP(X64Reg reg); 413 void POP(X64Reg reg);
416 void PUSH(int bits, const OpArg &reg); 414 void PUSH(int bits, const OpArg& reg);
417 void POP(int bits, const OpArg &reg); 415 void POP(int bits, const OpArg& reg);
418 void PUSHF(); 416 void PUSHF();
419 void POPF(); 417 void POPF();
420 418
@@ -424,21 +422,19 @@ public:
424 void UD2(); 422 void UD2();
425 FixupBranch J(bool force5bytes = false); 423 FixupBranch J(bool force5bytes = false);
426 424
427 void JMP(const u8 * addr, bool force5Bytes = false); 425 void JMP(const u8* addr, bool force5Bytes = false);
428 void JMP(OpArg arg); 426 void JMPptr(const OpArg& arg);
429 void JMPptr(const OpArg &arg);
430 void JMPself(); //infinite loop! 427 void JMPself(); //infinite loop!
431#ifdef CALL 428#ifdef CALL
432#undef CALL 429#undef CALL
433#endif 430#endif
434 void CALL(const void *fnptr); 431 void CALL(const void* fnptr);
435 void CALLptr(OpArg arg); 432 void CALLptr(OpArg arg);
436 433
437 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); 434 FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
438 //void J_CC(CCFlags conditionCode, JumpTarget target); 435 void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
439 void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
440 436
441 void SetJumpTarget(const FixupBranch &branch); 437 void SetJumpTarget(const FixupBranch& branch);
442 438
443 void SETcc(CCFlags flag, OpArg dest); 439 void SETcc(CCFlags flag, OpArg dest);
444 // Note: CMOV brings small if any benefit on current cpus. 440 // Note: CMOV brings small if any benefit on current cpus.
@@ -450,8 +446,8 @@ public:
450 void SFENCE(); 446 void SFENCE();
451 447
452 // Bit scan 448 // Bit scan
453 void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit 449 void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
454 void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit 450 void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
455 451
456 // Cache control 452 // Cache control
457 enum PrefetchLevel 453 enum PrefetchLevel
@@ -462,67 +458,67 @@ public:
462 PF_T2, //Levels 3+ (aliased to T0 on AMD) 458 PF_T2, //Levels 3+ (aliased to T0 on AMD)
463 }; 459 };
464 void PREFETCH(PrefetchLevel level, OpArg arg); 460 void PREFETCH(PrefetchLevel level, OpArg arg);
465 void MOVNTI(int bits, OpArg dest, X64Reg src); 461 void MOVNTI(int bits, const OpArg& dest, X64Reg src);
466 void MOVNTDQ(OpArg arg, X64Reg regOp); 462 void MOVNTDQ(const OpArg& arg, X64Reg regOp);
467 void MOVNTPS(OpArg arg, X64Reg regOp); 463 void MOVNTPS(const OpArg& arg, X64Reg regOp);
468 void MOVNTPD(OpArg arg, X64Reg regOp); 464 void MOVNTPD(const OpArg& arg, X64Reg regOp);
469 465
470 // Multiplication / division 466 // Multiplication / division
471 void MUL(int bits, OpArg src); //UNSIGNED 467 void MUL(int bits, const OpArg& src); //UNSIGNED
472 void IMUL(int bits, OpArg src); //SIGNED 468 void IMUL(int bits, const OpArg& src); //SIGNED
473 void IMUL(int bits, X64Reg regOp, OpArg src); 469 void IMUL(int bits, X64Reg regOp, const OpArg& src);
474 void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); 470 void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
475 void DIV(int bits, OpArg src); 471 void DIV(int bits, const OpArg& src);
476 void IDIV(int bits, OpArg src); 472 void IDIV(int bits, const OpArg& src);
477 473
478 // Shift 474 // Shift
479 void ROL(int bits, OpArg dest, OpArg shift); 475 void ROL(int bits, const OpArg& dest, const OpArg& shift);
480 void ROR(int bits, OpArg dest, OpArg shift); 476 void ROR(int bits, const OpArg& dest, const OpArg& shift);
481 void RCL(int bits, OpArg dest, OpArg shift); 477 void RCL(int bits, const OpArg& dest, const OpArg& shift);
482 void RCR(int bits, OpArg dest, OpArg shift); 478 void RCR(int bits, const OpArg& dest, const OpArg& shift);
483 void SHL(int bits, OpArg dest, OpArg shift); 479 void SHL(int bits, const OpArg& dest, const OpArg& shift);
484 void SHR(int bits, OpArg dest, OpArg shift); 480 void SHR(int bits, const OpArg& dest, const OpArg& shift);
485 void SAR(int bits, OpArg dest, OpArg shift); 481 void SAR(int bits, const OpArg& dest, const OpArg& shift);
486 482
487 // Bit Test 483 // Bit Test
488 void BT(int bits, OpArg dest, OpArg index); 484 void BT(int bits, const OpArg& dest, const OpArg& index);
489 void BTS(int bits, OpArg dest, OpArg index); 485 void BTS(int bits, const OpArg& dest, const OpArg& index);
490 void BTR(int bits, OpArg dest, OpArg index); 486 void BTR(int bits, const OpArg& dest, const OpArg& index);
491 void BTC(int bits, OpArg dest, OpArg index); 487 void BTC(int bits, const OpArg& dest, const OpArg& index);
492 488
493 // Double-Precision Shift 489 // Double-Precision Shift
494 void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); 490 void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
495 void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); 491 void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
496 492
497 // Extend EAX into EDX in various ways 493 // Extend EAX into EDX in various ways
498 void CWD(int bits = 16); 494 void CWD(int bits = 16);
499 inline void CDQ() {CWD(32);} 495 void CDQ() {CWD(32);}
500 inline void CQO() {CWD(64);} 496 void CQO() {CWD(64);}
501 void CBW(int bits = 8); 497 void CBW(int bits = 8);
502 inline void CWDE() {CBW(16);} 498 void CWDE() {CBW(16);}
503 inline void CDQE() {CBW(32);} 499 void CDQE() {CBW(32);}
504 500
505 // Load effective address 501 // Load effective address
506 void LEA(int bits, X64Reg dest, OpArg src); 502 void LEA(int bits, X64Reg dest, OpArg src);
507 503
508 // Integer arithmetic 504 // Integer arithmetic
509 void NEG (int bits, OpArg src); 505 void NEG(int bits, const OpArg& src);
510 void ADD (int bits, const OpArg &a1, const OpArg &a2); 506 void ADD(int bits, const OpArg& a1, const OpArg& a2);
511 void ADC (int bits, const OpArg &a1, const OpArg &a2); 507 void ADC(int bits, const OpArg& a1, const OpArg& a2);
512 void SUB (int bits, const OpArg &a1, const OpArg &a2); 508 void SUB(int bits, const OpArg& a1, const OpArg& a2);
513 void SBB (int bits, const OpArg &a1, const OpArg &a2); 509 void SBB(int bits, const OpArg& a1, const OpArg& a2);
514 void AND (int bits, const OpArg &a1, const OpArg &a2); 510 void AND(int bits, const OpArg& a1, const OpArg& a2);
515 void CMP (int bits, const OpArg &a1, const OpArg &a2); 511 void CMP(int bits, const OpArg& a1, const OpArg& a2);
516 512
517 // Bit operations 513 // Bit operations
518 void NOT (int bits, OpArg src); 514 void NOT (int bits, const OpArg& src);
519 void OR (int bits, const OpArg &a1, const OpArg &a2); 515 void OR(int bits, const OpArg& a1, const OpArg& a2);
520 void XOR (int bits, const OpArg &a1, const OpArg &a2); 516 void XOR(int bits, const OpArg& a1, const OpArg& a2);
521 void MOV (int bits, const OpArg &a1, const OpArg &a2); 517 void MOV(int bits, const OpArg& a1, const OpArg& a2);
522 void TEST(int bits, const OpArg &a1, const OpArg &a2); 518 void TEST(int bits, const OpArg& a1, const OpArg& a2);
523 519
524 // Are these useful at all? Consider removing. 520 // Are these useful at all? Consider removing.
525 void XCHG(int bits, const OpArg &a1, const OpArg &a2); 521 void XCHG(int bits, const OpArg& a1, const OpArg& a2);
526 void XCHG_AHAL(); 522 void XCHG_AHAL();
527 523
528 // Byte swapping (32 and 64-bit only). 524 // Byte swapping (32 and 64-bit only).
@@ -536,13 +532,13 @@ public:
536 void MOVBE(int dbits, const OpArg& dest, const OpArg& src); 532 void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
537 533
538 // Available only on AMD >= Phenom or Intel >= Haswell 534 // Available only on AMD >= Phenom or Intel >= Haswell
539 void LZCNT(int bits, X64Reg dest, OpArg src); 535 void LZCNT(int bits, X64Reg dest, const OpArg& src);
540 // Note: this one is actually part of BMI1 536 // Note: this one is actually part of BMI1
541 void TZCNT(int bits, X64Reg dest, OpArg src); 537 void TZCNT(int bits, X64Reg dest, const OpArg& src);
542 538
543 // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) 539 // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
544 void STMXCSR(OpArg memloc); 540 void STMXCSR(const OpArg& memloc);
545 void LDMXCSR(OpArg memloc); 541 void LDMXCSR(const OpArg& memloc);
546 542
547 // Prefixes 543 // Prefixes
548 void LOCK(); 544 void LOCK();
@@ -569,259 +565,243 @@ public:
569 x87_FPUBusy = 0x8000, 565 x87_FPUBusy = 0x8000,
570 }; 566 };
571 567
572 void FLD(int bits, OpArg src); 568 void FLD(int bits, const OpArg& src);
573 void FST(int bits, OpArg dest); 569 void FST(int bits, const OpArg& dest);
574 void FSTP(int bits, OpArg dest); 570 void FSTP(int bits, const OpArg& dest);
575 void FNSTSW_AX(); 571 void FNSTSW_AX();
576 void FWAIT(); 572 void FWAIT();
577 573
578 // SSE/SSE2: Floating point arithmetic 574 // SSE/SSE2: Floating point arithmetic
579 void ADDSS(X64Reg regOp, OpArg arg); 575 void ADDSS(X64Reg regOp, const OpArg& arg);
580 void ADDSD(X64Reg regOp, OpArg arg); 576 void ADDSD(X64Reg regOp, const OpArg& arg);
581 void SUBSS(X64Reg regOp, OpArg arg); 577 void SUBSS(X64Reg regOp, const OpArg& arg);
582 void SUBSD(X64Reg regOp, OpArg arg); 578 void SUBSD(X64Reg regOp, const OpArg& arg);
583 void MULSS(X64Reg regOp, OpArg arg); 579 void MULSS(X64Reg regOp, const OpArg& arg);
584 void MULSD(X64Reg regOp, OpArg arg); 580 void MULSD(X64Reg regOp, const OpArg& arg);
585 void DIVSS(X64Reg regOp, OpArg arg); 581 void DIVSS(X64Reg regOp, const OpArg& arg);
586 void DIVSD(X64Reg regOp, OpArg arg); 582 void DIVSD(X64Reg regOp, const OpArg& arg);
587 void MINSS(X64Reg regOp, OpArg arg); 583 void MINSS(X64Reg regOp, const OpArg& arg);
588 void MINSD(X64Reg regOp, OpArg arg); 584 void MINSD(X64Reg regOp, const OpArg& arg);
589 void MAXSS(X64Reg regOp, OpArg arg); 585 void MAXSS(X64Reg regOp, const OpArg& arg);
590 void MAXSD(X64Reg regOp, OpArg arg); 586 void MAXSD(X64Reg regOp, const OpArg& arg);
591 void SQRTSS(X64Reg regOp, OpArg arg); 587 void SQRTSS(X64Reg regOp, const OpArg& arg);
592 void SQRTSD(X64Reg regOp, OpArg arg); 588 void SQRTSD(X64Reg regOp, const OpArg& arg);
593 void RSQRTSS(X64Reg regOp, OpArg arg); 589 void RCPSS(X64Reg regOp, const OpArg& arg);
590 void RSQRTSS(X64Reg regOp, const OpArg& arg);
594 591
595 // SSE/SSE2: Floating point bitwise (yes) 592 // SSE/SSE2: Floating point bitwise (yes)
596 void CMPSS(X64Reg regOp, OpArg arg, u8 compare); 593 void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
597 void CMPSD(X64Reg regOp, OpArg arg, u8 compare); 594 void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
598 595
599 inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } 596 void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
600 inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } 597 void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
601 inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } 598 void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
602 inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } 599 void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
603 inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } 600 void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
604 inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } 601 void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
605 inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } 602 void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
606 603
607 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) 604 // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
608 void ADDPS(X64Reg regOp, OpArg arg); 605 void ADDPS(X64Reg regOp, const OpArg& arg);
609 void ADDPD(X64Reg regOp, OpArg arg); 606 void ADDPD(X64Reg regOp, const OpArg& arg);
610 void SUBPS(X64Reg regOp, OpArg arg); 607 void SUBPS(X64Reg regOp, const OpArg& arg);
611 void SUBPD(X64Reg regOp, OpArg arg); 608 void SUBPD(X64Reg regOp, const OpArg& arg);
612 void CMPPS(X64Reg regOp, OpArg arg, u8 compare); 609 void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
613 void CMPPD(X64Reg regOp, OpArg arg, u8 compare); 610 void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
614 void MULPS(X64Reg regOp, OpArg arg); 611 void MULPS(X64Reg regOp, const OpArg& arg);
615 void MULPD(X64Reg regOp, OpArg arg); 612 void MULPD(X64Reg regOp, const OpArg& arg);
616 void DIVPS(X64Reg regOp, OpArg arg); 613 void DIVPS(X64Reg regOp, const OpArg& arg);
617 void DIVPD(X64Reg regOp, OpArg arg); 614 void DIVPD(X64Reg regOp, const OpArg& arg);
618 void MINPS(X64Reg regOp, OpArg arg); 615 void MINPS(X64Reg regOp, const OpArg& arg);
619 void MINPD(X64Reg regOp, OpArg arg); 616 void MINPD(X64Reg regOp, const OpArg& arg);
620 void MAXPS(X64Reg regOp, OpArg arg); 617 void MAXPS(X64Reg regOp, const OpArg& arg);
621 void MAXPD(X64Reg regOp, OpArg arg); 618 void MAXPD(X64Reg regOp, const OpArg& arg);
622 void SQRTPS(X64Reg regOp, OpArg arg); 619 void SQRTPS(X64Reg regOp, const OpArg& arg);
623 void SQRTPD(X64Reg regOp, OpArg arg); 620 void SQRTPD(X64Reg regOp, const OpArg& arg);
624 void RCPPS(X64Reg regOp, OpArg arg); 621 void RCPPS(X64Reg regOp, const OpArg& arg);
625 void RSQRTPS(X64Reg regOp, OpArg arg); 622 void RSQRTPS(X64Reg regOp, const OpArg& arg);
626 623
627 // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) 624 // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
628 void ANDPS(X64Reg regOp, OpArg arg); 625 void ANDPS(X64Reg regOp, const OpArg& arg);
629 void ANDPD(X64Reg regOp, OpArg arg); 626 void ANDPD(X64Reg regOp, const OpArg& arg);
630 void ANDNPS(X64Reg regOp, OpArg arg); 627 void ANDNPS(X64Reg regOp, const OpArg& arg);
631 void ANDNPD(X64Reg regOp, OpArg arg); 628 void ANDNPD(X64Reg regOp, const OpArg& arg);
632 void ORPS(X64Reg regOp, OpArg arg); 629 void ORPS(X64Reg regOp, const OpArg& arg);
633 void ORPD(X64Reg regOp, OpArg arg); 630 void ORPD(X64Reg regOp, const OpArg& arg);
634 void XORPS(X64Reg regOp, OpArg arg); 631 void XORPS(X64Reg regOp, const OpArg& arg);
635 void XORPD(X64Reg regOp, OpArg arg); 632 void XORPD(X64Reg regOp, const OpArg& arg);
636 633
637 // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. 634 // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
638 void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); 635 void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
639 void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); 636 void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
640 637
641 // SSE/SSE2: Useful alternative to shuffle in some cases. 638 // SSE/SSE2: Useful alternative to shuffle in some cases.
642 void MOVDDUP(X64Reg regOp, OpArg arg); 639 void MOVDDUP(X64Reg regOp, const OpArg& arg);
643
644 // TODO: Actually implement
645#if 0
646 // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
647 void ADDSUBPS(X64Reg dest, OpArg src);
648 void ADDSUBPD(X64Reg dest, OpArg src);
649 void HADDPD(X64Reg dest, OpArg src);
650 void HSUBPS(X64Reg dest, OpArg src);
651 void HSUBPD(X64Reg dest, OpArg src);
652
653 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
654 void DPPD(X64Reg dest, OpArg src, u8 arg);
655
656 // These are probably useful for VFPU emulation.
657 void INSERTPS(X64Reg dest, OpArg src, u8 arg);
658 void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
659#endif
660 640
661 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. 641 // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
662 void HADDPS(X64Reg dest, OpArg src); 642 void HADDPS(X64Reg dest, const OpArg& src);
663 643
664 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". 644 // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
665 void DPPS(X64Reg dest, OpArg src, u8 arg); 645 void DPPS(X64Reg dest, const OpArg& src, u8 arg);
666 646
667 void UNPCKLPS(X64Reg dest, OpArg src); 647 void UNPCKLPS(X64Reg dest, const OpArg& src);
668 void UNPCKHPS(X64Reg dest, OpArg src); 648 void UNPCKHPS(X64Reg dest, const OpArg& src);
669 void UNPCKLPD(X64Reg dest, OpArg src); 649 void UNPCKLPD(X64Reg dest, const OpArg& src);
670 void UNPCKHPD(X64Reg dest, OpArg src); 650 void UNPCKHPD(X64Reg dest, const OpArg& src);
671 651
672 // SSE/SSE2: Compares. 652 // SSE/SSE2: Compares.
673 void COMISS(X64Reg regOp, OpArg arg); 653 void COMISS(X64Reg regOp, const OpArg& arg);
674 void COMISD(X64Reg regOp, OpArg arg); 654 void COMISD(X64Reg regOp, const OpArg& arg);
675 void UCOMISS(X64Reg regOp, OpArg arg); 655 void UCOMISS(X64Reg regOp, const OpArg& arg);
676 void UCOMISD(X64Reg regOp, OpArg arg); 656 void UCOMISD(X64Reg regOp, const OpArg& arg);
677 657
678 // SSE/SSE2: Moves. Use the right data type for your data, in most cases. 658 // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
679 void MOVAPS(X64Reg regOp, OpArg arg); 659 void MOVAPS(X64Reg regOp, const OpArg& arg);
680 void MOVAPD(X64Reg regOp, OpArg arg); 660 void MOVAPD(X64Reg regOp, const OpArg& arg);
681 void MOVAPS(OpArg arg, X64Reg regOp); 661 void MOVAPS(const OpArg& arg, X64Reg regOp);
682 void MOVAPD(OpArg arg, X64Reg regOp); 662 void MOVAPD(const OpArg& arg, X64Reg regOp);
683 663
684 void MOVUPS(X64Reg regOp, OpArg arg); 664 void MOVUPS(X64Reg regOp, const OpArg& arg);
685 void MOVUPD(X64Reg regOp, OpArg arg); 665 void MOVUPD(X64Reg regOp, const OpArg& arg);
686 void MOVUPS(OpArg arg, X64Reg regOp); 666 void MOVUPS(const OpArg& arg, X64Reg regOp);
687 void MOVUPD(OpArg arg, X64Reg regOp); 667 void MOVUPD(const OpArg& arg, X64Reg regOp);
688 668
689 void MOVDQA(X64Reg regOp, OpArg arg); 669 void MOVDQA(X64Reg regOp, const OpArg& arg);
690 void MOVDQA(OpArg arg, X64Reg regOp); 670 void MOVDQA(const OpArg& arg, X64Reg regOp);
691 void MOVDQU(X64Reg regOp, OpArg arg); 671 void MOVDQU(X64Reg regOp, const OpArg& arg);
692 void MOVDQU(OpArg arg, X64Reg regOp); 672 void MOVDQU(const OpArg& arg, X64Reg regOp);
693 673
694 void MOVSS(X64Reg regOp, OpArg arg); 674 void MOVSS(X64Reg regOp, const OpArg& arg);
695 void MOVSD(X64Reg regOp, OpArg arg); 675 void MOVSD(X64Reg regOp, const OpArg& arg);
696 void MOVSS(OpArg arg, X64Reg regOp); 676 void MOVSS(const OpArg& arg, X64Reg regOp);
697 void MOVSD(OpArg arg, X64Reg regOp); 677 void MOVSD(const OpArg& arg, X64Reg regOp);
698 678
699 void MOVLPS(X64Reg regOp, OpArg arg); 679 void MOVLPS(X64Reg regOp, const OpArg& arg);
700 void MOVLPD(X64Reg regOp, OpArg arg); 680 void MOVLPD(X64Reg regOp, const OpArg& arg);
701 void MOVLPS(OpArg arg, X64Reg regOp); 681 void MOVLPS(const OpArg& arg, X64Reg regOp);
702 void MOVLPD(OpArg arg, X64Reg regOp); 682 void MOVLPD(const OpArg& arg, X64Reg regOp);
703 683
704 void MOVHPS(X64Reg regOp, OpArg arg); 684 void MOVHPS(X64Reg regOp, const OpArg& arg);
705 void MOVHPD(X64Reg regOp, OpArg arg); 685 void MOVHPD(X64Reg regOp, const OpArg& arg);
706 void MOVHPS(OpArg arg, X64Reg regOp); 686 void MOVHPS(const OpArg& arg, X64Reg regOp);
707 void MOVHPD(OpArg arg, X64Reg regOp); 687 void MOVHPD(const OpArg& arg, X64Reg regOp);
708 688
709 void MOVHLPS(X64Reg regOp1, X64Reg regOp2); 689 void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
710 void MOVLHPS(X64Reg regOp1, X64Reg regOp2); 690 void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
711 691
712 void MOVD_xmm(X64Reg dest, const OpArg &arg); 692 void MOVD_xmm(X64Reg dest, const OpArg& arg);
713 void MOVQ_xmm(X64Reg dest, OpArg arg); 693 void MOVQ_xmm(X64Reg dest, OpArg arg);
714 void MOVD_xmm(const OpArg &arg, X64Reg src); 694 void MOVD_xmm(const OpArg& arg, X64Reg src);
715 void MOVQ_xmm(OpArg arg, X64Reg src); 695 void MOVQ_xmm(OpArg arg, X64Reg src);
716 696
717 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. 697 // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
718 void MOVMSKPS(X64Reg dest, OpArg arg); 698 void MOVMSKPS(X64Reg dest, const OpArg& arg);
719 void MOVMSKPD(X64Reg dest, OpArg arg); 699 void MOVMSKPD(X64Reg dest, const OpArg& arg);
720 700
721 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. 701 // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
722 void MASKMOVDQU(X64Reg dest, X64Reg src); 702 void MASKMOVDQU(X64Reg dest, X64Reg src);
723 void LDDQU(X64Reg dest, OpArg src); 703 void LDDQU(X64Reg dest, const OpArg& src);
724 704
725 // SSE/SSE2: Data type conversions. 705 // SSE/SSE2: Data type conversions.
726 void CVTPS2PD(X64Reg dest, OpArg src); 706 void CVTPS2PD(X64Reg dest, const OpArg& src);
727 void CVTPD2PS(X64Reg dest, OpArg src); 707 void CVTPD2PS(X64Reg dest, const OpArg& src);
728 void CVTSS2SD(X64Reg dest, OpArg src); 708 void CVTSS2SD(X64Reg dest, const OpArg& src);
729 void CVTSI2SS(X64Reg dest, OpArg src); 709 void CVTSI2SS(X64Reg dest, const OpArg& src);
730 void CVTSD2SS(X64Reg dest, OpArg src); 710 void CVTSD2SS(X64Reg dest, const OpArg& src);
731 void CVTSI2SD(X64Reg dest, OpArg src); 711 void CVTSI2SD(X64Reg dest, const OpArg& src);
732 void CVTDQ2PD(X64Reg regOp, OpArg arg); 712 void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
733 void CVTPD2DQ(X64Reg regOp, OpArg arg); 713 void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
734 void CVTDQ2PS(X64Reg regOp, OpArg arg); 714 void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
735 void CVTPS2DQ(X64Reg regOp, OpArg arg); 715 void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
736 716
737 void CVTTPS2DQ(X64Reg regOp, OpArg arg); 717 void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
738 void CVTTPD2DQ(X64Reg regOp, OpArg arg); 718 void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
739 719
740 // Destinations are X64 regs (rax, rbx, ...) for these instructions. 720 // Destinations are X64 regs (rax, rbx, ...) for these instructions.
741 void CVTSS2SI(X64Reg xregdest, OpArg src); 721 void CVTSS2SI(X64Reg xregdest, const OpArg& src);
742 void CVTSD2SI(X64Reg xregdest, OpArg src); 722 void CVTSD2SI(X64Reg xregdest, const OpArg& src);
743 void CVTTSS2SI(X64Reg xregdest, OpArg arg); 723 void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
744 void CVTTSD2SI(X64Reg xregdest, OpArg arg); 724 void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
745 725
746 // SSE2: Packed integer instructions 726 // SSE2: Packed integer instructions
747 void PACKSSDW(X64Reg dest, OpArg arg); 727 void PACKSSDW(X64Reg dest, const OpArg& arg);
748 void PACKSSWB(X64Reg dest, OpArg arg); 728 void PACKSSWB(X64Reg dest, const OpArg& arg);
749 void PACKUSDW(X64Reg dest, OpArg arg); 729 void PACKUSDW(X64Reg dest, const OpArg& arg);
750 void PACKUSWB(X64Reg dest, OpArg arg); 730 void PACKUSWB(X64Reg dest, const OpArg& arg);
751 731
752 void PUNPCKLBW(X64Reg dest, const OpArg &arg); 732 void PUNPCKLBW(X64Reg dest, const OpArg &arg);
753 void PUNPCKLWD(X64Reg dest, const OpArg &arg); 733 void PUNPCKLWD(X64Reg dest, const OpArg &arg);
754 void PUNPCKLDQ(X64Reg dest, const OpArg &arg); 734 void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
755 void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); 735 void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
756 736
757 void PTEST(X64Reg dest, OpArg arg); 737 void PTEST(X64Reg dest, const OpArg& arg);
758 void PAND(X64Reg dest, OpArg arg); 738 void PAND(X64Reg dest, const OpArg& arg);
759 void PANDN(X64Reg dest, OpArg arg); 739 void PANDN(X64Reg dest, const OpArg& arg);
760 void PXOR(X64Reg dest, OpArg arg); 740 void PXOR(X64Reg dest, const OpArg& arg);
761 void POR(X64Reg dest, OpArg arg); 741 void POR(X64Reg dest, const OpArg& arg);
762 742
763 void PADDB(X64Reg dest, OpArg arg); 743 void PADDB(X64Reg dest, const OpArg& arg);
764 void PADDW(X64Reg dest, OpArg arg); 744 void PADDW(X64Reg dest, const OpArg& arg);
765 void PADDD(X64Reg dest, OpArg arg); 745 void PADDD(X64Reg dest, const OpArg& arg);
766 void PADDQ(X64Reg dest, OpArg arg); 746 void PADDQ(X64Reg dest, const OpArg& arg);
767 747
768 void PADDSB(X64Reg dest, OpArg arg); 748 void PADDSB(X64Reg dest, const OpArg& arg);
769 void PADDSW(X64Reg dest, OpArg arg); 749 void PADDSW(X64Reg dest, const OpArg& arg);
770 void PADDUSB(X64Reg dest, OpArg arg); 750 void PADDUSB(X64Reg dest, const OpArg& arg);
771 void PADDUSW(X64Reg dest, OpArg arg); 751 void PADDUSW(X64Reg dest, const OpArg& arg);
772 752
773 void PSUBB(X64Reg dest, OpArg arg); 753 void PSUBB(X64Reg dest, const OpArg& arg);
774 void PSUBW(X64Reg dest, OpArg arg); 754 void PSUBW(X64Reg dest, const OpArg& arg);
775 void PSUBD(X64Reg dest, OpArg arg); 755 void PSUBD(X64Reg dest, const OpArg& arg);
776 void PSUBQ(X64Reg dest, OpArg arg); 756 void PSUBQ(X64Reg dest, const OpArg& arg);
777 757
778 void PSUBSB(X64Reg dest, OpArg arg); 758 void PSUBSB(X64Reg dest, const OpArg& arg);
779 void PSUBSW(X64Reg dest, OpArg arg); 759 void PSUBSW(X64Reg dest, const OpArg& arg);
780 void PSUBUSB(X64Reg dest, OpArg arg); 760 void PSUBUSB(X64Reg dest, const OpArg& arg);
781 void PSUBUSW(X64Reg dest, OpArg arg); 761 void PSUBUSW(X64Reg dest, const OpArg& arg);
782 762
783 void PAVGB(X64Reg dest, OpArg arg); 763 void PAVGB(X64Reg dest, const OpArg& arg);
784 void PAVGW(X64Reg dest, OpArg arg); 764 void PAVGW(X64Reg dest, const OpArg& arg);
785 765
786 void PCMPEQB(X64Reg dest, OpArg arg); 766 void PCMPEQB(X64Reg dest, const OpArg& arg);
787 void PCMPEQW(X64Reg dest, OpArg arg); 767 void PCMPEQW(X64Reg dest, const OpArg& arg);
788 void PCMPEQD(X64Reg dest, OpArg arg); 768 void PCMPEQD(X64Reg dest, const OpArg& arg);
789 769
790 void PCMPGTB(X64Reg dest, OpArg arg); 770 void PCMPGTB(X64Reg dest, const OpArg& arg);
791 void PCMPGTW(X64Reg dest, OpArg arg); 771 void PCMPGTW(X64Reg dest, const OpArg& arg);
792 void PCMPGTD(X64Reg dest, OpArg arg); 772 void PCMPGTD(X64Reg dest, const OpArg& arg);
793 773
794 void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); 774 void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
795 void PINSRW(X64Reg dest, OpArg arg, u8 subreg); 775 void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
796 776
797 void PMADDWD(X64Reg dest, OpArg arg); 777 void PMADDWD(X64Reg dest, const OpArg& arg);
798 void PSADBW(X64Reg dest, OpArg arg); 778 void PSADBW(X64Reg dest, const OpArg& arg);
799 779
800 void PMAXSW(X64Reg dest, OpArg arg); 780 void PMAXSW(X64Reg dest, const OpArg& arg);
801 void PMAXUB(X64Reg dest, OpArg arg); 781 void PMAXUB(X64Reg dest, const OpArg& arg);
802 void PMINSW(X64Reg dest, OpArg arg); 782 void PMINSW(X64Reg dest, const OpArg& arg);
803 void PMINUB(X64Reg dest, OpArg arg); 783 void PMINUB(X64Reg dest, const OpArg& arg);
804 // SSE4: More MAX/MIN instructions. 784 // SSE4: More MAX/MIN instructions.
805 void PMINSB(X64Reg dest, OpArg arg); 785 void PMINSB(X64Reg dest, const OpArg& arg);
806 void PMINSD(X64Reg dest, OpArg arg); 786 void PMINSD(X64Reg dest, const OpArg& arg);
807 void PMINUW(X64Reg dest, OpArg arg); 787 void PMINUW(X64Reg dest, const OpArg& arg);
808 void PMINUD(X64Reg dest, OpArg arg); 788 void PMINUD(X64Reg dest, const OpArg& arg);
809 void PMAXSB(X64Reg dest, OpArg arg); 789 void PMAXSB(X64Reg dest, const OpArg& arg);
810 void PMAXSD(X64Reg dest, OpArg arg); 790 void PMAXSD(X64Reg dest, const OpArg& arg);
811 void PMAXUW(X64Reg dest, OpArg arg); 791 void PMAXUW(X64Reg dest, const OpArg& arg);
812 void PMAXUD(X64Reg dest, OpArg arg); 792 void PMAXUD(X64Reg dest, const OpArg& arg);
813 793
814 void PMOVMSKB(X64Reg dest, OpArg arg); 794 void PMOVMSKB(X64Reg dest, const OpArg& arg);
815 void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); 795 void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
816 void PSHUFB(X64Reg dest, OpArg arg); 796 void PSHUFB(X64Reg dest, const OpArg& arg);
817 797
818 void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); 798 void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
819 void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); 799 void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
820 800
821 void PSRLW(X64Reg reg, int shift); 801 void PSRLW(X64Reg reg, int shift);
822 void PSRLD(X64Reg reg, int shift); 802 void PSRLD(X64Reg reg, int shift);
823 void PSRLQ(X64Reg reg, int shift); 803 void PSRLQ(X64Reg reg, int shift);
824 void PSRLQ(X64Reg reg, OpArg arg); 804 void PSRLQ(X64Reg reg, const OpArg& arg);
825 void PSRLDQ(X64Reg reg, int shift); 805 void PSRLDQ(X64Reg reg, int shift);
826 806
827 void PSLLW(X64Reg reg, int shift); 807 void PSLLW(X64Reg reg, int shift);
@@ -833,198 +813,198 @@ public:
833 void PSRAD(X64Reg reg, int shift); 813 void PSRAD(X64Reg reg, int shift);
834 814
835 // SSE4: data type conversions 815 // SSE4: data type conversions
836 void PMOVSXBW(X64Reg dest, OpArg arg); 816 void PMOVSXBW(X64Reg dest, const OpArg& arg);
837 void PMOVSXBD(X64Reg dest, OpArg arg); 817 void PMOVSXBD(X64Reg dest, const OpArg& arg);
838 void PMOVSXBQ(X64Reg dest, OpArg arg); 818 void PMOVSXBQ(X64Reg dest, const OpArg& arg);
839 void PMOVSXWD(X64Reg dest, OpArg arg); 819 void PMOVSXWD(X64Reg dest, const OpArg& arg);
840 void PMOVSXWQ(X64Reg dest, OpArg arg); 820 void PMOVSXWQ(X64Reg dest, const OpArg& arg);
841 void PMOVSXDQ(X64Reg dest, OpArg arg); 821 void PMOVSXDQ(X64Reg dest, const OpArg& arg);
842 void PMOVZXBW(X64Reg dest, OpArg arg); 822 void PMOVZXBW(X64Reg dest, const OpArg& arg);
843 void PMOVZXBD(X64Reg dest, OpArg arg); 823 void PMOVZXBD(X64Reg dest, const OpArg& arg);
844 void PMOVZXBQ(X64Reg dest, OpArg arg); 824 void PMOVZXBQ(X64Reg dest, const OpArg& arg);
845 void PMOVZXWD(X64Reg dest, OpArg arg); 825 void PMOVZXWD(X64Reg dest, const OpArg& arg);
846 void PMOVZXWQ(X64Reg dest, OpArg arg); 826 void PMOVZXWQ(X64Reg dest, const OpArg& arg);
847 void PMOVZXDQ(X64Reg dest, OpArg arg); 827 void PMOVZXDQ(X64Reg dest, const OpArg& arg);
848 828
849 // SSE4: variable blend instructions (xmm0 implicit argument) 829 // SSE4: variable blend instructions (xmm0 implicit argument)
850 void PBLENDVB(X64Reg dest, OpArg arg); 830 void PBLENDVB(X64Reg dest, const OpArg& arg);
851 void BLENDVPS(X64Reg dest, OpArg arg); 831 void BLENDVPS(X64Reg dest, const OpArg& arg);
852 void BLENDVPD(X64Reg dest, OpArg arg); 832 void BLENDVPD(X64Reg dest, const OpArg& arg);
853 void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); 833 void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
854 void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); 834 void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
855 835
856 // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) 836 // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
857 void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); 837 void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
858 void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); 838 void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
859 void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); 839 void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
860 void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); 840 void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
861 841
862 inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } 842 void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
863 inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } 843 void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
864 inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } 844 void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
865 inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } 845 void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
866 846
867 inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } 847 void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
868 inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } 848 void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
869 inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } 849 void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
870 inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } 850 void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
871 851
872 inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } 852 void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
873 inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } 853 void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
874 inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } 854 void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
875 inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } 855 void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
876 856
877 inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } 857 void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
878 inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } 858 void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
879 inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } 859 void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
880 inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } 860 void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
881 861
882 // AVX 862 // AVX
883 void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 863 void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
884 void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 864 void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
885 void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 865 void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
886 void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 866 void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
887 void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 867 void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
888 void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 868 void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
889 void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 869 void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
890 void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 870 void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
891 void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 871 void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
892 void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); 872 void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
893 void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 873 void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
894 void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 874 void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
895 875
896 void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 876 void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
897 void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 877 void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
898 void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 878 void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
899 void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 879 void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
900 void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 880 void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
901 void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 881 void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
902 void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 882 void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
903 void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 883 void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
904 884
905 void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); 885 void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
906 void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); 886 void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
907 void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); 887 void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
908 void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); 888 void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
909 889
910 // FMA3 890 // FMA3
911 void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 891 void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
912 void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 892 void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
913 void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 893 void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
914 void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 894 void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
915 void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 895 void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
916 void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 896 void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
917 void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 897 void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
918 void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 898 void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
919 void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 899 void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
920 void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 900 void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
921 void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 901 void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
922 void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 902 void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
923 void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 903 void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
924 void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 904 void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
925 void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 905 void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
926 void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 906 void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
927 void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 907 void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
928 void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 908 void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
929 void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 909 void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
930 void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 910 void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
931 void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 911 void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
932 void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 912 void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
933 void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 913 void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
934 void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 914 void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
935 void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 915 void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
936 void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 916 void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
937 void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 917 void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
938 void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 918 void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
939 void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 919 void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
940 void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 920 void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
941 void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 921 void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
942 void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 922 void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
943 void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 923 void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
944 void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 924 void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
945 void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 925 void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
946 void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 926 void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
947 void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 927 void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
948 void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 928 void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
949 void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 929 void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
950 void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 930 void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
951 void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 931 void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
952 void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 932 void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
953 void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 933 void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
954 void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 934 void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
955 void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 935 void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
956 void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 936 void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
957 void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 937 void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
958 void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 938 void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
959 void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 939 void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
960 void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 940 void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
961 void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 941 void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
962 void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 942 void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
963 void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 943 void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
964 void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 944 void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
965 void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 945 void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
966 void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 946 void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
967 void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); 947 void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
968 void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 948 void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
969 void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 949 void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
970 void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); 950 void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
971 951
972 // VEX GPR instructions 952 // VEX GPR instructions
973 void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); 953 void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
974 void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); 954 void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
975 void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); 955 void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
976 void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); 956 void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
977 void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); 957 void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
978 void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); 958 void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
979 void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); 959 void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
980 void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); 960 void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
981 void BLSR(int bits, X64Reg regOp, OpArg arg); 961 void BLSR(int bits, X64Reg regOp, const OpArg& arg);
982 void BLSMSK(int bits, X64Reg regOp, OpArg arg); 962 void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
983 void BLSI(int bits, X64Reg regOp, OpArg arg); 963 void BLSI(int bits, X64Reg regOp, const OpArg& arg);
984 void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); 964 void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
985 void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); 965 void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
986 966
987 void RDTSC(); 967 void RDTSC();
988 968
989 // Utility functions 969 // Utility functions
990 // The difference between this and CALL is that this aligns the stack 970 // The difference between this and CALL is that this aligns the stack
991 // where appropriate. 971 // where appropriate.
992 void ABI_CallFunction(const void *func); 972 void ABI_CallFunction(const void* func);
993 template <typename T> 973 template <typename T>
994 void ABI_CallFunction(T (*func)()) { 974 void ABI_CallFunction(T (*func)()) {
995 ABI_CallFunction((const void *)func); 975 ABI_CallFunction((const void*)func);
996 } 976 }
997 977
998 void ABI_CallFunction(const u8 *func) { 978 void ABI_CallFunction(const u8* func) {
999 ABI_CallFunction((const void *)func); 979 ABI_CallFunction((const void*)func);
1000 } 980 }
1001 void ABI_CallFunctionC16(const void *func, u16 param1); 981 void ABI_CallFunctionC16(const void* func, u16 param1);
1002 void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); 982 void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
1003 983
1004 984
1005 // These only support u32 parameters, but that's enough for a lot of uses. 985 // These only support u32 parameters, but that's enough for a lot of uses.
1006 // These will destroy the 1 or 2 first "parameter regs". 986 // These will destroy the 1 or 2 first "parameter regs".
1007 void ABI_CallFunctionC(const void *func, u32 param1); 987 void ABI_CallFunctionC(const void* func, u32 param1);
1008 void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); 988 void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
1009 void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); 989 void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
1010 void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); 990 void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
1011 void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); 991 void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
1012 void ABI_CallFunctionP(const void *func, void *param1); 992 void ABI_CallFunctionP(const void* func, void* param1);
1013 void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); 993 void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
1014 void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); 994 void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
1015 void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); 995 void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
1016 void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); 996 void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
1017 void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); 997 void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
1018 void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); 998 void ABI_CallFunctionA(const void* func, const OpArg& arg1);
1019 void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); 999 void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
1020 1000
1021 // Pass a register as a parameter. 1001 // Pass a register as a parameter.
1022 void ABI_CallFunctionR(const void *func, X64Reg reg1); 1002 void ABI_CallFunctionR(const void* func, X64Reg reg1);
1023 void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); 1003 void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
1024 1004
1025 template <typename Tr, typename T1> 1005 template <typename Tr, typename T1>
1026 void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { 1006 void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
1027 ABI_CallFunctionC((const void *)func, param1); 1007 ABI_CallFunctionC((const void*)func, param1);
1028 } 1008 }
1029 1009
1030 // A function that doesn't have any control over what it will do to regs, 1010 // A function that doesn't have any control over what it will do to regs,
@@ -1048,9 +1028,9 @@ public:
1048 void ABI_EmitEpilogue(int maxCallParams); 1028 void ABI_EmitEpilogue(int maxCallParams);
1049 1029
1050 #ifdef _M_IX86 1030 #ifdef _M_IX86
1051 inline int ABI_GetNumXMMRegs() { return 8; } 1031 static int ABI_GetNumXMMRegs() { return 8; }
1052 #else 1032 #else
1053 inline int ABI_GetNumXMMRegs() { return 16; } 1033 static int ABI_GetNumXMMRegs() { return 16; }
1054 #endif 1034 #endif
1055}; // class XEmitter 1035}; // class XEmitter
1056 1036
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6cc60fd58..c17290b9b 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -29,6 +29,7 @@ set(SRCS
29 hle/kernel/address_arbiter.cpp 29 hle/kernel/address_arbiter.cpp
30 hle/kernel/event.cpp 30 hle/kernel/event.cpp
31 hle/kernel/kernel.cpp 31 hle/kernel/kernel.cpp
32 hle/kernel/memory.cpp
32 hle/kernel/mutex.cpp 33 hle/kernel/mutex.cpp
33 hle/kernel/process.cpp 34 hle/kernel/process.cpp
34 hle/kernel/resource_limit.cpp 35 hle/kernel/resource_limit.cpp
@@ -115,7 +116,6 @@ set(SRCS
115 loader/loader.cpp 116 loader/loader.cpp
116 loader/ncch.cpp 117 loader/ncch.cpp
117 tracer/recorder.cpp 118 tracer/recorder.cpp
118 mem_map.cpp
119 memory.cpp 119 memory.cpp
120 settings.cpp 120 settings.cpp
121 system.cpp 121 system.cpp
@@ -157,6 +157,7 @@ set(HEADERS
157 hle/kernel/address_arbiter.h 157 hle/kernel/address_arbiter.h
158 hle/kernel/event.h 158 hle/kernel/event.h
159 hle/kernel/kernel.h 159 hle/kernel/kernel.h
160 hle/kernel/memory.h
160 hle/kernel/mutex.h 161 hle/kernel/mutex.h
161 hle/kernel/process.h 162 hle/kernel/process.h
162 hle/kernel/resource_limit.h 163 hle/kernel/resource_limit.h
@@ -245,7 +246,6 @@ set(HEADERS
245 loader/ncch.h 246 loader/ncch.h
246 tracer/recorder.h 247 tracer/recorder.h
247 tracer/citrace.h 248 tracer/citrace.h
248 mem_map.h
249 memory.h 249 memory.h
250 memory_setup.h 250 memory_setup.h
251 settings.h 251 settings.h
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp
index ccb2eb0eb..0491717dc 100644
--- a/src/core/arm/skyeye_common/armstate.cpp
+++ b/src/core/arm/skyeye_common/armstate.cpp
@@ -4,7 +4,6 @@
4 4
5#include "common/swap.h" 5#include "common/swap.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/mem_map.h"
8#include "core/memory.h" 7#include "core/memory.h"
9#include "core/arm/skyeye_common/armstate.h" 8#include "core/arm/skyeye_common/armstate.h"
10#include "core/arm/skyeye_common/vfp/vfp.h" 9#include "core/arm/skyeye_common/vfp/vfp.h"
diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp
index d31fb9449..883713e86 100644
--- a/src/core/arm/skyeye_common/armsupp.cpp
+++ b/src/core/arm/skyeye_common/armsupp.cpp
@@ -17,7 +17,6 @@
17 17
18#include "common/logging/log.h" 18#include "common/logging/log.h"
19 19
20#include "core/mem_map.h"
21#include "core/arm/skyeye_common/arm_regformat.h" 20#include "core/arm/skyeye_common/arm_regformat.h"
22#include "core/arm/skyeye_common/armstate.h" 21#include "core/arm/skyeye_common/armstate.h"
23#include "core/arm/skyeye_common/armsupp.h" 22#include "core/arm/skyeye_common/armsupp.h"
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index aea936d2d..b1a72dc0c 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -25,10 +25,6 @@ void Init() {
25 config_mem.sys_core_ver = 0x2; 25 config_mem.sys_core_ver = 0x2;
26 config_mem.unit_info = 0x1; // Bit 0 set for Retail 26 config_mem.unit_info = 0x1; // Bit 0 set for Retail
27 config_mem.prev_firm = 0; 27 config_mem.prev_firm = 0;
28 config_mem.app_mem_type = 0x2; // Default app mem type is 0
29 config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB)
30 config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB
31 config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc);
32 config_mem.firm_unk = 0; 28 config_mem.firm_unk = 0;
33 config_mem.firm_version_rev = 0; 29 config_mem.firm_version_rev = 0;
34 config_mem.firm_version_min = 0x40; 30 config_mem.firm_version_min = 0x40;
@@ -36,7 +32,4 @@ void Init() {
36 config_mem.firm_sys_core_ver = 0x2; 32 config_mem.firm_sys_core_ver = 0x2;
37} 33}
38 34
39void Shutdown() {
40}
41
42} // namespace 35} // namespace
diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h
index 9825a09e8..24a1254f2 100644
--- a/src/core/hle/config_mem.h
+++ b/src/core/hle/config_mem.h
@@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory
52extern ConfigMemDef config_mem; 52extern ConfigMemDef config_mem;
53 53
54void Init(); 54void Init();
55void Shutdown();
56 55
57} // namespace 56} // namespace
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 1a0518926..5846a161b 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() {
172 FuncReturn(func(PARAM(0), param1, param2).raw); 172 FuncReturn(func(PARAM(0), param1, param2).raw);
173} 173}
174 174
175template<ResultCode func(s64*, Handle, u32)> void Wrap() {
176 s64 param_1 = 0;
177 u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw;
178 Core::g_app_core->SetReg(1, (u32)param_1);
179 Core::g_app_core->SetReg(2, (u32)(param_1 >> 32));
180 FuncReturn(retval);
181}
182
175//////////////////////////////////////////////////////////////////////////////////////////////////// 183////////////////////////////////////////////////////////////////////////////////////////////////////
176// Function wrappers that return type u32 184// Function wrappers that return type u32
177 185
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index cd0a400dc..331b1b22a 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -34,8 +34,6 @@ void Reschedule(const char *reason) {
34 34
35void Init() { 35void Init() {
36 Service::Init(); 36 Service::Init();
37 ConfigMem::Init();
38 SharedPage::Init();
39 37
40 g_reschedule = false; 38 g_reschedule = false;
41 39
@@ -43,8 +41,6 @@ void Init() {
43} 41}
44 42
45void Shutdown() { 43void Shutdown() {
46 ConfigMem::Shutdown();
47 SharedPage::Shutdown();
48 Service::Shutdown(); 44 Service::Shutdown();
49 45
50 LOG_DEBUG(Kernel, "shutdown OK"); 46 LOG_DEBUG(Kernel, "shutdown OK");
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 5711c0405..7a401a965 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -7,11 +7,14 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9 9
10#include "core/hle/config_mem.h"
10#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/resource_limit.h" 12#include "core/hle/kernel/memory.h"
12#include "core/hle/kernel/process.h" 13#include "core/hle/kernel/process.h"
14#include "core/hle/kernel/resource_limit.h"
13#include "core/hle/kernel/thread.h" 15#include "core/hle/kernel/thread.h"
14#include "core/hle/kernel/timer.h" 16#include "core/hle/kernel/timer.h"
17#include "core/hle/shared_page.h"
15 18
16namespace Kernel { 19namespace Kernel {
17 20
@@ -119,6 +122,13 @@ void HandleTable::Clear() {
119 122
120/// Initialize the kernel 123/// Initialize the kernel
121void Init() { 124void Init() {
125 ConfigMem::Init();
126 SharedPage::Init();
127
128 // TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead
129 // For now it defaults to the one with a largest allocation to the app
130 Kernel::MemoryInit(2); // Allocates 96MB to the application
131
122 Kernel::ResourceLimitsInit(); 132 Kernel::ResourceLimitsInit();
123 Kernel::ThreadingInit(); 133 Kernel::ThreadingInit();
124 Kernel::TimersInit(); 134 Kernel::TimersInit();
@@ -131,11 +141,14 @@ void Init() {
131 141
132/// Shutdown the kernel 142/// Shutdown the kernel
133void Shutdown() { 143void Shutdown() {
144 g_handle_table.Clear(); // Free all kernel objects
145
134 Kernel::ThreadingShutdown(); 146 Kernel::ThreadingShutdown();
147 g_current_process = nullptr;
148
135 Kernel::TimersShutdown(); 149 Kernel::TimersShutdown();
136 Kernel::ResourceLimitsShutdown(); 150 Kernel::ResourceLimitsShutdown();
137 g_handle_table.Clear(); // Free all kernel objects 151 Kernel::MemoryShutdown();
138 g_current_process = nullptr;
139} 152}
140 153
141} // namespace 154} // namespace
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
new file mode 100644
index 000000000..e4fc5f3c4
--- /dev/null
+++ b/src/core/hle/kernel/memory.cpp
@@ -0,0 +1,136 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <memory>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/hle/config_mem.h"
14#include "core/hle/kernel/memory.h"
15#include "core/hle/kernel/vm_manager.h"
16#include "core/hle/result.h"
17#include "core/hle/shared_page.h"
18#include "core/memory.h"
19#include "core/memory_setup.h"
20
21////////////////////////////////////////////////////////////////////////////////////////////////////
22
23namespace Kernel {
24
25static MemoryRegionInfo memory_regions[3];
26
27/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system
28/// memory configuration type.
29static const u32 memory_region_sizes[8][3] = {
30 // Old 3DS layouts
31 {0x04000000, 0x02C00000, 0x01400000}, // 0
32 { /* This appears to be unused. */ }, // 1
33 {0x06000000, 0x00C00000, 0x01400000}, // 2
34 {0x05000000, 0x01C00000, 0x01400000}, // 3
35 {0x04800000, 0x02400000, 0x01400000}, // 4
36 {0x02000000, 0x04C00000, 0x01400000}, // 5
37
38 // New 3DS layouts
39 {0x07C00000, 0x06400000, 0x02000000}, // 6
40 {0x0B200000, 0x02E00000, 0x02000000}, // 7
41};
42
43void MemoryInit(u32 mem_type) {
44 // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead.
45 ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!");
46 ASSERT(mem_type != 1);
47
48 // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with
49 // the sizes specified in the memory_region_sizes table.
50 VAddr base = 0;
51 for (int i = 0; i < 3; ++i) {
52 memory_regions[i].base = base;
53 memory_regions[i].size = memory_region_sizes[mem_type][i];
54 memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
55
56 base += memory_regions[i].size;
57 }
58
59 // We must've allocated the entire FCRAM by the end
60 ASSERT(base == Memory::FCRAM_SIZE);
61
62 using ConfigMem::config_mem;
63 config_mem.app_mem_type = mem_type;
64 // app_mem_malloc does not always match the configured size for memory_region[0]: in case the
65 // n3DS type override is in effect it reports the size the game expects, not the real one.
66 config_mem.app_mem_alloc = memory_region_sizes[mem_type][0];
67 config_mem.sys_mem_alloc = memory_regions[1].size;
68 config_mem.base_mem_alloc = memory_regions[2].size;
69}
70
71void MemoryShutdown() {
72 for (auto& region : memory_regions) {
73 region.base = 0;
74 region.size = 0;
75 region.linear_heap_memory = nullptr;
76 }
77}
78
79MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) {
80 switch (region) {
81 case MemoryRegion::APPLICATION:
82 return &memory_regions[0];
83 case MemoryRegion::SYSTEM:
84 return &memory_regions[1];
85 case MemoryRegion::BASE:
86 return &memory_regions[2];
87 default:
88 UNREACHABLE();
89 }
90}
91
92}
93
94namespace Memory {
95
96namespace {
97
98struct MemoryArea {
99 u32 base;
100 u32 size;
101 const char* name;
102};
103
104// We don't declare the IO regions in here since they're handled by other means.
105static MemoryArea memory_areas[] = {
106 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
107 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
108 {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
109 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
110};
111
112}
113
114void Init() {
115 InitMemoryMap();
116 LOG_DEBUG(HW_Memory, "initialized OK");
117}
118
119void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
120 using namespace Kernel;
121
122 for (MemoryArea& area : memory_areas) {
123 auto block = std::make_shared<std::vector<u8>>(area.size);
124 address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
125 }
126
127 auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
128 (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
129 address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
130
131 auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
132 (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
133 address_space.Reprotect(shared_page_vma, VMAPermission::Read);
134}
135
136} // namespace
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h
new file mode 100644
index 000000000..36690b091
--- /dev/null
+++ b/src/core/hle/kernel/memory.h
@@ -0,0 +1,35 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8
9#include "common/common_types.h"
10
11#include "core/hle/kernel/process.h"
12
13namespace Kernel {
14
15class VMManager;
16
17struct MemoryRegionInfo {
18 u32 base; // Not an address, but offset from start of FCRAM
19 u32 size;
20
21 std::shared_ptr<std::vector<u8>> linear_heap_memory;
22};
23
24void MemoryInit(u32 mem_type);
25void MemoryShutdown();
26MemoryRegionInfo* GetMemoryRegion(MemoryRegion region);
27
28}
29
30namespace Memory {
31
32void Init();
33void InitLegacyAddressSpace(Kernel::VMManager& address_space);
34
35} // namespace
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index a7892c652..124047a53 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -7,11 +7,11 @@
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/make_unique.h" 8#include "common/make_unique.h"
9 9
10#include "core/hle/kernel/memory.h"
10#include "core/hle/kernel/process.h" 11#include "core/hle/kernel/process.h"
11#include "core/hle/kernel/resource_limit.h" 12#include "core/hle/kernel/resource_limit.h"
12#include "core/hle/kernel/thread.h" 13#include "core/hle/kernel/thread.h"
13#include "core/hle/kernel/vm_manager.h" 14#include "core/hle/kernel/vm_manager.h"
14#include "core/mem_map.h"
15#include "core/memory.h" 15#include "core/memory.h"
16 16
17namespace Kernel { 17namespace Kernel {
@@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {
36 process->codeset = std::move(code_set); 36 process->codeset = std::move(code_set);
37 process->flags.raw = 0; 37 process->flags.raw = 0;
38 process->flags.memory_region = MemoryRegion::APPLICATION; 38 process->flags.memory_region = MemoryRegion::APPLICATION;
39 process->address_space = Common::make_unique<VMManager>(); 39 Memory::InitLegacyAddressSpace(process->vm_manager);
40 Memory::InitLegacyAddressSpace(*process->address_space);
41 40
42 return process; 41 return process;
43} 42}
@@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
93 mapping.unk_flag = false; 92 mapping.unk_flag = false;
94 } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF 93 } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF
95 // Kernel version 94 // Kernel version
96 int minor = descriptor & 0xFF; 95 kernel_version = descriptor & 0xFFFF;
97 int major = (descriptor >> 8) & 0xFF; 96
98 LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor); 97 int minor = kernel_version & 0xFF;
98 int major = (kernel_version >> 8) & 0xFF;
99 LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor);
99 } else { 100 } else {
100 LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor); 101 LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor);
101 } 102 }
@@ -103,20 +104,153 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
103} 104}
104 105
105void Process::Run(s32 main_thread_priority, u32 stack_size) { 106void Process::Run(s32 main_thread_priority, u32 stack_size) {
107 memory_region = GetMemoryRegion(flags.memory_region);
108
106 auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { 109 auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) {
107 auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory, 110 auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory,
108 segment.offset, segment.size, memory_state).Unwrap(); 111 segment.offset, segment.size, memory_state).Unwrap();
109 address_space->Reprotect(vma, permissions); 112 vm_manager.Reprotect(vma, permissions);
113 misc_memory_used += segment.size;
110 }; 114 };
111 115
116 // Map CodeSet segments
112 MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code); 117 MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code);
113 MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); 118 MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code);
114 MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); 119 MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private);
115 120
116 address_space->LogLayout(); 121 // Allocate and map stack
122 vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size,
123 std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked
124 ).Unwrap();
125 misc_memory_used += stack_size;
126
127 vm_manager.LogLayout(Log::Level::Debug);
117 Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); 128 Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority);
118} 129}
119 130
131VAddr Process::GetLinearHeapBase() const {
132 return (kernel_version < 0x22C ? Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_SIZE)
133 + memory_region->base;
134}
135
136VAddr Process::GetLinearHeapLimit() const {
137 return GetLinearHeapBase() + memory_region->size;
138}
139
140ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) {
141 if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) {
142 return ERR_INVALID_ADDRESS;
143 }
144
145 if (heap_memory == nullptr) {
146 // Initialize heap
147 heap_memory = std::make_shared<std::vector<u8>>();
148 heap_start = heap_end = target;
149 }
150
151 // If necessary, expand backing vector to cover new heap extents.
152 if (target < heap_start) {
153 heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
154 heap_start = target;
155 vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
156 }
157 if (target + size > heap_end) {
158 heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
159 heap_end = target + size;
160 vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
161 }
162 ASSERT(heap_end - heap_start == heap_memory->size());
163
164 CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private));
165 vm_manager.Reprotect(vma, perms);
166
167 heap_used += size;
168
169 return MakeResult<VAddr>(heap_end - size);
170}
171
172ResultCode Process::HeapFree(VAddr target, u32 size) {
173 if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) {
174 return ERR_INVALID_ADDRESS;
175 }
176
177 ResultCode result = vm_manager.UnmapRange(target, size);
178 if (result.IsError()) return result;
179
180 heap_used -= size;
181
182 return RESULT_SUCCESS;
183}
184
185ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) {
186 auto& linheap_memory = memory_region->linear_heap_memory;
187
188 VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
189 // Games and homebrew only ever seem to pass 0 here (which lets the kernel decide the address),
190 // but explicit addresses are also accepted and respected.
191 if (target == 0) {
192 target = heap_end;
193 }
194
195 if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
196 target > heap_end || target + size < target) {
197
198 return ERR_INVALID_ADDRESS;
199 }
200
201 // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its
202 // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
203 // but expansions are only allowed at the end.
204 if (target == heap_end) {
205 linheap_memory->insert(linheap_memory->end(), size, 0);
206 vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
207 }
208
209 // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the
210 // same region. It is unknown if or how the 3DS kernel checks against this.
211 size_t offset = target - GetLinearHeapBase();
212 CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous));
213 vm_manager.Reprotect(vma, perms);
214
215 linear_heap_used += size;
216
217 return MakeResult<VAddr>(target);
218}
219
220ResultCode Process::LinearFree(VAddr target, u32 size) {
221 auto& linheap_memory = memory_region->linear_heap_memory;
222
223 if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
224 target + size < target) {
225
226 return ERR_INVALID_ADDRESS;
227 }
228
229 VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
230 if (target + size > heap_end) {
231 return ERR_INVALID_ADDRESS_STATE;
232 }
233
234 ResultCode result = vm_manager.UnmapRange(target, size);
235 if (result.IsError()) return result;
236
237 linear_heap_used -= size;
238
239 if (target + size == heap_end) {
240 // End of linear heap has been freed, so check what's the last allocated block in it and
241 // reduce the size.
242 auto vma = vm_manager.FindVMA(target);
243 ASSERT(vma != vm_manager.vma_map.end());
244 ASSERT(vma->second.type == VMAType::Free);
245 VAddr new_end = vma->second.base;
246 if (new_end >= GetLinearHeapBase()) {
247 linheap_memory->resize(new_end - GetLinearHeapBase());
248 }
249 }
250
251 return RESULT_SUCCESS;
252}
253
120Kernel::Process::Process() {} 254Kernel::Process::Process() {}
121Kernel::Process::~Process() {} 255Kernel::Process::~Process() {}
122 256
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83d3aceae..60e17f251 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16 16
17#include "core/hle/kernel/kernel.h" 17#include "core/hle/kernel/kernel.h"
18#include "core/hle/kernel/vm_manager.h"
18 19
19namespace Kernel { 20namespace Kernel {
20 21
@@ -48,7 +49,7 @@ union ProcessFlags {
48}; 49};
49 50
50class ResourceLimit; 51class ResourceLimit;
51class VMManager; 52struct MemoryRegionInfo;
52 53
53struct CodeSet final : public Object { 54struct CodeSet final : public Object {
54 static SharedPtr<CodeSet> Create(std::string name, u64 program_id); 55 static SharedPtr<CodeSet> Create(std::string name, u64 program_id);
@@ -104,14 +105,12 @@ public:
104 /// processes access to specific I/O regions and device memory. 105 /// processes access to specific I/O regions and device memory.
105 boost::container::static_vector<AddressMapping, 8> address_mappings; 106 boost::container::static_vector<AddressMapping, 8> address_mappings;
106 ProcessFlags flags; 107 ProcessFlags flags;
108 /// Kernel compatibility version for this process
109 u16 kernel_version = 0;
107 110
108 /// The id of this process 111 /// The id of this process
109 u32 process_id = next_process_id++; 112 u32 process_id = next_process_id++;
110 113
111 /// Bitmask of the used TLS slots
112 std::bitset<300> used_tls_slots;
113 std::unique_ptr<VMManager> address_space;
114
115 /** 114 /**
116 * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them 115 * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
117 * to this process. 116 * to this process.
@@ -123,6 +122,36 @@ public:
123 */ 122 */
124 void Run(s32 main_thread_priority, u32 stack_size); 123 void Run(s32 main_thread_priority, u32 stack_size);
125 124
125
126 ///////////////////////////////////////////////////////////////////////////////////////////////
127 // Memory Management
128
129 VMManager vm_manager;
130
131 // Memory used to back the allocations in the regular heap. A single vector is used to cover
132 // the entire virtual address space extents that bound the allocations, including any holes.
133 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
134 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
135 std::shared_ptr<std::vector<u8>> heap_memory;
136 // The left/right bounds of the address space covered by heap_memory.
137 VAddr heap_start = 0, heap_end = 0;
138
139 u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0;
140
141 MemoryRegionInfo* memory_region = nullptr;
142
143 /// Bitmask of the used TLS slots
144 std::bitset<300> used_tls_slots;
145
146 VAddr GetLinearHeapBase() const;
147 VAddr GetLinearHeapLimit() const;
148
149 ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms);
150 ResultCode HeapFree(VAddr target, u32 size);
151
152 ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms);
153 ResultCode LinearFree(VAddr target, u32 size);
154
126private: 155private:
127 Process(); 156 Process();
128 ~Process() override; 157 ~Process() override;
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 94b3e3298..67dde08c2 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -6,7 +6,6 @@
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8 8
9#include "core/mem_map.h"
10#include "core/hle/kernel/resource_limit.h" 9#include "core/hle/kernel/resource_limit.h"
11 10
12namespace Kernel { 11namespace Kernel {
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 29ea6d531..c10126513 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,6 +117,7 @@ void Thread::Stop() {
117 wait_objects.clear(); 117 wait_objects.clear();
118 118
119 Kernel::g_current_process->used_tls_slots[tls_index] = false; 119 Kernel::g_current_process->used_tls_slots[tls_index] = false;
120 g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;
120 121
121 HLE::Reschedule(__func__); 122 HLE::Reschedule(__func__);
122} 123}
@@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
414 } 415 }
415 416
416 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); 417 ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");
418 g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;
417 419
418 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used 420 // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
419 // to initialize the context 421 // to initialize the context
@@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
504} 506}
505 507
506VAddr Thread::GetTLSAddress() const { 508VAddr Thread::GetTLSAddress() const {
507 return Memory::TLS_AREA_VADDR + tls_index * 0x200; 509 return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
508} 510}
509 511
510//////////////////////////////////////////////////////////////////////////////////////////////////// 512////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 205cc7b53..2610acf76 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -11,6 +11,15 @@
11 11
12namespace Kernel { 12namespace Kernel {
13 13
14static const char* GetMemoryStateName(MemoryState state) {
15 static const char* names[] = {
16 "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased",
17 "Alias", "AliasCode", "Locked",
18 };
19
20 return names[(int)state];
21}
22
14bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { 23bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
15 ASSERT(base + size == next.base); 24 ASSERT(base + size == next.base);
16 if (permissions != next.permissions || 25 if (permissions != next.permissions ||
@@ -51,11 +60,15 @@ void VMManager::Reset() {
51} 60}
52 61
53VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { 62VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
54 return std::prev(vma_map.upper_bound(target)); 63 if (target >= MAX_ADDRESS) {
64 return vma_map.end();
65 } else {
66 return std::prev(vma_map.upper_bound(target));
67 }
55} 68}
56 69
57ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, 70ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
58 std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) { 71 std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) {
59 ASSERT(block != nullptr); 72 ASSERT(block != nullptr);
60 ASSERT(offset + size <= block->size()); 73 ASSERT(offset + size <= block->size());
61 74
@@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u3
106 return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); 119 return MakeResult<VMAHandle>(MergeAdjacent(vma_handle));
107} 120}
108 121
109void VMManager::Unmap(VMAHandle vma_handle) { 122VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) {
110 VMAIter iter = StripIterConstness(vma_handle); 123 VirtualMemoryArea& vma = vma_handle->second;
111
112 VirtualMemoryArea& vma = iter->second;
113 vma.type = VMAType::Free; 124 vma.type = VMAType::Free;
114 vma.permissions = VMAPermission::None; 125 vma.permissions = VMAPermission::None;
115 vma.meminfo_state = MemoryState::Free; 126 vma.meminfo_state = MemoryState::Free;
@@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) {
121 132
122 UpdatePageTableForVMA(vma); 133 UpdatePageTableForVMA(vma);
123 134
124 MergeAdjacent(iter); 135 return MergeAdjacent(vma_handle);
136}
137
138ResultCode VMManager::UnmapRange(VAddr target, u32 size) {
139 CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
140 VAddr target_end = target + size;
141
142 VMAIter end = vma_map.end();
143 // The comparison against the end of the range must be done using addresses since VMAs can be
144 // merged during this process, causing invalidation of the iterators.
145 while (vma != end && vma->second.base < target_end) {
146 vma = std::next(Unmap(vma));
147 }
148
149 ASSERT(FindVMA(target)->second.size >= size);
150 return RESULT_SUCCESS;
125} 151}
126 152
127void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { 153VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {
128 VMAIter iter = StripIterConstness(vma_handle); 154 VMAIter iter = StripIterConstness(vma_handle);
129 155
130 VirtualMemoryArea& vma = iter->second; 156 VirtualMemoryArea& vma = iter->second;
131 vma.permissions = new_perms; 157 vma.permissions = new_perms;
132 UpdatePageTableForVMA(vma); 158 UpdatePageTableForVMA(vma);
133 159
134 MergeAdjacent(iter); 160 return MergeAdjacent(iter);
161}
162
163ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) {
164 CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
165 VAddr target_end = target + size;
166
167 VMAIter end = vma_map.end();
168 // The comparison against the end of the range must be done using addresses since VMAs can be
169 // merged during this process, causing invalidation of the iterators.
170 while (vma != end && vma->second.base < target_end) {
171 vma = std::next(StripIterConstness(Reprotect(vma, new_perms)));
172 }
173
174 return RESULT_SUCCESS;
135} 175}
136 176
137void VMManager::LogLayout() const { 177void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
178 // If this ever proves to have a noticeable performance impact, allow users of the function to
179 // specify a specific range of addresses to limit the scan to.
138 for (const auto& p : vma_map) { 180 for (const auto& p : vma_map) {
139 const VirtualMemoryArea& vma = p.second; 181 const VirtualMemoryArea& vma = p.second;
140 LOG_DEBUG(Kernel, "%08X - %08X size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size, 182 if (block == vma.backing_block.get()) {
183 UpdatePageTableForVMA(vma);
184 }
185 }
186}
187
188void VMManager::LogLayout(Log::Level log_level) const {
189 for (const auto& p : vma_map) {
190 const VirtualMemoryArea& vma = p.second;
191 LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X size: %8X %c%c%c %s",
192 vma.base, vma.base + vma.size, vma.size,
141 (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-', 193 (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-',
142 (u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-', 194 (u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-',
143 (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-'); 195 (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state));
144 } 196 }
145} 197}
146 198
@@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) {
151} 203}
152 204
153ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { 205ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
154 ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size); 206 ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
155 ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base); 207 ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base);
156 208
157 VMAIter vma_handle = StripIterConstness(FindVMA(base)); 209 VMAIter vma_handle = StripIterConstness(FindVMA(base));
158 if (vma_handle == vma_map.end()) { 210 if (vma_handle == vma_map.end()) {
159 // Target address is outside the range managed by the kernel 211 // Target address is outside the range managed by the kernel
160 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, 212 return ERR_INVALID_ADDRESS;
161 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5
162 } 213 }
163 214
164 VirtualMemoryArea& vma = vma_handle->second; 215 VirtualMemoryArea& vma = vma_handle->second;
165 if (vma.type != VMAType::Free) { 216 if (vma.type != VMAType::Free) {
166 // Region is already allocated 217 // Region is already allocated
167 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, 218 return ERR_INVALID_ADDRESS_STATE;
168 ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
169 } 219 }
170 220
171 u32 start_in_vma = base - vma.base; 221 u32 start_in_vma = base - vma.base;
@@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
173 223
174 if (end_in_vma > vma.size) { 224 if (end_in_vma > vma.size) {
175 // Requested allocation doesn't fit inside VMA 225 // Requested allocation doesn't fit inside VMA
176 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, 226 return ERR_INVALID_ADDRESS_STATE;
177 ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
178 } 227 }
179 228
180 if (end_in_vma != vma.size) { 229 if (end_in_vma != vma.size) {
@@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
189 return MakeResult<VMAIter>(vma_handle); 238 return MakeResult<VMAIter>(vma_handle);
190} 239}
191 240
241ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) {
242 ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
243 ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target);
244
245 VAddr target_end = target + size;
246 ASSERT(target_end >= target);
247 ASSERT(target_end <= MAX_ADDRESS);
248 ASSERT(size > 0);
249
250 VMAIter begin_vma = StripIterConstness(FindVMA(target));
251 VMAIter i_end = vma_map.lower_bound(target_end);
252 for (auto i = begin_vma; i != i_end; ++i) {
253 if (i->second.type == VMAType::Free) {
254 return ERR_INVALID_ADDRESS_STATE;
255 }
256 }
257
258 if (target != begin_vma->second.base) {
259 begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
260 }
261
262 VMAIter end_vma = StripIterConstness(FindVMA(target_end));
263 if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
264 end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
265 }
266
267 return MakeResult<VMAIter>(begin_vma);
268}
269
192VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) { 270VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) {
193 VirtualMemoryArea& old_vma = vma_handle->second; 271 VirtualMemoryArea& old_vma = vma_handle->second;
194 VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA 272 VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index b3795a94a..4e95f1f0c 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,14 @@
14 14
15namespace Kernel { 15namespace Kernel {
16 16
17const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5
18 ErrorDescription::InvalidAddress, ErrorModule::OS,
19 ErrorSummary::InvalidArgument, ErrorLevel::Usage};
20
21const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5
22 ErrorDescription::InvalidAddress, ErrorModule::OS,
23 ErrorSummary::InvalidState, ErrorLevel::Usage};
24
17enum class VMAType : u8 { 25enum class VMAType : u8 {
18 /// VMA represents an unmapped region of the address space. 26 /// VMA represents an unmapped region of the address space.
19 Free, 27 Free,
@@ -75,7 +83,7 @@ struct VirtualMemoryArea {
75 /// Memory block backing this VMA. 83 /// Memory block backing this VMA.
76 std::shared_ptr<std::vector<u8>> backing_block = nullptr; 84 std::shared_ptr<std::vector<u8>> backing_block = nullptr;
77 /// Offset into the backing_memory the mapping starts from. 85 /// Offset into the backing_memory the mapping starts from.
78 u32 offset = 0; 86 size_t offset = 0;
79 87
80 // Settings for type = BackingMemory 88 // Settings for type = BackingMemory
81 /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed. 89 /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed.
@@ -141,7 +149,7 @@ public:
141 * @param state MemoryState tag to attach to the VMA. 149 * @param state MemoryState tag to attach to the VMA.
142 */ 150 */
143 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, 151 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
144 u32 offset, u32 size, MemoryState state); 152 size_t offset, u32 size, MemoryState state);
145 153
146 /** 154 /**
147 * Maps an unmanaged host memory pointer at a given address. 155 * Maps an unmanaged host memory pointer at a given address.
@@ -163,14 +171,23 @@ public:
163 */ 171 */
164 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state); 172 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state);
165 173
166 /// Unmaps the given VMA. 174 /// Unmaps a range of addresses, splitting VMAs as necessary.
167 void Unmap(VMAHandle vma); 175 ResultCode UnmapRange(VAddr target, u32 size);
168 176
169 /// Changes the permissions of the given VMA. 177 /// Changes the permissions of the given VMA.
170 void Reprotect(VMAHandle vma, VMAPermission new_perms); 178 VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms);
179
180 /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
181 ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms);
182
183 /**
184 * Scans all VMAs and updates the page table range of any that use the given vector as backing
185 * memory. This should be called after any operation that causes reallocation of the vector.
186 */
187 void RefreshMemoryBlockMappings(const std::vector<u8>* block);
171 188
172 /// Dumps the address space layout to the log, for debugging 189 /// Dumps the address space layout to the log, for debugging
173 void LogLayout() const; 190 void LogLayout(Log::Level log_level) const;
174 191
175private: 192private:
176 using VMAIter = decltype(vma_map)::iterator; 193 using VMAIter = decltype(vma_map)::iterator;
@@ -178,6 +195,9 @@ private:
178 /// Converts a VMAHandle to a mutable VMAIter. 195 /// Converts a VMAHandle to a mutable VMAIter.
179 VMAIter StripIterConstness(const VMAHandle& iter); 196 VMAIter StripIterConstness(const VMAHandle& iter);
180 197
198 /// Unmaps the given VMA.
199 VMAIter Unmap(VMAIter vma);
200
181 /** 201 /**
182 * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing 202 * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
183 * the appropriate error checking. 203 * the appropriate error checking.
@@ -185,6 +205,12 @@ private:
185 ResultVal<VMAIter> CarveVMA(VAddr base, u32 size); 205 ResultVal<VMAIter> CarveVMA(VAddr base, u32 size);
186 206
187 /** 207 /**
208 * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
209 * end of the range.
210 */
211 ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size);
212
213 /**
188 * Splits a VMA in two, at the specified offset. 214 * Splits a VMA in two, at the specified offset.
189 * @returns the right side of the split, with the original iterator becoming the left side. 215 * @returns the right side of the split, with the original iterator becoming the left side.
190 */ 216 */
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 35402341b..6a2fdea2b 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -16,6 +16,7 @@
16#include "core/hle/hle.h" 16#include "core/hle/hle.h"
17#include "core/hle/kernel/event.h" 17#include "core/hle/kernel/event.h"
18#include "core/hle/kernel/mutex.h" 18#include "core/hle/kernel/mutex.h"
19#include "core/hle/kernel/process.h"
19#include "core/hle/kernel/shared_memory.h" 20#include "core/hle/kernel/shared_memory.h"
20#include "core/hle/kernel/thread.h" 21#include "core/hle/kernel/thread.h"
21 22
@@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock;
37static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event 38static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
38static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event 39static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
39 40
40static std::vector<u8> shared_font; 41static std::shared_ptr<std::vector<u8>> shared_font;
41 42
42static u32 cpu_percent; ///< CPU time available to the running application 43static u32 cpu_percent; ///< CPU time available to the running application
43 44
@@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) {
74void GetSharedFont(Service::Interface* self) { 75void GetSharedFont(Service::Interface* self) {
75 u32* cmd_buff = Kernel::GetCommandBuffer(); 76 u32* cmd_buff = Kernel::GetCommandBuffer();
76 77
77 if (!shared_font.empty()) { 78 if (shared_font != nullptr) {
78 // TODO(bunnei): This function shouldn't copy the shared font every time it's called. 79 // TODO(yuriks): This is a hack to keep this working right now even with our completely
79 // Instead, it should probably map the shared font as RO memory. We don't currently have 80 // broken shared memory system.
80 // an easy way to do this, but the copy should be sufficient for now. 81 shared_font_mem->base_address = SHARED_FONT_VADDR;
81 memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size()); 82 Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->base_address,
83 shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared);
82 84
83 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); 85 cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
84 cmd_buff[1] = RESULT_SUCCESS.raw; // No error 86 cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -391,7 +393,6 @@ void Init() {
391 // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file 393 // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file
392 // "shared_font.bin" in the Citra "sysdata" directory. 394 // "shared_font.bin" in the Citra "sysdata" directory.
393 395
394 shared_font.clear();
395 std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; 396 std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;
396 397
397 FileUtil::CreateFullPath(filepath); // Create path if not already created 398 FileUtil::CreateFullPath(filepath); // Create path if not already created
@@ -399,8 +400,8 @@ void Init() {
399 400
400 if (file.IsOpen()) { 401 if (file.IsOpen()) {
401 // Read shared font data 402 // Read shared font data
402 shared_font.resize((size_t)file.GetSize()); 403 shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
403 file.ReadBytes(shared_font.data(), (size_t)file.GetSize()); 404 file.ReadBytes(shared_font->data(), shared_font->size());
404 405
405 // Create shared font memory object 406 // Create shared font memory object
406 using Kernel::MemoryPermission; 407 using Kernel::MemoryPermission;
@@ -424,7 +425,7 @@ void Init() {
424} 425}
425 426
426void Shutdown() { 427void Shutdown() {
427 shared_font.clear(); 428 shared_font = nullptr;
428 shared_font_mem = nullptr; 429 shared_font_mem = nullptr;
429 lock = nullptr; 430 lock = nullptr;
430 notification_event = nullptr; 431 notification_event = nullptr;
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index e93c1b436..fde508a13 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -4,7 +4,6 @@
4 4
5#include "common/bit_field.h" 5#include "common/bit_field.h"
6 6
7#include "core/mem_map.h"
8#include "core/memory.h" 7#include "core/memory.h"
9#include "core/hle/kernel/event.h" 8#include "core/hle/kernel/event.h"
10#include "core/hle/kernel/shared_memory.h" 9#include "core/hle/kernel/shared_memory.h"
@@ -418,7 +417,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
418 417
419 case CommandId::SET_DISPLAY_TRANSFER: 418 case CommandId::SET_DISPLAY_TRANSFER:
420 { 419 {
421 auto& params = command.image_copy; 420 auto& params = command.display_transfer;
422 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 421 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
423 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); 422 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
424 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 423 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
@@ -433,17 +432,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
433 // TODO: Check if texture copies are implemented correctly.. 432 // TODO: Check if texture copies are implemented correctly..
434 case CommandId::SET_TEXTURE_COPY: 433 case CommandId::SET_TEXTURE_COPY:
435 { 434 {
436 auto& params = command.image_copy; 435 auto& params = command.texture_copy;
437 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 436 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
438 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); 437 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
439 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 438 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
440 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); 439 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
441 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); 440 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
442 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); 441 params.size);
443 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); 442 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
444 443 params.in_width_gap);
445 // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? 444 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
446 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); 445 params.out_width_gap);
446 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags),
447 params.flags);
448
449 // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
450 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
447 break; 451 break;
448 } 452 }
449 453
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index c89d0a467..8bcb30ad1 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -127,7 +127,16 @@ struct Command {
127 u32 in_buffer_size; 127 u32 in_buffer_size;
128 u32 out_buffer_size; 128 u32 out_buffer_size;
129 u32 flags; 129 u32 flags;
130 } image_copy; 130 } display_transfer;
131
132 struct {
133 u32 in_buffer_address;
134 u32 out_buffer_address;
135 u32 size;
136 u32 in_width_gap;
137 u32 out_width_gap;
138 u32 flags;
139 } texture_copy;
131 140
132 u8 raw_data[0x1C]; 141 u8 raw_data[0x1C];
133 }; 142 };
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 6e7dafaad..6b1b71fe4 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -10,7 +10,6 @@
10#include "core/hle/kernel/event.h" 10#include "core/hle/kernel/event.h"
11#include "core/hle/service/y2r_u.h" 11#include "core/hle/service/y2r_u.h"
12#include "core/hw/y2r.h" 12#include "core/hw/y2r.h"
13#include "core/mem_map.h"
14 13
15#include "video_core/renderer_base.h" 14#include "video_core/renderer_base.h"
16#include "video_core/utils.h" 15#include "video_core/utils.h"
diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp
index 26d87c7e2..50c5bc01b 100644
--- a/src/core/hle/shared_page.cpp
+++ b/src/core/hle/shared_page.cpp
@@ -18,7 +18,4 @@ void Init() {
18 shared_page.running_hw = 0x1; // product 18 shared_page.running_hw = 0x1; // product
19} 19}
20 20
21void Shutdown() {
22}
23
24} // namespace 21} // namespace
diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h
index db6a5340b..379bb7b63 100644
--- a/src/core/hle/shared_page.h
+++ b/src/core/hle/shared_page.h
@@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st
54extern SharedPageDef shared_page; 54extern SharedPageDef shared_page;
55 55
56void Init(); 56void Init();
57void Shutdown();
58 57
59} // namespace 58} // namespace
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index bb64fdfb7..89ac45a6f 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -10,11 +10,11 @@
10#include "common/symbols.h" 10#include "common/symbols.h"
11 11
12#include "core/core_timing.h" 12#include "core/core_timing.h"
13#include "core/mem_map.h"
14#include "core/arm/arm_interface.h" 13#include "core/arm/arm_interface.h"
15 14
16#include "core/hle/kernel/address_arbiter.h" 15#include "core/hle/kernel/address_arbiter.h"
17#include "core/hle/kernel/event.h" 16#include "core/hle/kernel/event.h"
17#include "core/hle/kernel/memory.h"
18#include "core/hle/kernel/mutex.h" 18#include "core/hle/kernel/mutex.h"
19#include "core/hle/kernel/process.h" 19#include "core/hle/kernel/process.h"
20#include "core/hle/kernel/resource_limit.h" 20#include "core/hle/kernel/resource_limit.h"
@@ -41,32 +41,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel,
41const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS, 41const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS,
42 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E 42 ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E
43 43
44const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1
45 ErrorDescription::MisalignedAddress, ErrorModule::OS,
46 ErrorSummary::InvalidArgument, ErrorLevel::Usage};
47const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2
48 ErrorDescription::MisalignedSize, ErrorModule::OS,
49 ErrorSummary::InvalidArgument, ErrorLevel::Usage};
50const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE
51 ErrorDescription::InvalidCombination, ErrorModule::OS,
52 ErrorSummary::InvalidArgument, ErrorLevel::Usage};
53
44enum ControlMemoryOperation { 54enum ControlMemoryOperation {
45 MEMORY_OPERATION_HEAP = 0x00000003, 55 MEMOP_FREE = 1,
46 MEMORY_OPERATION_GSP_HEAP = 0x00010003, 56 MEMOP_RESERVE = 2, // This operation seems to be unsupported in the kernel
57 MEMOP_COMMIT = 3,
58 MEMOP_MAP = 4,
59 MEMOP_UNMAP = 5,
60 MEMOP_PROTECT = 6,
61 MEMOP_OPERATION_MASK = 0xFF,
62
63 MEMOP_REGION_APP = 0x100,
64 MEMOP_REGION_SYSTEM = 0x200,
65 MEMOP_REGION_BASE = 0x300,
66 MEMOP_REGION_MASK = 0xF00,
67
68 MEMOP_LINEAR = 0x10000,
47}; 69};
48 70
49/// Map application or GSP heap memory 71/// Map application or GSP heap memory
50static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) { 72static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
51 LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", 73 using namespace Kernel;
74
75 LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X",
52 operation, addr0, addr1, size, permissions); 76 operation, addr0, addr1, size, permissions);
53 77
54 switch (operation) { 78 if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 0) {
79 return ERR_MISALIGNED_ADDRESS;
80 }
81 if ((size & Memory::PAGE_MASK) != 0) {
82 return ERR_MISALIGNED_SIZE;
83 }
84
85 u32 region = operation & MEMOP_REGION_MASK;
86 operation &= ~MEMOP_REGION_MASK;
87
88 if (region != 0) {
89 LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region);
90 }
91
92 if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) {
93 return ERR_INVALID_COMBINATION;
94 }
95 VMAPermission vma_permissions = (VMAPermission)permissions;
96
97 auto& process = *g_current_process;
98
99 switch (operation & MEMOP_OPERATION_MASK) {
100 case MEMOP_FREE:
101 {
102 if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) {
103 ResultCode result = process.HeapFree(addr0, size);
104 if (result.IsError()) return result;
105 } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) {
106 ResultCode result = process.LinearFree(addr0, size);
107 if (result.IsError()) return result;
108 } else {
109 return ERR_INVALID_ADDRESS;
110 }
111 *out_addr = addr0;
112 break;
113 }
114
115 case MEMOP_COMMIT:
116 {
117 if (operation & MEMOP_LINEAR) {
118 CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions));
119 } else {
120 CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
121 }
122 break;
123 }
55 124
56 // Map normal heap memory 125 case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
57 case MEMORY_OPERATION_HEAP: 126 {
58 *out_addr = Memory::MapBlock_Heap(size, operation, permissions); 127 CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
59 break; 128 break;
129 }
130
131 case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
132 {
133 ResultCode result = process.HeapFree(addr0, size);
134 if (result.IsError()) return result;
135 break;
136 }
60 137
61 // Map GSP heap memory 138 case MEMOP_PROTECT:
62 case MEMORY_OPERATION_GSP_HEAP: 139 {
63 *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions); 140 ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions);
141 if (result.IsError()) return result;
64 break; 142 break;
143 }
65 144
66 // Unknown ControlMemory operation
67 default: 145 default:
68 LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation); 146 LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation);
147 return ERR_INVALID_COMBINATION;
69 } 148 }
149
150 process.vm_manager.LogLayout(Log::Level::Trace);
151
70 return RESULT_SUCCESS; 152 return RESULT_SUCCESS;
71} 153}
72 154
@@ -537,9 +619,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf
537 if (process == nullptr) 619 if (process == nullptr)
538 return ERR_INVALID_HANDLE; 620 return ERR_INVALID_HANDLE;
539 621
540 auto vma = process->address_space->FindVMA(addr); 622 auto vma = process->vm_manager.FindVMA(addr);
541 623
542 if (vma == process->address_space->vma_map.end()) 624 if (vma == Kernel::g_current_process->vm_manager.vma_map.end())
543 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); 625 return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
544 626
545 memory_info->base_address = vma->second.base; 627 memory_info->base_address = vma->second.base;
@@ -692,6 +774,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
692 return RESULT_SUCCESS; 774 return RESULT_SUCCESS;
693} 775}
694 776
777static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
778 LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type);
779
780 using Kernel::Process;
781 Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle);
782 if (process == nullptr)
783 return ERR_INVALID_HANDLE;
784
785 switch (type) {
786 case 0:
787 case 2:
788 // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
789 // what's the difference between them.
790 *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
791 break;
792 case 1:
793 case 3:
794 case 4:
795 case 5:
796 case 6:
797 case 7:
798 case 8:
799 // These are valid, but not implemented yet
800 LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type);
801 break;
802 case 20:
803 *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase();
804 break;
805 default:
806 LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type);
807
808 if (type >= 21 && type <= 23) {
809 return ResultCode( // 0xE0E01BF4
810 ErrorDescription::NotImplemented, ErrorModule::OS,
811 ErrorSummary::InvalidArgument, ErrorLevel::Usage);
812 } else {
813 return ResultCode( // 0xD8E007ED
814 ErrorDescription::InvalidEnumValue, ErrorModule::Kernel,
815 ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
816 }
817 break;
818 }
819
820 return RESULT_SUCCESS;
821}
822
695namespace { 823namespace {
696 struct FunctionDef { 824 struct FunctionDef {
697 using Func = void(); 825 using Func = void();
@@ -746,7 +874,7 @@ static const FunctionDef SVC_Table[] = {
746 {0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"}, 874 {0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"},
747 {0x29, nullptr, "GetHandleInfo"}, 875 {0x29, nullptr, "GetHandleInfo"},
748 {0x2A, nullptr, "GetSystemInfo"}, 876 {0x2A, nullptr, "GetSystemInfo"},
749 {0x2B, nullptr, "GetProcessInfo"}, 877 {0x2B, HLE::Wrap<GetProcessInfo>, "GetProcessInfo"},
750 {0x2C, nullptr, "GetThreadInfo"}, 878 {0x2C, nullptr, "GetThreadInfo"},
751 {0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"}, 879 {0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"},
752 {0x2E, nullptr, "SendSyncRequest1"}, 880 {0x2E, nullptr, "SendSyncRequest1"},
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 3ccbc03b2..68ae38289 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <numeric>
6#include <type_traits> 7#include <type_traits>
7 8
8#include "common/color.h" 9#include "common/color.h"
@@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) {
158 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 159 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
159 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 160 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
160 161
162 if (config.is_texture_copy) {
163 u32 input_width = config.texture_copy.input_width * 16;
164 u32 input_gap = config.texture_copy.input_gap * 16;
165 u32 output_width = config.texture_copy.output_width * 16;
166 u32 output_gap = config.texture_copy.output_gap * 16;
167
168 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
169 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
170
171 u32 remaining_size = config.texture_copy.size;
172 u32 remaining_input = input_width;
173 u32 remaining_output = output_width;
174 while (remaining_size > 0) {
175 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
176
177 std::memcpy(dst_pointer, src_pointer, copy_size);
178 src_pointer += copy_size;
179 dst_pointer += copy_size;
180
181 remaining_input -= copy_size;
182 remaining_output -= copy_size;
183 remaining_size -= copy_size;
184
185 if (remaining_input == 0) {
186 remaining_input = input_width;
187 src_pointer += input_gap;
188 }
189 if (remaining_output == 0) {
190 remaining_output = output_width;
191 dst_pointer += output_gap;
192 }
193 }
194
195 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
196 config.texture_copy.size,
197 config.GetPhysicalInputAddress(), input_width, input_gap,
198 config.GetPhysicalOutputAddress(), output_width, output_gap,
199 config.flags);
200
201 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
202 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
203
204 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
205 break;
206 }
207
161 if (config.scaling > config.ScaleXY) { 208 if (config.scaling > config.ScaleXY) {
162 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 209 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
163 UNIMPLEMENTED(); 210 UNIMPLEMENTED();
164 break; 211 break;
165 } 212 }
166 213
167 if (config.output_tiled && 214 if (config.input_linear && config.scaling != config.NoScale) {
168 (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
169 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 215 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
170 UNIMPLEMENTED(); 216 UNIMPLEMENTED();
171 break; 217 break;
@@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) {
182 228
183 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); 229 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
184 230
185 if (config.raw_copy) {
186 // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
187 // TODO(Subv): Verify if raw copies perform scaling
188 memcpy(dst_pointer, src_pointer, output_size);
189
190 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
191 output_size,
192 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
193 config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
194 config.output_format.Value(), config.flags);
195
196 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
197
198 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
199 break;
200 }
201
202 for (u32 y = 0; y < output_height; ++y) { 231 for (u32 y = 0; y < output_height; ++y) {
203 for (u32 x = 0; x < output_width; ++x) { 232 for (u32 x = 0; x < output_width; ++x) {
204 Math::Vec4<u8> src_color; 233 Math::Vec4<u8> src_color;
@@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) {
220 u32 src_offset; 249 u32 src_offset;
221 u32 dst_offset; 250 u32 dst_offset;
222 251
223 if (config.output_tiled) { 252 if (config.input_linear) {
224 if (!config.dont_swizzle) { 253 if (!config.dont_swizzle) {
225 // Interpret the input as linear and the output as tiled 254 // Interpret the input as linear and the output as tiled
226 u32 coarse_y = y & ~7; 255 u32 coarse_y = y & ~7;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index daad506fe..2e3a9f779 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -201,12 +201,14 @@ struct Regs {
201 u32 flags; 201 u32 flags;
202 202
203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically 203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically
204 BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format 204 BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
205 BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing 205 BitField< 2, 1, u32> crop_input_lines;
206 BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
206 BitField< 5, 1, u32> dont_swizzle; 207 BitField< 5, 1, u32> dont_swizzle;
207 BitField< 8, 3, PixelFormat> input_format; 208 BitField< 8, 3, PixelFormat> input_format;
208 BitField<12, 3, PixelFormat> output_format; 209 BitField<12, 3, PixelFormat> output_format;
209 210 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
211 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
210 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer 212 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
211 }; 213 };
212 214
@@ -214,10 +216,30 @@ struct Regs {
214 216
215 // it seems that writing to this field triggers the display transfer 217 // it seems that writing to this field triggers the display transfer
216 u32 trigger; 218 u32 trigger;
219
220 INSERT_PADDING_WORDS(0x1);
221
222 struct {
223 u32 size;
224
225 union {
226 u32 input_size;
227
228 BitField< 0, 16, u32> input_width;
229 BitField<16, 16, u32> input_gap;
230 };
231
232 union {
233 u32 output_size;
234
235 BitField< 0, 16, u32> output_width;
236 BitField<16, 16, u32> output_gap;
237 };
238 } texture_copy;
217 } display_transfer_config; 239 } display_transfer_config;
218 ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); 240 ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
219 241
220 INSERT_PADDING_WORDS(0x331); 242 INSERT_PADDING_WORDS(0x32D);
221 243
222 struct { 244 struct {
223 // command list size (in bytes) 245 // command list size (in bytes)
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
deleted file mode 100644
index cbe993fbe..000000000
--- a/src/core/mem_map.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <memory>
7#include <utility>
8#include <vector>
9
10#include "common/common_types.h"
11#include "common/logging/log.h"
12
13#include "core/hle/config_mem.h"
14#include "core/hle/kernel/vm_manager.h"
15#include "core/hle/result.h"
16#include "core/hle/shared_page.h"
17#include "core/mem_map.h"
18#include "core/memory.h"
19#include "core/memory_setup.h"
20
21////////////////////////////////////////////////////////////////////////////////////////////////////
22
23namespace Memory {
24
25namespace {
26
27struct MemoryArea {
28 u32 base;
29 u32 size;
30 const char* name;
31};
32
33// We don't declare the IO regions in here since its handled by other means.
34static MemoryArea memory_areas[] = {
35 {HEAP_VADDR, HEAP_SIZE, "Heap"}, // Application heap (main memory)
36 {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
37 {LINEAR_HEAP_VADDR, LINEAR_HEAP_SIZE, "Linear Heap"}, // Linear heap (main memory)
38 {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
39 {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
40 {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
41};
42
43/// Represents a block of memory mapped by ControlMemory/MapMemoryBlock
44struct MemoryBlock {
45 MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) {
46 }
47 u32 handle;
48 u32 base_address;
49 u32 address;
50 u32 size;
51 u32 operation;
52 u32 permissions;
53
54 const u32 GetVirtualAddress() const{
55 return base_address + address;
56 }
57};
58
59static std::map<u32, MemoryBlock> heap_map;
60static std::map<u32, MemoryBlock> heap_linear_map;
61
62}
63
64u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
65 MemoryBlock block;
66
67 block.base_address = HEAP_VADDR;
68 block.size = size;
69 block.operation = operation;
70 block.permissions = permissions;
71
72 if (heap_map.size() > 0) {
73 const MemoryBlock last_block = heap_map.rbegin()->second;
74 block.address = last_block.address + last_block.size;
75 }
76 heap_map[block.GetVirtualAddress()] = block;
77
78 return block.GetVirtualAddress();
79}
80
81u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) {
82 MemoryBlock block;
83
84 block.base_address = LINEAR_HEAP_VADDR;
85 block.size = size;
86 block.operation = operation;
87 block.permissions = permissions;
88
89 if (heap_linear_map.size() > 0) {
90 const MemoryBlock last_block = heap_linear_map.rbegin()->second;
91 block.address = last_block.address + last_block.size;
92 }
93 heap_linear_map[block.GetVirtualAddress()] = block;
94
95 return block.GetVirtualAddress();
96}
97
98PAddr VirtualToPhysicalAddress(const VAddr addr) {
99 if (addr == 0) {
100 return 0;
101 } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
102 return addr - VRAM_VADDR + VRAM_PADDR;
103 } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
104 return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
105 } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
106 return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
107 } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
108 return addr - IO_AREA_VADDR + IO_AREA_PADDR;
109 }
110
111 LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr);
112 // To help with debugging, set bit on address so that it's obviously invalid.
113 return addr | 0x80000000;
114}
115
116VAddr PhysicalToVirtualAddress(const PAddr addr) {
117 if (addr == 0) {
118 return 0;
119 } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
120 return addr - VRAM_PADDR + VRAM_VADDR;
121 } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) {
122 return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR;
123 } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
124 return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
125 } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
126 return addr - IO_AREA_PADDR + IO_AREA_VADDR;
127 }
128
129 LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr);
130 // To help with debugging, set bit on address so that it's obviously invalid.
131 return addr | 0x80000000;
132}
133
134void Init() {
135 InitMemoryMap();
136 LOG_DEBUG(HW_Memory, "initialized OK");
137}
138
139void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
140 using namespace Kernel;
141
142 for (MemoryArea& area : memory_areas) {
143 auto block = std::make_shared<std::vector<u8>>(area.size);
144 address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
145 }
146
147 auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
148 (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
149 address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
150
151 auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
152 (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
153 address_space.Reprotect(shared_page_vma, VMAPermission::Read);
154}
155
156void Shutdown() {
157 heap_map.clear();
158 heap_linear_map.clear();
159
160 LOG_DEBUG(HW_Memory, "shutdown OK");
161}
162
163} // namespace
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
deleted file mode 100644
index 229ef82c5..000000000
--- a/src/core/mem_map.h
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2014 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Kernel {
10class VMManager;
11}
12
13namespace Memory {
14
15void Init();
16void InitLegacyAddressSpace(Kernel::VMManager& address_space);
17void Shutdown();
18
19/**
20 * Maps a block of memory on the heap
21 * @param size Size of block in bytes
22 * @param operation Memory map operation type
23 * @param permissions Memory allocation permissions
24 */
25u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions);
26
27/**
28 * Maps a block of memory on the GSP heap
29 * @param size Size of block in bytes
30 * @param operation Memory map operation type
31 * @param permissions Control memory permissions
32 */
33u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions);
34
35/**
36 * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
37 * address. This should be used by services to translate addresses for use by the hardware.
38 */
39PAddr VirtualToPhysicalAddress(VAddr addr);
40
41/**
42 * Undoes a mapping performed by VirtualToPhysicalAddress().
43 */
44VAddr PhysicalToVirtualAddress(PAddr addr);
45
46} // namespace
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 1f66bb27d..cde390b8a 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -9,7 +9,7 @@
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/swap.h" 10#include "common/swap.h"
11 11
12#include "core/mem_map.h" 12#include "core/hle/kernel/process.h"
13#include "core/memory.h" 13#include "core/memory.h"
14#include "core/memory_setup.h" 14#include "core/memory_setup.h"
15 15
@@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) {
198 Write8(addr + offset, data[offset]); 198 Write8(addr + offset, data[offset]);
199} 199}
200 200
201PAddr VirtualToPhysicalAddress(const VAddr addr) {
202 if (addr == 0) {
203 return 0;
204 } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
205 return addr - VRAM_VADDR + VRAM_PADDR;
206 } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
207 return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
208 } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
209 return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
210 } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
211 return addr - IO_AREA_VADDR + IO_AREA_PADDR;
212 } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
213 return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR;
214 }
215
216 LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr);
217 // To help with debugging, set bit on address so that it's obviously invalid.
218 return addr | 0x80000000;
219}
220
221VAddr PhysicalToVirtualAddress(const PAddr addr) {
222 if (addr == 0) {
223 return 0;
224 } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
225 return addr - VRAM_PADDR + VRAM_VADDR;
226 } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) {
227 return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase();
228 } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
229 return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
230 } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
231 return addr - IO_AREA_PADDR + IO_AREA_VADDR;
232 }
233
234 LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr);
235 // To help with debugging, set bit on address so that it's obviously invalid.
236 return addr | 0x80000000;
237}
238
201} // namespace 239} // namespace
diff --git a/src/core/memory.h b/src/core/memory.h
index 418609de0..5af72b7a7 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -15,6 +15,8 @@ namespace Memory {
15 * be mapped. 15 * be mapped.
16 */ 16 */
17const u32 PAGE_SIZE = 0x1000; 17const u32 PAGE_SIZE = 0x1000;
18const u32 PAGE_MASK = PAGE_SIZE - 1;
19const int PAGE_BITS = 12;
18 20
19/// Physical memory regions as seen from the ARM11 21/// Physical memory regions as seen from the ARM11
20enum : PAddr { 22enum : PAddr {
@@ -103,8 +105,15 @@ enum : VAddr {
103 // hardcoded value. 105 // hardcoded value.
104 /// Area where TLS (Thread-Local Storage) buffers are allocated. 106 /// Area where TLS (Thread-Local Storage) buffers are allocated.
105 TLS_AREA_VADDR = 0x1FF82000, 107 TLS_AREA_VADDR = 0x1FF82000,
106 TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads 108 TLS_ENTRY_SIZE = 0x200,
109 TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
107 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, 110 TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
111
112
113 /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
114 NEW_LINEAR_HEAP_VADDR = 0x30000000,
115 NEW_LINEAR_HEAP_SIZE = 0x10000000,
116 NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,
108}; 117};
109 118
110u8 Read8(VAddr addr); 119u8 Read8(VAddr addr);
@@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size);
122u8* GetPointer(VAddr virtual_address); 131u8* GetPointer(VAddr virtual_address);
123 132
124/** 133/**
134* Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
135* address. This should be used by services to translate addresses for use by the hardware.
136*/
137PAddr VirtualToPhysicalAddress(VAddr addr);
138
139/**
140* Undoes a mapping performed by VirtualToPhysicalAddress().
141*/
142VAddr PhysicalToVirtualAddress(PAddr addr);
143
144/**
125 * Gets a pointer to the memory region beginning at the specified physical address. 145 * Gets a pointer to the memory region beginning at the specified physical address.
126 * 146 *
127 * @note This is currently implemented using PhysicalToVirtualAddress(). 147 * @note This is currently implemented using PhysicalToVirtualAddress().
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 361bfc816..84ff30120 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -10,9 +10,6 @@
10 10
11namespace Memory { 11namespace Memory {
12 12
13const u32 PAGE_MASK = PAGE_SIZE - 1;
14const int PAGE_BITS = 12;
15
16void InitMemoryMap(); 13void InitMemoryMap();
17 14
18/** 15/**
diff --git a/src/core/system.cpp b/src/core/system.cpp
index 561ff82f0..3cd84bf5e 100644
--- a/src/core/system.cpp
+++ b/src/core/system.cpp
@@ -4,11 +4,11 @@
4 4
5#include "core/core.h" 5#include "core/core.h"
6#include "core/core_timing.h" 6#include "core/core_timing.h"
7#include "core/mem_map.h"
8#include "core/system.h" 7#include "core/system.h"
9#include "core/hw/hw.h" 8#include "core/hw/hw.h"
10#include "core/hle/hle.h" 9#include "core/hle/hle.h"
11#include "core/hle/kernel/kernel.h" 10#include "core/hle/kernel/kernel.h"
11#include "core/hle/kernel/memory.h"
12 12
13#include "video_core/video_core.h" 13#include "video_core/video_core.h"
14 14
@@ -29,7 +29,6 @@ void Shutdown() {
29 HLE::Shutdown(); 29 HLE::Shutdown();
30 Kernel::Shutdown(); 30 Kernel::Shutdown();
31 HW::Shutdown(); 31 HW::Shutdown();
32 Memory::Shutdown();
33 CoreTiming::Shutdown(); 32 CoreTiming::Shutdown();
34 Core::Shutdown(); 33 Core::Shutdown();
35} 34}
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index e14de0768..ae5a30441 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -197,12 +197,19 @@ void RunInterpreter(UnitState<Debug>& state) {
197 197
198 case OpCode::Id::DP3: 198 case OpCode::Id::DP3:
199 case OpCode::Id::DP4: 199 case OpCode::Id::DP4:
200 case OpCode::Id::DPH:
201 case OpCode::Id::DPHI:
200 { 202 {
201 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 203 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
202 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); 204 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
203 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 205 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
206
207 OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
208 if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
209 src1[3] = float24::FromFloat32(1.0f);
210
204 float24 dot = float24::FromFloat32(0.f); 211 float24 dot = float24::FromFloat32(0.f);
205 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; 212 int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
206 for (int i = 0; i < num_components; ++i) 213 for (int i = 0; i < num_components; ++i)
207 dot = dot + src1[i] * src2[i]; 214 dot = dot + src1[i] * src2[i];
208 215
@@ -221,13 +228,12 @@ void RunInterpreter(UnitState<Debug>& state) {
221 { 228 {
222 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 229 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
223 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 230 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
231 float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
224 for (int i = 0; i < 4; ++i) { 232 for (int i = 0; i < 4; ++i) {
225 if (!swizzle.DestComponentEnabled(i)) 233 if (!swizzle.DestComponentEnabled(i))
226 continue; 234 continue;
227 235
228 // TODO: Be stable against division by zero! 236 dest[i] = rcp_res;
229 // TODO: I think this might be wrong... we should only use one component here
230 dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
231 } 237 }
232 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 238 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
233 break; 239 break;
@@ -238,13 +244,12 @@ void RunInterpreter(UnitState<Debug>& state) {
238 { 244 {
239 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 245 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
240 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); 246 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
247 float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
241 for (int i = 0; i < 4; ++i) { 248 for (int i = 0; i < 4; ++i) {
242 if (!swizzle.DestComponentEnabled(i)) 249 if (!swizzle.DestComponentEnabled(i))
243 continue; 250 continue;
244 251
245 // TODO: Be stable against division by zero! 252 dest[i] = rsq_res;
246 // TODO: I think this might be wrong... we should only use one component here
247 dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
248 } 253 }
249 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); 254 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
250 break; 255 break;
@@ -278,6 +283,20 @@ void RunInterpreter(UnitState<Debug>& state) {
278 break; 283 break;
279 } 284 }
280 285
286 case OpCode::Id::SGE:
287 case OpCode::Id::SGEI:
288 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
289 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
290 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
291 for (int i = 0; i < 4; ++i) {
292 if (!swizzle.DestComponentEnabled(i))
293 continue;
294
295 dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
296 }
297 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
298 break;
299
281 case OpCode::Id::SLT: 300 case OpCode::Id::SLT:
282 case OpCode::Id::SLTI: 301 case OpCode::Id::SLTI:
283 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); 302 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
@@ -334,6 +353,42 @@ void RunInterpreter(UnitState<Debug>& state) {
334 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); 353 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
335 break; 354 break;
336 355
356 case OpCode::Id::EX2:
357 {
358 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
359 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
360
361 // EX2 only takes first component exp2 and writes it to all dest components
362 float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32()));
363 for (int i = 0; i < 4; ++i) {
364 if (!swizzle.DestComponentEnabled(i))
365 continue;
366
367 dest[i] = ex2_res;
368 }
369
370 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
371 break;
372 }
373
374 case OpCode::Id::LG2:
375 {
376 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
377 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
378
379 // LG2 only takes the first component log2 and writes it to all dest components
380 float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32()));
381 for (int i = 0; i < 4; ++i) {
382 if (!swizzle.DestComponentEnabled(i))
383 continue;
384
385 dest[i] = lg2_res;
386 }
387
388 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
389 break;
390 }
391
337 default: 392 default:
338 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", 393 LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
339 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); 394 (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 836942c6b..cc66fc8d6 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -23,14 +23,14 @@ const JitFunction instr_table[64] = {
23 &JitCompiler::Compile_ADD, // add 23 &JitCompiler::Compile_ADD, // add
24 &JitCompiler::Compile_DP3, // dp3 24 &JitCompiler::Compile_DP3, // dp3
25 &JitCompiler::Compile_DP4, // dp4 25 &JitCompiler::Compile_DP4, // dp4
26 nullptr, // dph 26 &JitCompiler::Compile_DPH, // dph
27 nullptr, // unknown 27 nullptr, // unknown
28 nullptr, // ex2 28 &JitCompiler::Compile_EX2, // ex2
29 nullptr, // lg2 29 &JitCompiler::Compile_LG2, // lg2
30 nullptr, // unknown 30 nullptr, // unknown
31 &JitCompiler::Compile_MUL, // mul 31 &JitCompiler::Compile_MUL, // mul
32 nullptr, // lge 32 &JitCompiler::Compile_SGE, // sge
33 nullptr, // slt 33 &JitCompiler::Compile_SLT, // slt
34 &JitCompiler::Compile_FLR, // flr 34 &JitCompiler::Compile_FLR, // flr
35 &JitCompiler::Compile_MAX, // max 35 &JitCompiler::Compile_MAX, // max
36 &JitCompiler::Compile_MIN, // min 36 &JitCompiler::Compile_MIN, // min
@@ -44,10 +44,10 @@ const JitFunction instr_table[64] = {
44 nullptr, // unknown 44 nullptr, // unknown
45 nullptr, // unknown 45 nullptr, // unknown
46 nullptr, // unknown 46 nullptr, // unknown
47 nullptr, // dphi 47 &JitCompiler::Compile_DPH, // dphi
48 nullptr, // unknown 48 nullptr, // unknown
49 nullptr, // sgei 49 &JitCompiler::Compile_SGE, // sgei
50 &JitCompiler::Compile_SLTI, // slti 50 &JitCompiler::Compile_SLT, // slti
51 nullptr, // unknown 51 nullptr, // unknown
52 nullptr, // unknown 52 nullptr, // unknown
53 nullptr, // unknown 53 nullptr, // unknown
@@ -280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
280 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); 280 CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
281} 281}
282 282
283void JitCompiler::Compile_PushCallerSavedXMM() {
284#ifndef _WIN32
285 SUB(64, R(RSP), Imm8(2 * 16));
286 MOVUPS(MDisp(RSP, 16), ONE);
287 MOVUPS(MDisp(RSP, 0), NEGBIT);
288#endif
289}
290
291void JitCompiler::Compile_PopCallerSavedXMM() {
292#ifndef _WIN32
293 MOVUPS(NEGBIT, MDisp(RSP, 0));
294 MOVUPS(ONE, MDisp(RSP, 16));
295 ADD(64, R(RSP), Imm8(2 * 16));
296#endif
297}
298
283void JitCompiler::Compile_ADD(Instruction instr) { 299void JitCompiler::Compile_ADD(Instruction instr) {
284 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 300 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
285 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 301 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -331,6 +347,71 @@ void JitCompiler::Compile_DP4(Instruction instr) {
331 Compile_DestEnable(instr, SRC1); 347 Compile_DestEnable(instr, SRC1);
332} 348}
333 349
350void JitCompiler::Compile_DPH(Instruction instr) {
351 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
352 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
353 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
354 } else {
355 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
356 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
357 }
358
359 if (Common::GetCPUCaps().sse4_1) {
360 // Set 4th component to 1.0
361 BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
362 DPPS(SRC1, R(SRC2), 0xff);
363 } else {
364 // Reverse to set the 4th component to 1.0
365 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
366 MOVSS(SRC1, R(ONE));
367 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3));
368
369 MULPS(SRC1, R(SRC2));
370
371 MOVAPS(SRC2, R(SRC1));
372 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
373 ADDPS(SRC1, R(SRC2));
374
375 MOVAPS(SRC2, R(SRC1));
376 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
377 ADDPS(SRC1, R(SRC2));
378 }
379
380 Compile_DestEnable(instr, SRC1);
381}
382
383void JitCompiler::Compile_EX2(Instruction instr) {
384 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
385 MOVSS(XMM0, R(SRC1));
386
387 // The following will actually break the stack alignment
388 ABI_PushAllCallerSavedRegsAndAdjustStack();
389 Compile_PushCallerSavedXMM();
390 ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
391 Compile_PopCallerSavedXMM();
392 ABI_PopAllCallerSavedRegsAndAdjustStack();
393
394 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
395 MOVAPS(SRC1, R(XMM0));
396 Compile_DestEnable(instr, SRC1);
397}
398
399void JitCompiler::Compile_LG2(Instruction instr) {
400 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
401 MOVSS(XMM0, R(SRC1));
402
403 // The following will actually break the stack alignment
404 ABI_PushAllCallerSavedRegsAndAdjustStack();
405 Compile_PushCallerSavedXMM();
406 ABI_CallFunction(reinterpret_cast<const void*>(log2f));
407 Compile_PopCallerSavedXMM();
408 ABI_PopAllCallerSavedRegsAndAdjustStack();
409
410 SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
411 MOVAPS(SRC1, R(XMM0));
412 Compile_DestEnable(instr, SRC1);
413}
414
334void JitCompiler::Compile_MUL(Instruction instr) { 415void JitCompiler::Compile_MUL(Instruction instr) {
335 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 416 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
336 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); 417 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -338,6 +419,36 @@ void JitCompiler::Compile_MUL(Instruction instr) {
338 Compile_DestEnable(instr, SRC1); 419 Compile_DestEnable(instr, SRC1);
339} 420}
340 421
422void JitCompiler::Compile_SGE(Instruction instr) {
423 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
424 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
425 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
426 } else {
427 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
428 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
429 }
430
431 CMPPS(SRC1, R(SRC2), CMP_NLT);
432 ANDPS(SRC1, R(ONE));
433
434 Compile_DestEnable(instr, SRC1);
435}
436
437void JitCompiler::Compile_SLT(Instruction instr) {
438 if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
439 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
440 Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
441 } else {
442 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
443 Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
444 }
445
446 CMPPS(SRC1, R(SRC2), CMP_LT);
447 ANDPS(SRC1, R(ONE));
448
449 Compile_DestEnable(instr, SRC1);
450}
451
341void JitCompiler::Compile_FLR(Instruction instr) { 452void JitCompiler::Compile_FLR(Instruction instr) {
342 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 453 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
343 454
@@ -415,22 +526,13 @@ void JitCompiler::Compile_MOV(Instruction instr) {
415 Compile_DestEnable(instr, SRC1); 526 Compile_DestEnable(instr, SRC1);
416} 527}
417 528
418void JitCompiler::Compile_SLTI(Instruction instr) {
419 Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
420 Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2);
421
422 CMPSS(SRC1, R(SRC2), CMP_LT);
423 ANDPS(SRC1, R(ONE));
424
425 Compile_DestEnable(instr, SRC1);
426}
427
428void JitCompiler::Compile_RCP(Instruction instr) { 529void JitCompiler::Compile_RCP(Instruction instr) {
429 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 530 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
430 531
431 // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica 532 // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
432 // performs this operation more accurately. This should be checked on hardware. 533 // performs this operation more accurately. This should be checked on hardware.
433 RCPPS(SRC1, R(SRC1)); 534 RCPSS(SRC1, R(SRC1));
535 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
434 536
435 Compile_DestEnable(instr, SRC1); 537 Compile_DestEnable(instr, SRC1);
436} 538}
@@ -438,9 +540,10 @@ void JitCompiler::Compile_RCP(Instruction instr) {
438void JitCompiler::Compile_RSQ(Instruction instr) { 540void JitCompiler::Compile_RSQ(Instruction instr) {
439 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); 541 Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
440 542
441 // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica 543 // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
442 // performs this operation more accurately. This should be checked on hardware. 544 // performs this operation more accurately. This should be checked on hardware.
443 RSQRTPS(SRC1, R(SRC1)); 545 RSQRTSS(SRC1, R(SRC1));
546 SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
444 547
445 Compile_DestEnable(instr, SRC1); 548 Compile_DestEnable(instr, SRC1);
446} 549}
@@ -646,12 +749,12 @@ CompiledShader* JitCompiler::Compile() {
646 // Used to set a register to one 749 // Used to set a register to one
647 static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; 750 static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
648 MOV(PTRBITS, R(RAX), ImmPtr(&one)); 751 MOV(PTRBITS, R(RAX), ImmPtr(&one));
649 MOVAPS(ONE, MDisp(RAX, 0)); 752 MOVAPS(ONE, MatR(RAX));
650 753
651 // Used to negate registers 754 // Used to negate registers
652 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; 755 static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
653 MOV(PTRBITS, R(RAX), ImmPtr(&neg)); 756 MOV(PTRBITS, R(RAX), ImmPtr(&neg));
654 MOVAPS(NEGBIT, MDisp(RAX, 0)); 757 MOVAPS(NEGBIT, MatR(RAX));
655 758
656 looping = false; 759 looping = false;
657 760
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index b88f2a0d2..fbe19fe93 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -37,7 +37,12 @@ public:
37 void Compile_ADD(Instruction instr); 37 void Compile_ADD(Instruction instr);
38 void Compile_DP3(Instruction instr); 38 void Compile_DP3(Instruction instr);
39 void Compile_DP4(Instruction instr); 39 void Compile_DP4(Instruction instr);
40 void Compile_DPH(Instruction instr);
41 void Compile_EX2(Instruction instr);
42 void Compile_LG2(Instruction instr);
40 void Compile_MUL(Instruction instr); 43 void Compile_MUL(Instruction instr);
44 void Compile_SGE(Instruction instr);
45 void Compile_SLT(Instruction instr);
41 void Compile_FLR(Instruction instr); 46 void Compile_FLR(Instruction instr);
42 void Compile_MAX(Instruction instr); 47 void Compile_MAX(Instruction instr);
43 void Compile_MIN(Instruction instr); 48 void Compile_MIN(Instruction instr);
@@ -45,7 +50,6 @@ public:
45 void Compile_RSQ(Instruction instr); 50 void Compile_RSQ(Instruction instr);
46 void Compile_MOVA(Instruction instr); 51 void Compile_MOVA(Instruction instr);
47 void Compile_MOV(Instruction instr); 52 void Compile_MOV(Instruction instr);
48 void Compile_SLTI(Instruction instr);
49 void Compile_NOP(Instruction instr); 53 void Compile_NOP(Instruction instr);
50 void Compile_END(Instruction instr); 54 void Compile_END(Instruction instr);
51 void Compile_CALL(Instruction instr); 55 void Compile_CALL(Instruction instr);
@@ -67,6 +71,9 @@ private:
67 void Compile_EvaluateCondition(Instruction instr); 71 void Compile_EvaluateCondition(Instruction instr);
68 void Compile_UniformCondition(Instruction instr); 72 void Compile_UniformCondition(Instruction instr);
69 73
74 void Compile_PushCallerSavedXMM();
75 void Compile_PopCallerSavedXMM();
76
70 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. 77 /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
71 unsigned* offset_ptr = nullptr; 78 unsigned* offset_ptr = nullptr;
72 79