diff options
Diffstat (limited to 'src')
41 files changed, 1868 insertions, 1221 deletions
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index a1a4865bd..8bf2a3e13 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp | |||
| @@ -287,6 +287,17 @@ void GMainWindow::ShutdownGame() { | |||
| 287 | render_window->hide(); | 287 | render_window->hide(); |
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | void GMainWindow::StoreRecentFile(const QString& filename) | ||
| 291 | { | ||
| 292 | QSettings settings; | ||
| 293 | QStringList recent_files = settings.value("recentFiles").toStringList(); | ||
| 294 | recent_files.prepend(filename); | ||
| 295 | recent_files.removeDuplicates(); | ||
| 296 | settings.setValue("recentFiles", recent_files); | ||
| 297 | |||
| 298 | UpdateRecentFiles(); | ||
| 299 | } | ||
| 300 | |||
| 290 | void GMainWindow::UpdateRecentFiles() { | 301 | void GMainWindow::UpdateRecentFiles() { |
| 291 | QSettings settings; | 302 | QSettings settings; |
| 292 | QStringList recent_files = settings.value("recentFiles").toStringList(); | 303 | QStringList recent_files = settings.value("recentFiles").toStringList(); |
| @@ -297,6 +308,7 @@ void GMainWindow::UpdateRecentFiles() { | |||
| 297 | QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName()); | 308 | QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName()); |
| 298 | actions_recent_files[i]->setText(text); | 309 | actions_recent_files[i]->setText(text); |
| 299 | actions_recent_files[i]->setData(recent_files[i]); | 310 | actions_recent_files[i]->setData(recent_files[i]); |
| 311 | actions_recent_files[i]->setToolTip(recent_files[i]); | ||
| 300 | actions_recent_files[i]->setVisible(true); | 312 | actions_recent_files[i]->setVisible(true); |
| 301 | } | 313 | } |
| 302 | 314 | ||
| @@ -319,11 +331,7 @@ void GMainWindow::OnMenuLoadFile() { | |||
| 319 | QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)")); | 331 | QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)")); |
| 320 | if (filename.size()) { | 332 | if (filename.size()) { |
| 321 | settings.setValue("romsPath", QFileInfo(filename).path()); | 333 | settings.setValue("romsPath", QFileInfo(filename).path()); |
| 322 | // Update recent files list | 334 | StoreRecentFile(filename); |
| 323 | QStringList recent_files = settings.value("recentFiles").toStringList(); | ||
| 324 | recent_files.prepend(filename); | ||
| 325 | settings.setValue("recentFiles", recent_files); | ||
| 326 | UpdateRecentFiles(); // Update UI | ||
| 327 | 335 | ||
| 328 | BootGame(filename.toLatin1().data()); | 336 | BootGame(filename.toLatin1().data()); |
| 329 | } | 337 | } |
| @@ -349,6 +357,7 @@ void GMainWindow::OnMenuRecentFile() { | |||
| 349 | QFileInfo file_info(filename); | 357 | QFileInfo file_info(filename); |
| 350 | if (file_info.exists()) { | 358 | if (file_info.exists()) { |
| 351 | BootGame(filename.toLatin1().data()); | 359 | BootGame(filename.toLatin1().data()); |
| 360 | StoreRecentFile(filename); // Put the filename on top of the list | ||
| 352 | } else { | 361 | } else { |
| 353 | // Display an error message and remove the file from the list. | 362 | // Display an error message and remove the file from the list. |
| 354 | QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); | 363 | QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename)); |
| @@ -357,12 +366,7 @@ void GMainWindow::OnMenuRecentFile() { | |||
| 357 | QStringList recent_files = settings.value("recentFiles").toStringList(); | 366 | QStringList recent_files = settings.value("recentFiles").toStringList(); |
| 358 | recent_files.removeOne(filename); | 367 | recent_files.removeOne(filename); |
| 359 | settings.setValue("recentFiles", recent_files); | 368 | settings.setValue("recentFiles", recent_files); |
| 360 | 369 | UpdateRecentFiles(); | |
| 361 | action->setVisible(false); | ||
| 362 | // Grey out the recent files menu if the list is empty | ||
| 363 | if (ui.menu_recent_files->isEmpty()) { | ||
| 364 | ui.menu_recent_files->setEnabled(false); | ||
| 365 | } | ||
| 366 | } | 370 | } |
| 367 | } | 371 | } |
| 368 | 372 | ||
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h index 4b260ae8b..6f1292295 100644 --- a/src/citra_qt/main.h +++ b/src/citra_qt/main.h | |||
| @@ -60,6 +60,24 @@ private: | |||
| 60 | void BootGame(const std::string& filename); | 60 | void BootGame(const std::string& filename); |
| 61 | void ShutdownGame(); | 61 | void ShutdownGame(); |
| 62 | 62 | ||
| 63 | /** | ||
| 64 | * Stores the filename in the recently loaded files list. | ||
| 65 | * The new filename is stored at the beginning of the recently loaded files list. | ||
| 66 | * After inserting the new entry, duplicates are removed, meaning that if | ||
| 67 | * this was inserted from \a OnMenuRecentFile(), the entry will be put on top | ||
| 68 | * and removed from its previous position. | ||
| 69 | * | ||
| 70 | * Finally, this function calls \a UpdateRecentFiles() to update the UI. | ||
| 71 | * | ||
| 72 | * @param filename the filename to store | ||
| 73 | */ | ||
| 74 | void StoreRecentFile(const QString& filename); | ||
| 75 | |||
| 76 | /** | ||
| 77 | * Updates the recent files menu. | ||
| 78 | * Menu entries are rebuilt from the configuration file. | ||
| 79 | * If there is no entry in the menu, the menu is greyed out. | ||
| 80 | */ | ||
| 63 | void UpdateRecentFiles(); | 81 | void UpdateRecentFiles(); |
| 64 | 82 | ||
| 65 | void closeEvent(QCloseEvent* event) override; | 83 | void closeEvent(QCloseEvent* event) override; |
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 88e452a16..ed20c3629 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h | |||
| @@ -45,14 +45,20 @@ | |||
| 45 | 45 | ||
| 46 | // GCC 4.8 defines all the rotate functions now | 46 | // GCC 4.8 defines all the rotate functions now |
| 47 | // Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit | 47 | // Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit |
| 48 | #ifndef _rotl | 48 | #ifdef _rotl |
| 49 | inline u32 _rotl(u32 x, int shift) { | 49 | #define rotl _rotl |
| 50 | #else | ||
| 51 | inline u32 rotl(u32 x, int shift) { | ||
| 50 | shift &= 31; | 52 | shift &= 31; |
| 51 | if (!shift) return x; | 53 | if (!shift) return x; |
| 52 | return (x << shift) | (x >> (32 - shift)); | 54 | return (x << shift) | (x >> (32 - shift)); |
| 53 | } | 55 | } |
| 56 | #endif | ||
| 54 | 57 | ||
| 55 | inline u32 _rotr(u32 x, int shift) { | 58 | #ifdef _rotr |
| 59 | #define rotr _rotr | ||
| 60 | #else | ||
| 61 | inline u32 rotr(u32 x, int shift) { | ||
| 56 | shift &= 31; | 62 | shift &= 31; |
| 57 | if (!shift) return x; | 63 | if (!shift) return x; |
| 58 | return (x >> shift) | (x << (32 - shift)); | 64 | return (x >> shift) | (x << (32 - shift)); |
diff --git a/src/common/file_util.h b/src/common/file_util.h index d0dccdf69..e71a9b2fa 100644 --- a/src/common/file_util.h +++ b/src/common/file_util.h | |||
| @@ -244,7 +244,7 @@ private: | |||
| 244 | template <typename T> | 244 | template <typename T> |
| 245 | void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) | 245 | void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode) |
| 246 | { | 246 | { |
| 247 | #ifdef _WIN32 | 247 | #ifdef _MSC_VER |
| 248 | fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode); | 248 | fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode); |
| 249 | #else | 249 | #else |
| 250 | fstream.open(filename.c_str(), openmode); | 250 | fstream.open(filename.c_str(), openmode); |
diff --git a/src/common/logging/log.h b/src/common/logging/log.h index e16dde7fc..5fd3bd7f5 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h | |||
| @@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level, | |||
| 91 | } // namespace Log | 91 | } // namespace Log |
| 92 | 92 | ||
| 93 | #define LOG_GENERIC(log_class, log_level, ...) \ | 93 | #define LOG_GENERIC(log_class, log_level, ...) \ |
| 94 | ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \ | 94 | ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__) |
| 95 | __FILE__, __LINE__, __func__, __VA_ARGS__) | ||
| 96 | 95 | ||
| 97 | #ifdef _DEBUG | 96 | #ifdef _DEBUG |
| 98 | #define LOG_TRACE( log_class, ...) LOG_GENERIC(log_class, Trace, __VA_ARGS__) | 97 | #define LOG_TRACE( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace, __VA_ARGS__) |
| 99 | #else | 98 | #else |
| 100 | #define LOG_TRACE( log_class, ...) (void(0)) | 99 | #define LOG_TRACE( log_class, ...) (void(0)) |
| 101 | #endif | 100 | #endif |
| 102 | 101 | ||
| 103 | #define LOG_DEBUG( log_class, ...) LOG_GENERIC(log_class, Debug, __VA_ARGS__) | 102 | #define LOG_DEBUG( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug, __VA_ARGS__) |
| 104 | #define LOG_INFO( log_class, ...) LOG_GENERIC(log_class, Info, __VA_ARGS__) | 103 | #define LOG_INFO( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info, __VA_ARGS__) |
| 105 | #define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning, __VA_ARGS__) | 104 | #define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning, __VA_ARGS__) |
| 106 | #define LOG_ERROR( log_class, ...) LOG_GENERIC(log_class, Error, __VA_ARGS__) | 105 | #define LOG_ERROR( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error, __VA_ARGS__) |
| 107 | #define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__) | 106 | #define LOG_CRITICAL(log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__) |
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 4b79acd1f..939df210e 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | // Official SVN repository and contact information can be found at | 15 | // Official SVN repository and contact information can be found at |
| 16 | // http://code.google.com/p/dolphin-emu/ | 16 | // http://code.google.com/p/dolphin-emu/ |
| 17 | 17 | ||
| 18 | #include <cinttypes> | ||
| 18 | #include <cstring> | 19 | #include <cstring> |
| 19 | 20 | ||
| 20 | #include "common/assert.h" | 21 | #include "common/assert.h" |
| @@ -25,11 +26,6 @@ | |||
| 25 | #include "cpu_detect.h" | 26 | #include "cpu_detect.h" |
| 26 | #include "emitter.h" | 27 | #include "emitter.h" |
| 27 | 28 | ||
| 28 | #define PRIx64 "llx" | ||
| 29 | |||
| 30 | // Minimize the diff against Dolphin | ||
| 31 | #define DYNA_REC JIT | ||
| 32 | |||
| 33 | namespace Gen | 29 | namespace Gen |
| 34 | { | 30 | { |
| 35 | 31 | ||
| @@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr() | |||
| 113 | return code; | 109 | return code; |
| 114 | } | 110 | } |
| 115 | 111 | ||
| 112 | void XEmitter::Write8(u8 value) | ||
| 113 | { | ||
| 114 | *code++ = value; | ||
| 115 | } | ||
| 116 | |||
| 117 | void XEmitter::Write16(u16 value) | ||
| 118 | { | ||
| 119 | std::memcpy(code, &value, sizeof(u16)); | ||
| 120 | code += sizeof(u16); | ||
| 121 | } | ||
| 122 | |||
| 123 | void XEmitter::Write32(u32 value) | ||
| 124 | { | ||
| 125 | std::memcpy(code, &value, sizeof(u32)); | ||
| 126 | code += sizeof(u32); | ||
| 127 | } | ||
| 128 | |||
| 129 | void XEmitter::Write64(u64 value) | ||
| 130 | { | ||
| 131 | std::memcpy(code, &value, sizeof(u64)); | ||
| 132 | code += sizeof(u64); | ||
| 133 | } | ||
| 134 | |||
| 116 | void XEmitter::ReserveCodeSpace(int bytes) | 135 | void XEmitter::ReserveCodeSpace(int bytes) |
| 117 | { | 136 | { |
| 118 | for (int i = 0; i < bytes; i++) | 137 | for (int i = 0; i < bytes; i++) |
| @@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b) | |||
| 374 | Write8(rx); | 393 | Write8(rx); |
| 375 | } | 394 | } |
| 376 | 395 | ||
| 377 | void XEmitter::JMP(const u8 *addr, bool force5Bytes) | 396 | void XEmitter::JMP(const u8* addr, bool force5Bytes) |
| 378 | { | 397 | { |
| 379 | u64 fn = (u64)addr; | 398 | u64 fn = (u64)addr; |
| 380 | if (!force5Bytes) | 399 | if (!force5Bytes) |
| @@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes) | |||
| 398 | } | 417 | } |
| 399 | } | 418 | } |
| 400 | 419 | ||
| 401 | void XEmitter::JMPptr(const OpArg &arg2) | 420 | void XEmitter::JMPptr(const OpArg& arg2) |
| 402 | { | 421 | { |
| 403 | OpArg arg = arg2; | 422 | OpArg arg = arg2; |
| 404 | if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); | 423 | if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); |
| @@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg) | |||
| 425 | arg.WriteRest(this); | 444 | arg.WriteRest(this); |
| 426 | } | 445 | } |
| 427 | 446 | ||
| 428 | void XEmitter::CALL(const void *fnptr) | 447 | void XEmitter::CALL(const void* fnptr) |
| 429 | { | 448 | { |
| 430 | u64 distance = u64(fnptr) - (u64(code) + 5); | 449 | u64 distance = u64(fnptr) - (u64(code) + 5); |
| 431 | ASSERT_MSG( | 450 | ASSERT_MSG( |
| @@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) | |||
| 496 | } | 515 | } |
| 497 | } | 516 | } |
| 498 | 517 | ||
| 499 | void XEmitter::SetJumpTarget(const FixupBranch &branch) | 518 | void XEmitter::SetJumpTarget(const FixupBranch& branch) |
| 500 | { | 519 | { |
| 501 | if (branch.type == 0) | 520 | if (branch.type == 0) |
| 502 | { | 521 | { |
| @@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch) | |||
| 512 | } | 531 | } |
| 513 | } | 532 | } |
| 514 | 533 | ||
| 515 | // INC/DEC considered harmful on newer CPUs due to partial flag set. | ||
| 516 | // Use ADD, SUB instead. | ||
| 517 | |||
| 518 | /* | ||
| 519 | void XEmitter::INC(int bits, OpArg arg) | ||
| 520 | { | ||
| 521 | if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument"); | ||
| 522 | arg.operandReg = 0; | ||
| 523 | if (bits == 16) {Write8(0x66);} | ||
| 524 | arg.WriteRex(this, bits, bits); | ||
| 525 | Write8(bits == 8 ? 0xFE : 0xFF); | ||
| 526 | arg.WriteRest(this); | ||
| 527 | } | ||
| 528 | void XEmitter::DEC(int bits, OpArg arg) | ||
| 529 | { | ||
| 530 | if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument"); | ||
| 531 | arg.operandReg = 1; | ||
| 532 | if (bits == 16) {Write8(0x66);} | ||
| 533 | arg.WriteRex(this, bits, bits); | ||
| 534 | Write8(bits == 8 ? 0xFE : 0xFF); | ||
| 535 | arg.WriteRest(this); | ||
| 536 | } | ||
| 537 | */ | ||
| 538 | |||
| 539 | //Single byte opcodes | 534 | //Single byte opcodes |
| 540 | //There is no PUSHAD/POPAD in 64-bit mode. | 535 | //There is no PUSHAD/POPAD in 64-bit mode. |
| 541 | void XEmitter::INT3() {Write8(0xCC);} | 536 | void XEmitter::INT3() {Write8(0xCC);} |
| @@ -667,7 +662,7 @@ void XEmitter::CBW(int bits) | |||
| 667 | void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} | 662 | void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} |
| 668 | void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} | 663 | void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} |
| 669 | 664 | ||
| 670 | void XEmitter::PUSH(int bits, const OpArg &reg) | 665 | void XEmitter::PUSH(int bits, const OpArg& reg) |
| 671 | { | 666 | { |
| 672 | if (reg.IsSimpleReg()) | 667 | if (reg.IsSimpleReg()) |
| 673 | PUSH(reg.GetSimpleReg()); | 668 | PUSH(reg.GetSimpleReg()); |
| @@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg) | |||
| 703 | } | 698 | } |
| 704 | } | 699 | } |
| 705 | 700 | ||
| 706 | void XEmitter::POP(int /*bits*/, const OpArg &reg) | 701 | void XEmitter::POP(int /*bits*/, const OpArg& reg) |
| 707 | { | 702 | { |
| 708 | if (reg.IsSimpleReg()) | 703 | if (reg.IsSimpleReg()) |
| 709 | POP(reg.GetSimpleReg()); | 704 | POP(reg.GetSimpleReg()); |
| @@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) | |||
| 791 | src.WriteRest(this); | 786 | src.WriteRest(this); |
| 792 | } | 787 | } |
| 793 | 788 | ||
| 794 | void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);} | 789 | void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);} |
| 795 | void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);} | 790 | void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);} |
| 796 | void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);} | 791 | void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);} |
| 797 | void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);} | 792 | void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);} |
| 798 | void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);} | 793 | void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);} |
| 799 | void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);} | 794 | void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);} |
| 800 | 795 | ||
| 801 | void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) | 796 | void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) |
| 802 | { | 797 | { |
| @@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo | |||
| 813 | src.WriteRest(this); | 808 | src.WriteRest(this); |
| 814 | } | 809 | } |
| 815 | 810 | ||
| 816 | void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src) | 811 | void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) |
| 817 | { | 812 | { |
| 818 | if (bits <= 16) | 813 | if (bits <= 16) |
| 819 | ASSERT_MSG(0, "MOVNTI - bits<=16"); | 814 | ASSERT_MSG(0, "MOVNTI - bits<=16"); |
| 820 | WriteBitSearchType(bits, src, dest, 0xC3); | 815 | WriteBitSearchType(bits, src, dest, 0xC3); |
| 821 | } | 816 | } |
| 822 | 817 | ||
| 823 | void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit | 818 | void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit |
| 824 | void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit | 819 | void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit |
| 825 | 820 | ||
| 826 | void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src) | 821 | void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) |
| 827 | { | 822 | { |
| 828 | CheckFlags(); | 823 | CheckFlags(); |
| 829 | if (!Common::GetCPUCaps().bmi1) | 824 | if (!Common::GetCPUCaps().bmi1) |
| 830 | ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); | 825 | ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); |
| 831 | WriteBitSearchType(bits, dest, src, 0xBC, true); | 826 | WriteBitSearchType(bits, dest, src, 0xBC, true); |
| 832 | } | 827 | } |
| 833 | void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src) | 828 | void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) |
| 834 | { | 829 | { |
| 835 | CheckFlags(); | 830 | CheckFlags(); |
| 836 | if (!Common::GetCPUCaps().lzcnt) | 831 | if (!Common::GetCPUCaps().lzcnt) |
| @@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src) | |||
| 950 | } | 945 | } |
| 951 | 946 | ||
| 952 | //shift can be either imm8 or cl | 947 | //shift can be either imm8 or cl |
| 953 | void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) | 948 | void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) |
| 954 | { | 949 | { |
| 955 | CheckFlags(); | 950 | CheckFlags(); |
| 956 | bool writeImm = false; | 951 | bool writeImm = false; |
| @@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext) | |||
| 991 | 986 | ||
| 992 | // large rotates and shift are slower on intel than amd | 987 | // large rotates and shift are slower on intel than amd |
| 993 | // intel likes to rotate by 1, and the op is smaller too | 988 | // intel likes to rotate by 1, and the op is smaller too |
| 994 | void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);} | 989 | void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);} |
| 995 | void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);} | 990 | void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);} |
| 996 | void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);} | 991 | void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);} |
| 997 | void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);} | 992 | void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);} |
| 998 | void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);} | 993 | void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);} |
| 999 | void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} | 994 | void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);} |
| 1000 | void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} | 995 | void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);} |
| 1001 | 996 | ||
| 1002 | // index can be either imm8 or register, don't use memory destination because it's slow | 997 | // index can be either imm8 or register, don't use memory destination because it's slow |
| 1003 | void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) | 998 | void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) |
| 1004 | { | 999 | { |
| 1005 | CheckFlags(); | 1000 | CheckFlags(); |
| 1006 | if (dest.IsImm()) | 1001 | if (dest.IsImm()) |
| @@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) | |||
| 1029 | } | 1024 | } |
| 1030 | } | 1025 | } |
| 1031 | 1026 | ||
| 1032 | void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} | 1027 | void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);} |
| 1033 | void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} | 1028 | void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);} |
| 1034 | void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} | 1029 | void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);} |
| 1035 | void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} | 1030 | void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);} |
| 1036 | 1031 | ||
| 1037 | //shift can be either imm8 or cl | 1032 | //shift can be either imm8 or cl |
| 1038 | void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) | 1033 | void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) |
| 1039 | { | 1034 | { |
| 1040 | CheckFlags(); | 1035 | CheckFlags(); |
| 1041 | if (dest.IsImm()) | 1036 | if (dest.IsImm()) |
| @@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift) | |||
| 1067 | } | 1062 | } |
| 1068 | } | 1063 | } |
| 1069 | 1064 | ||
| 1070 | void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift) | 1065 | void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) |
| 1071 | { | 1066 | { |
| 1072 | CheckFlags(); | 1067 | CheckFlags(); |
| 1073 | if (dest.IsImm()) | 1068 | if (dest.IsImm()) |
| @@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit | |||
| 1111 | } | 1106 | } |
| 1112 | 1107 | ||
| 1113 | //operand can either be immediate or register | 1108 | //operand can either be immediate or register |
| 1114 | void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const | 1109 | void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const |
| 1115 | { | 1110 | { |
| 1116 | X64Reg _operandReg; | 1111 | X64Reg _operandReg; |
| 1117 | if (IsImm()) | 1112 | if (IsImm()) |
| @@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o | |||
| 1257 | } | 1252 | } |
| 1258 | } | 1253 | } |
| 1259 | 1254 | ||
| 1260 | void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2) | 1255 | void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2) |
| 1261 | { | 1256 | { |
| 1262 | if (a1.IsImm()) | 1257 | if (a1.IsImm()) |
| 1263 | { | 1258 | { |
| @@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg | |||
| 1283 | } | 1278 | } |
| 1284 | } | 1279 | } |
| 1285 | 1280 | ||
| 1286 | void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} | 1281 | void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} |
| 1287 | void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} | 1282 | void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} |
| 1288 | void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} | 1283 | void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} |
| 1289 | void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} | 1284 | void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} |
| 1290 | void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} | 1285 | void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} |
| 1291 | void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} | 1286 | void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} |
| 1292 | void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} | 1287 | void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} |
| 1293 | void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) | 1288 | void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2) |
| 1294 | { | 1289 | { |
| 1295 | if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) | 1290 | if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) |
| 1296 | LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); | 1291 | LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); |
| 1297 | WriteNormalOp(this, bits, nrmMOV, a1, a2); | 1292 | WriteNormalOp(this, bits, nrmMOV, a1, a2); |
| 1298 | } | 1293 | } |
| 1299 | void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} | 1294 | void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} |
| 1300 | void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} | 1295 | void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} |
| 1301 | void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} | 1296 | void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} |
| 1302 | 1297 | ||
| 1303 | void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) | 1298 | void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) |
| 1304 | { | 1299 | { |
| 1305 | CheckFlags(); | 1300 | CheckFlags(); |
| 1306 | if (bits == 8) | 1301 | if (bits == 8) |
| @@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2) | |||
| 1353 | } | 1348 | } |
| 1354 | } | 1349 | } |
| 1355 | 1350 | ||
| 1356 | void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a) | 1351 | void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) |
| 1357 | { | 1352 | { |
| 1358 | CheckFlags(); | 1353 | CheckFlags(); |
| 1359 | if (bits == 8) | 1354 | if (bits == 8) |
| @@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr | |||
| 1390 | arg.WriteRest(this, extrabytes); | 1385 | arg.WriteRest(this, extrabytes); |
| 1391 | } | 1386 | } |
| 1392 | 1387 | ||
| 1393 | void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) | 1388 | void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) |
| 1394 | { | 1389 | { |
| 1395 | WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); | 1390 | WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); |
| 1396 | } | 1391 | } |
| @@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op) | |||
| 1400 | // Currently, only 0x38 and 0x3A are used as secondary escape byte. | 1395 | // Currently, only 0x38 and 0x3A are used as secondary escape byte. |
| 1401 | if ((op >> 8) == 0x3A) | 1396 | if ((op >> 8) == 0x3A) |
| 1402 | return 3; | 1397 | return 3; |
| 1403 | else if ((op >> 8) == 0x38) | 1398 | if ((op >> 8) == 0x38) |
| 1404 | return 2; | 1399 | return 2; |
| 1405 | else | 1400 | |
| 1406 | return 1; | 1401 | return 1; |
| 1407 | } | 1402 | } |
| 1408 | 1403 | ||
| 1409 | static int GetVEXpp(u8 opPrefix) | 1404 | static int GetVEXpp(u8 opPrefix) |
| 1410 | { | 1405 | { |
| 1411 | if (opPrefix == 0x66) | 1406 | if (opPrefix == 0x66) |
| 1412 | return 1; | 1407 | return 1; |
| 1413 | else if (opPrefix == 0xF3) | 1408 | if (opPrefix == 0xF3) |
| 1414 | return 2; | 1409 | return 2; |
| 1415 | else if (opPrefix == 0xF2) | 1410 | if (opPrefix == 0xF2) |
| 1416 | return 3; | 1411 | return 3; |
| 1417 | else | 1412 | |
| 1418 | return 0; | 1413 | return 0; |
| 1419 | } | 1414 | } |
| 1420 | 1415 | ||
| 1421 | void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) | 1416 | void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) |
| 1422 | { | 1417 | { |
| 1423 | if (!Common::GetCPUCaps().avx) | 1418 | if (!Common::GetCPUCaps().avx) |
| 1424 | ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); | 1419 | ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); |
| @@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA | |||
| 1431 | } | 1426 | } |
| 1432 | 1427 | ||
| 1433 | // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 | 1428 | // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 |
| 1434 | void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) | 1429 | void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) |
| 1435 | { | 1430 | { |
| 1436 | if (size != 32 && size != 64) | 1431 | if (size != 32 && size != 64) |
| 1437 | ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); | 1432 | ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); |
| @@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r | |||
| 1442 | arg.WriteRest(this, extrabytes, regOp1); | 1437 | arg.WriteRest(this, extrabytes, regOp1); |
| 1443 | } | 1438 | } |
| 1444 | 1439 | ||
| 1445 | void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) | 1440 | void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) |
| 1446 | { | 1441 | { |
| 1447 | CheckFlags(); | 1442 | CheckFlags(); |
| 1448 | if (!Common::GetCPUCaps().bmi1) | 1443 | if (!Common::GetCPUCaps().bmi1) |
| @@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg | |||
| 1450 | WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); | 1445 | WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); |
| 1451 | } | 1446 | } |
| 1452 | 1447 | ||
| 1453 | void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) | 1448 | void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) |
| 1454 | { | 1449 | { |
| 1455 | CheckFlags(); | 1450 | CheckFlags(); |
| 1456 | if (!Common::GetCPUCaps().bmi2) | 1451 | if (!Common::GetCPUCaps().bmi2) |
| @@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext) | |||
| 1517 | arg.WriteRest(this); | 1512 | arg.WriteRest(this); |
| 1518 | } | 1513 | } |
| 1519 | 1514 | ||
| 1520 | void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);} | 1515 | void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);} |
| 1521 | void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);} | 1516 | void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);} |
| 1522 | 1517 | ||
| 1523 | void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} | 1518 | void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} |
| 1524 | void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} | 1519 | void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} |
| 1525 | void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} | 1520 | void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} |
| 1526 | 1521 | ||
| 1527 | void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} | 1522 | void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} |
| 1528 | void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} | 1523 | void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} |
| 1529 | void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} | 1524 | void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} |
| 1530 | void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} | 1525 | void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} |
| 1531 | void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} | 1526 | void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} |
| 1532 | void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} | 1527 | void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} |
| 1533 | void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} | 1528 | void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} |
| 1534 | void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} | 1529 | void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} |
| 1535 | void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} | 1530 | void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} |
| 1536 | void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} | 1531 | void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} |
| 1537 | void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} | 1532 | void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} |
| 1538 | void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} | 1533 | void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} |
| 1539 | void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} | 1534 | void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} |
| 1540 | void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} | 1535 | void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} |
| 1541 | void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} | 1536 | void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} |
| 1542 | void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} | 1537 | void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} |
| 1543 | void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} | 1538 | void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);} |
| 1544 | 1539 | void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} | |
| 1545 | void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} | 1540 | |
| 1546 | void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} | 1541 | void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} |
| 1547 | void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} | 1542 | void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} |
| 1548 | void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} | 1543 | void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} |
| 1549 | void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} | 1544 | void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} |
| 1550 | void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} | 1545 | void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} |
| 1551 | void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} | 1546 | void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} |
| 1552 | void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} | 1547 | void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} |
| 1553 | void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} | 1548 | void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} |
| 1554 | void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} | 1549 | void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} |
| 1555 | void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} | 1550 | void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} |
| 1556 | void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} | 1551 | void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} |
| 1557 | void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} | 1552 | void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} |
| 1558 | void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} | 1553 | void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} |
| 1559 | void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} | 1554 | void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} |
| 1560 | void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} | 1555 | void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} |
| 1561 | void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} | 1556 | void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} |
| 1562 | void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} | 1557 | void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} |
| 1563 | void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} | 1558 | void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} |
| 1564 | void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} | 1559 | void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} |
| 1565 | void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} | 1560 | void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} |
| 1566 | void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} | 1561 | void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} |
| 1567 | void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} | 1562 | void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} |
| 1568 | void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} | 1563 | void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} |
| 1569 | void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } | 1564 | void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} |
| 1570 | void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} | 1565 | void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } |
| 1571 | void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} | 1566 | void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} |
| 1572 | void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} | 1567 | void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} |
| 1573 | 1568 | void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} | |
| 1574 | void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} | 1569 | |
| 1575 | 1570 | void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} | |
| 1576 | void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed | 1571 | |
| 1577 | void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered | 1572 | void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed |
| 1578 | void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered | 1573 | void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered |
| 1579 | void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} | 1574 | void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered |
| 1580 | 1575 | void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} | |
| 1581 | void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} | 1576 | |
| 1582 | void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} | 1577 | void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} |
| 1583 | void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} | 1578 | void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} |
| 1584 | void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} | 1579 | void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} |
| 1585 | 1580 | void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} | |
| 1586 | void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} | 1581 | |
| 1587 | void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} | 1582 | void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} |
| 1588 | void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} | 1583 | void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} |
| 1589 | void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} | 1584 | void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} |
| 1590 | 1585 | void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} | |
| 1591 | void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} | 1586 | |
| 1592 | void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} | 1587 | void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} |
| 1593 | void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} | 1588 | void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} |
| 1594 | void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} | 1589 | void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} |
| 1595 | 1590 | void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} | |
| 1596 | void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} | 1591 | |
| 1597 | void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} | 1592 | void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} |
| 1598 | void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} | 1593 | void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} |
| 1599 | void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} | 1594 | void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} |
| 1600 | 1595 | void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} | |
| 1601 | void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } | 1596 | |
| 1602 | void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } | 1597 | void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } |
| 1603 | void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } | 1598 | void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } |
| 1604 | void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } | 1599 | void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } |
| 1605 | 1600 | void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } | |
| 1606 | void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } | 1601 | |
| 1607 | void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } | 1602 | void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } |
| 1608 | void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } | 1603 | void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } |
| 1609 | void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } | 1604 | void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } |
| 1605 | void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } | ||
| 1610 | 1606 | ||
| 1611 | void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} | 1607 | void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} |
| 1612 | void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} | 1608 | void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} |
| 1613 | 1609 | ||
| 1614 | void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} | 1610 | void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} |
| 1615 | void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} | 1611 | void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} |
| 1616 | 1612 | ||
| 1617 | void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} | 1613 | void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} |
| 1618 | void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} | 1614 | void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} |
| 1619 | void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} | 1615 | void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} |
| 1620 | void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} | 1616 | void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} |
| 1621 | void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} | 1617 | void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} |
| 1622 | void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} | 1618 | void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} |
| 1623 | 1619 | ||
| 1624 | void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} | 1620 | void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} |
| 1625 | void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} | 1621 | void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} |
| 1626 | void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} | 1622 | void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} |
| 1627 | void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} | 1623 | void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} |
| 1628 | 1624 | ||
| 1629 | void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} | 1625 | void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} |
| 1630 | void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} | 1626 | void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} |
| 1631 | void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} | 1627 | void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} |
| 1632 | void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} | 1628 | void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} |
| 1633 | 1629 | ||
| 1634 | void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} | 1630 | void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} |
| 1635 | 1631 | ||
| 1636 | void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);} | 1632 | void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);} |
| 1637 | void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);} | 1633 | void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);} |
| 1638 | 1634 | ||
| 1639 | void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only | 1635 | void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only |
| 1640 | 1636 | ||
| 1641 | // THESE TWO ARE UNTESTED. | 1637 | // THESE TWO ARE UNTESTED. |
| 1642 | void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} | 1638 | void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} |
| 1643 | void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} | 1639 | void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} |
| 1644 | 1640 | ||
| 1645 | void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);} | 1641 | void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} |
| 1646 | void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);} | 1642 | void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} |
| 1647 | 1643 | ||
| 1648 | void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) | 1644 | void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) |
| 1649 | { | 1645 | { |
| 1650 | if (Common::GetCPUCaps().sse3) | 1646 | if (Common::GetCPUCaps().sse3) |
| 1651 | { | 1647 | { |
| @@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg) | |||
| 1663 | //There are a few more left | 1659 | //There are a few more left |
| 1664 | 1660 | ||
| 1665 | // Also some integer instructions are missing | 1661 | // Also some integer instructions are missing |
| 1666 | void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} | 1662 | void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} |
| 1667 | void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);} | 1663 | void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);} |
| 1668 | void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);} | 1664 | void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);} |
| 1669 | 1665 | ||
| 1670 | void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} | 1666 | void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} |
| 1671 | void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} | 1667 | void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} |
| @@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift) | |||
| 1690 | Write8(shift); | 1686 | Write8(shift); |
| 1691 | } | 1687 | } |
| 1692 | 1688 | ||
| 1693 | void XEmitter::PSRLQ(X64Reg reg, OpArg arg) | 1689 | void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) |
| 1694 | { | 1690 | { |
| 1695 | WriteSSEOp(0x66, 0xd3, reg, arg); | 1691 | WriteSSEOp(0x66, 0xd3, reg, arg); |
| 1696 | } | 1692 | } |
| @@ -1735,212 +1731,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift) | |||
| 1735 | Write8(shift); | 1731 | Write8(shift); |
| 1736 | } | 1732 | } |
| 1737 | 1733 | ||
| 1738 | void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) | 1734 | void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) |
| 1739 | { | 1735 | { |
| 1740 | if (!Common::GetCPUCaps().ssse3) | 1736 | if (!Common::GetCPUCaps().ssse3) |
| 1741 | ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); | 1737 | ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); |
| 1742 | WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); | 1738 | WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); |
| 1743 | } | 1739 | } |
| 1744 | 1740 | ||
| 1745 | void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) | 1741 | void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) |
| 1746 | { | 1742 | { |
| 1747 | if (!Common::GetCPUCaps().sse4_1) | 1743 | if (!Common::GetCPUCaps().sse4_1) |
| 1748 | ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); | 1744 | ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); |
| 1749 | WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); | 1745 | WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); |
| 1750 | } | 1746 | } |
| 1751 | 1747 | ||
| 1752 | void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} | 1748 | void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} |
| 1753 | void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} | 1749 | void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} |
| 1754 | void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} | 1750 | void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} |
| 1755 | void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} | 1751 | void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} |
| 1756 | 1752 | ||
| 1757 | void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} | 1753 | void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} |
| 1758 | void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} | 1754 | void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} |
| 1759 | void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} | 1755 | void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} |
| 1760 | void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} | 1756 | void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} |
| 1761 | void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} | 1757 | void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} |
| 1762 | void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} | 1758 | void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} |
| 1763 | void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} | 1759 | void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} |
| 1764 | void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} | 1760 | void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} |
| 1765 | 1761 | ||
| 1766 | void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} | 1762 | void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} |
| 1767 | void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} | 1763 | void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} |
| 1768 | void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} | 1764 | void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} |
| 1769 | void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} | 1765 | void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} |
| 1770 | void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} | 1766 | void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} |
| 1771 | void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} | 1767 | void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} |
| 1772 | void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} | 1768 | void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} |
| 1773 | void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} | 1769 | void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} |
| 1774 | void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} | 1770 | void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} |
| 1775 | void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} | 1771 | void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} |
| 1776 | void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} | 1772 | void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} |
| 1777 | void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} | 1773 | void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} |
| 1778 | 1774 | ||
| 1779 | void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} | 1775 | void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} |
| 1780 | void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} | 1776 | void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} |
| 1781 | void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} | 1777 | void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} |
| 1782 | void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } | 1778 | void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } |
| 1783 | void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } | 1779 | void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } |
| 1784 | 1780 | ||
| 1785 | void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} | 1781 | void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} |
| 1786 | void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} | 1782 | void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} |
| 1787 | void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} | 1783 | void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} |
| 1788 | void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} | 1784 | void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} |
| 1789 | 1785 | ||
| 1790 | void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} | 1786 | void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} |
| 1791 | void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} | 1787 | void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} |
| 1792 | void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} | 1788 | void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} |
| 1793 | void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} | 1789 | void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} |
| 1794 | 1790 | ||
| 1795 | void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} | 1791 | void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} |
| 1796 | void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} | 1792 | void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} |
| 1797 | void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} | 1793 | void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} |
| 1798 | void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} | 1794 | void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} |
| 1799 | 1795 | ||
| 1800 | void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} | 1796 | void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} |
| 1801 | void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);} | 1797 | void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);} |
| 1802 | void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} | 1798 | void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} |
| 1803 | void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} | 1799 | void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} |
| 1804 | 1800 | ||
| 1805 | void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} | 1801 | void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} |
| 1806 | void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} | 1802 | void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} |
| 1807 | void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} | 1803 | void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} |
| 1808 | void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} | 1804 | void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} |
| 1809 | 1805 | ||
| 1810 | void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} | 1806 | void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} |
| 1811 | void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} | 1807 | void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} |
| 1812 | void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} | 1808 | void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} |
| 1813 | void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} | 1809 | void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} |
| 1814 | 1810 | ||
| 1815 | void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} | 1811 | void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} |
| 1816 | void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} | 1812 | void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} |
| 1817 | 1813 | ||
| 1818 | void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);} | 1814 | void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);} |
| 1819 | void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);} | 1815 | void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);} |
| 1820 | void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);} | 1816 | void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);} |
| 1821 | 1817 | ||
| 1822 | void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);} | 1818 | void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);} |
| 1823 | void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);} | 1819 | void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);} |
| 1824 | void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);} | 1820 | void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);} |
| 1825 | 1821 | ||
| 1826 | void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} | 1822 | void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} |
| 1827 | void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} | 1823 | void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} |
| 1828 | 1824 | ||
| 1829 | void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } | 1825 | void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } |
| 1830 | void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} | 1826 | void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} |
| 1831 | 1827 | ||
| 1832 | void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } | 1828 | void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } |
| 1833 | void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } | 1829 | void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } |
| 1834 | void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } | 1830 | void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } |
| 1835 | void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } | 1831 | void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } |
| 1836 | 1832 | ||
| 1837 | void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } | 1833 | void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } |
| 1838 | void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} | 1834 | void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} |
| 1839 | void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} | 1835 | void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} |
| 1840 | void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} | 1836 | void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} |
| 1841 | 1837 | ||
| 1842 | // VEX | 1838 | // VEX |
| 1843 | void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} | 1839 | void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} |
| 1844 | void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} | 1840 | void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} |
| 1845 | void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} | 1841 | void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} |
| 1846 | void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} | 1842 | void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} |
| 1847 | void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} | 1843 | void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} |
| 1848 | void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} | 1844 | void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} |
| 1849 | void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} | 1845 | void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} |
| 1850 | void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} | 1846 | void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} |
| 1851 | void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} | 1847 | void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} |
| 1852 | void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} | 1848 | void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} |
| 1853 | void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} | 1849 | void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} |
| 1854 | void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} | 1850 | void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} |
| 1855 | 1851 | ||
| 1856 | void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } | 1852 | void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } |
| 1857 | void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } | 1853 | void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } |
| 1858 | void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } | 1854 | void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } |
| 1859 | void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } | 1855 | void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } |
| 1860 | void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } | 1856 | void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } |
| 1861 | void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } | 1857 | void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } |
| 1862 | void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } | 1858 | void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } |
| 1863 | void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } | 1859 | void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } |
| 1864 | 1860 | ||
| 1865 | void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } | 1861 | void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } |
| 1866 | void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } | 1862 | void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } |
| 1867 | void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } | 1863 | void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } |
| 1868 | void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } | 1864 | void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } |
| 1869 | 1865 | ||
| 1870 | void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } | 1866 | void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } |
| 1871 | void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } | 1867 | void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } |
| 1872 | void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } | 1868 | void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } |
| 1873 | void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } | 1869 | void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } |
| 1874 | void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } | 1870 | void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } |
| 1875 | void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } | 1871 | void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } |
| 1876 | void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } | 1872 | void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } |
| 1877 | void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } | 1873 | void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } |
| 1878 | void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } | 1874 | void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } |
| 1879 | void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } | 1875 | void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } |
| 1880 | void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } | 1876 | void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } |
| 1881 | void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } | 1877 | void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } |
| 1882 | void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } | 1878 | void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } |
| 1883 | void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } | 1879 | void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } |
| 1884 | void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } | 1880 | void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } |
| 1885 | void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } | 1881 | void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } |
| 1886 | void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } | 1882 | void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } |
| 1887 | void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } | 1883 | void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } |
| 1888 | void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } | 1884 | void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } |
| 1889 | void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } | 1885 | void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } |
| 1890 | void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } | 1886 | void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } |
| 1891 | void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } | 1887 | void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } |
| 1892 | void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } | 1888 | void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } |
| 1893 | void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } | 1889 | void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } |
| 1894 | void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } | 1890 | void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } |
| 1895 | void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } | 1891 | void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } |
| 1896 | void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } | 1892 | void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } |
| 1897 | void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } | 1893 | void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } |
| 1898 | void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } | 1894 | void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } |
| 1899 | void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } | 1895 | void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } |
| 1900 | void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } | 1896 | void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } |
| 1901 | void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } | 1897 | void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } |
| 1902 | void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } | 1898 | void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } |
| 1903 | void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } | 1899 | void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } |
| 1904 | void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } | 1900 | void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } |
| 1905 | void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } | 1901 | void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } |
| 1906 | void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } | 1902 | void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } |
| 1907 | void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } | 1903 | void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } |
| 1908 | void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } | 1904 | void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } |
| 1909 | void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } | 1905 | void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } |
| 1910 | void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } | 1906 | void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } |
| 1911 | void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } | 1907 | void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } |
| 1912 | void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } | 1908 | void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } |
| 1913 | void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } | 1909 | void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } |
| 1914 | void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } | 1910 | void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } |
| 1915 | void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } | 1911 | void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } |
| 1916 | void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } | 1912 | void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } |
| 1917 | void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } | 1913 | void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } |
| 1918 | void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } | 1914 | void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } |
| 1919 | void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } | 1915 | void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } |
| 1920 | void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } | 1916 | void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } |
| 1921 | void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } | 1917 | void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } |
| 1922 | void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } | 1918 | void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } |
| 1923 | void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } | 1919 | void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } |
| 1924 | void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } | 1920 | void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } |
| 1925 | void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } | 1921 | void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } |
| 1926 | void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } | 1922 | void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } |
| 1927 | void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } | 1923 | void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } |
| 1928 | void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } | 1924 | void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } |
| 1929 | void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } | 1925 | void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } |
| 1930 | 1926 | ||
| 1931 | void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} | 1927 | void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} |
| 1932 | void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} | 1928 | void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} |
| 1933 | void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} | 1929 | void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} |
| 1934 | void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} | 1930 | void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} |
| 1935 | void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} | 1931 | void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} |
| 1936 | void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} | 1932 | void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} |
| 1937 | void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} | 1933 | void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} |
| 1938 | void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} | 1934 | void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} |
| 1939 | void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} | 1935 | void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} |
| 1940 | void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} | 1936 | void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} |
| 1941 | void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} | 1937 | void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} |
| 1942 | void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} | 1938 | void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} |
| 1943 | void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} | 1939 | void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} |
| 1944 | 1940 | ||
| 1945 | // Prefixes | 1941 | // Prefixes |
| 1946 | 1942 | ||
| @@ -1956,7 +1952,7 @@ void XEmitter::FWAIT() | |||
| 1956 | } | 1952 | } |
| 1957 | 1953 | ||
| 1958 | // TODO: make this more generic | 1954 | // TODO: make this more generic |
| 1959 | void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg) | 1955 | void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) |
| 1960 | { | 1956 | { |
| 1961 | int mf = 0; | 1957 | int mf = 0; |
| 1962 | ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); | 1958 | ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); |
| @@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a | |||
| 1974 | arg.WriteRest(this, 0, (X64Reg) op); | 1970 | arg.WriteRest(this, 0, (X64Reg) op); |
| 1975 | } | 1971 | } |
| 1976 | 1972 | ||
| 1977 | void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} | 1973 | void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} |
| 1978 | void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} | 1974 | void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} |
| 1979 | void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} | 1975 | void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} |
| 1980 | void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } | 1976 | void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } |
| 1981 | 1977 | ||
| 1982 | void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } | 1978 | void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } |
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index e9c924126..a49cd2cf1 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h | |||
| @@ -328,8 +328,6 @@ enum SSECompare | |||
| 328 | ORD, | 328 | ORD, |
| 329 | }; | 329 | }; |
| 330 | 330 | ||
| 331 | typedef const u8* JumpTarget; | ||
| 332 | |||
| 333 | class XEmitter | 331 | class XEmitter |
| 334 | { | 332 | { |
| 335 | friend struct OpArg; // for Write8 etc | 333 | friend struct OpArg; // for Write8 etc |
| @@ -344,27 +342,27 @@ private: | |||
| 344 | void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); | 342 | void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg); |
| 345 | void WriteMulDivType(int bits, OpArg src, int ext); | 343 | void WriteMulDivType(int bits, OpArg src, int ext); |
| 346 | void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); | 344 | void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false); |
| 347 | void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); | 345 | void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext); |
| 348 | void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); | 346 | void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext); |
| 349 | void WriteMXCSR(OpArg arg, int ext); | 347 | void WriteMXCSR(OpArg arg, int ext); |
| 350 | void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 348 | void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); |
| 351 | void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 349 | void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 352 | void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 350 | void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 353 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0); | 351 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); |
| 354 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 352 | void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 355 | void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 353 | void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 356 | void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 354 | void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 357 | void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); | 355 | void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); |
| 358 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); | 356 | void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); |
| 359 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); | 357 | void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); |
| 360 | 358 | ||
| 361 | void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); | 359 | void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); |
| 362 | 360 | ||
| 363 | protected: | 361 | protected: |
| 364 | inline void Write8(u8 value) {*code++ = value;} | 362 | void Write8(u8 value); |
| 365 | inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} | 363 | void Write16(u16 value); |
| 366 | inline void Write32(u32 value) {*(u32*)code = (value); code += 4;} | 364 | void Write32(u32 value); |
| 367 | inline void Write64(u64 value) {*(u64*)code = (value); code += 8;} | 365 | void Write64(u64 value); |
| 368 | 366 | ||
| 369 | public: | 367 | public: |
| 370 | XEmitter() { code = nullptr; flags_locked = false; } | 368 | XEmitter() { code = nullptr; flags_locked = false; } |
| @@ -413,8 +411,8 @@ public: | |||
| 413 | // Stack control | 411 | // Stack control |
| 414 | void PUSH(X64Reg reg); | 412 | void PUSH(X64Reg reg); |
| 415 | void POP(X64Reg reg); | 413 | void POP(X64Reg reg); |
| 416 | void PUSH(int bits, const OpArg &reg); | 414 | void PUSH(int bits, const OpArg& reg); |
| 417 | void POP(int bits, const OpArg &reg); | 415 | void POP(int bits, const OpArg& reg); |
| 418 | void PUSHF(); | 416 | void PUSHF(); |
| 419 | void POPF(); | 417 | void POPF(); |
| 420 | 418 | ||
| @@ -424,21 +422,19 @@ public: | |||
| 424 | void UD2(); | 422 | void UD2(); |
| 425 | FixupBranch J(bool force5bytes = false); | 423 | FixupBranch J(bool force5bytes = false); |
| 426 | 424 | ||
| 427 | void JMP(const u8 * addr, bool force5Bytes = false); | 425 | void JMP(const u8* addr, bool force5Bytes = false); |
| 428 | void JMP(OpArg arg); | 426 | void JMPptr(const OpArg& arg); |
| 429 | void JMPptr(const OpArg &arg); | ||
| 430 | void JMPself(); //infinite loop! | 427 | void JMPself(); //infinite loop! |
| 431 | #ifdef CALL | 428 | #ifdef CALL |
| 432 | #undef CALL | 429 | #undef CALL |
| 433 | #endif | 430 | #endif |
| 434 | void CALL(const void *fnptr); | 431 | void CALL(const void* fnptr); |
| 435 | void CALLptr(OpArg arg); | 432 | void CALLptr(OpArg arg); |
| 436 | 433 | ||
| 437 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); | 434 | FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false); |
| 438 | //void J_CC(CCFlags conditionCode, JumpTarget target); | 435 | void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false); |
| 439 | void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false); | ||
| 440 | 436 | ||
| 441 | void SetJumpTarget(const FixupBranch &branch); | 437 | void SetJumpTarget(const FixupBranch& branch); |
| 442 | 438 | ||
| 443 | void SETcc(CCFlags flag, OpArg dest); | 439 | void SETcc(CCFlags flag, OpArg dest); |
| 444 | // Note: CMOV brings small if any benefit on current cpus. | 440 | // Note: CMOV brings small if any benefit on current cpus. |
| @@ -450,8 +446,8 @@ public: | |||
| 450 | void SFENCE(); | 446 | void SFENCE(); |
| 451 | 447 | ||
| 452 | // Bit scan | 448 | // Bit scan |
| 453 | void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit | 449 | void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit |
| 454 | void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit | 450 | void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit |
| 455 | 451 | ||
| 456 | // Cache control | 452 | // Cache control |
| 457 | enum PrefetchLevel | 453 | enum PrefetchLevel |
| @@ -462,67 +458,67 @@ public: | |||
| 462 | PF_T2, //Levels 3+ (aliased to T0 on AMD) | 458 | PF_T2, //Levels 3+ (aliased to T0 on AMD) |
| 463 | }; | 459 | }; |
| 464 | void PREFETCH(PrefetchLevel level, OpArg arg); | 460 | void PREFETCH(PrefetchLevel level, OpArg arg); |
| 465 | void MOVNTI(int bits, OpArg dest, X64Reg src); | 461 | void MOVNTI(int bits, const OpArg& dest, X64Reg src); |
| 466 | void MOVNTDQ(OpArg arg, X64Reg regOp); | 462 | void MOVNTDQ(const OpArg& arg, X64Reg regOp); |
| 467 | void MOVNTPS(OpArg arg, X64Reg regOp); | 463 | void MOVNTPS(const OpArg& arg, X64Reg regOp); |
| 468 | void MOVNTPD(OpArg arg, X64Reg regOp); | 464 | void MOVNTPD(const OpArg& arg, X64Reg regOp); |
| 469 | 465 | ||
| 470 | // Multiplication / division | 466 | // Multiplication / division |
| 471 | void MUL(int bits, OpArg src); //UNSIGNED | 467 | void MUL(int bits, const OpArg& src); //UNSIGNED |
| 472 | void IMUL(int bits, OpArg src); //SIGNED | 468 | void IMUL(int bits, const OpArg& src); //SIGNED |
| 473 | void IMUL(int bits, X64Reg regOp, OpArg src); | 469 | void IMUL(int bits, X64Reg regOp, const OpArg& src); |
| 474 | void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm); | 470 | void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); |
| 475 | void DIV(int bits, OpArg src); | 471 | void DIV(int bits, const OpArg& src); |
| 476 | void IDIV(int bits, OpArg src); | 472 | void IDIV(int bits, const OpArg& src); |
| 477 | 473 | ||
| 478 | // Shift | 474 | // Shift |
| 479 | void ROL(int bits, OpArg dest, OpArg shift); | 475 | void ROL(int bits, const OpArg& dest, const OpArg& shift); |
| 480 | void ROR(int bits, OpArg dest, OpArg shift); | 476 | void ROR(int bits, const OpArg& dest, const OpArg& shift); |
| 481 | void RCL(int bits, OpArg dest, OpArg shift); | 477 | void RCL(int bits, const OpArg& dest, const OpArg& shift); |
| 482 | void RCR(int bits, OpArg dest, OpArg shift); | 478 | void RCR(int bits, const OpArg& dest, const OpArg& shift); |
| 483 | void SHL(int bits, OpArg dest, OpArg shift); | 479 | void SHL(int bits, const OpArg& dest, const OpArg& shift); |
| 484 | void SHR(int bits, OpArg dest, OpArg shift); | 480 | void SHR(int bits, const OpArg& dest, const OpArg& shift); |
| 485 | void SAR(int bits, OpArg dest, OpArg shift); | 481 | void SAR(int bits, const OpArg& dest, const OpArg& shift); |
| 486 | 482 | ||
| 487 | // Bit Test | 483 | // Bit Test |
| 488 | void BT(int bits, OpArg dest, OpArg index); | 484 | void BT(int bits, const OpArg& dest, const OpArg& index); |
| 489 | void BTS(int bits, OpArg dest, OpArg index); | 485 | void BTS(int bits, const OpArg& dest, const OpArg& index); |
| 490 | void BTR(int bits, OpArg dest, OpArg index); | 486 | void BTR(int bits, const OpArg& dest, const OpArg& index); |
| 491 | void BTC(int bits, OpArg dest, OpArg index); | 487 | void BTC(int bits, const OpArg& dest, const OpArg& index); |
| 492 | 488 | ||
| 493 | // Double-Precision Shift | 489 | // Double-Precision Shift |
| 494 | void SHRD(int bits, OpArg dest, OpArg src, OpArg shift); | 490 | void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); |
| 495 | void SHLD(int bits, OpArg dest, OpArg src, OpArg shift); | 491 | void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift); |
| 496 | 492 | ||
| 497 | // Extend EAX into EDX in various ways | 493 | // Extend EAX into EDX in various ways |
| 498 | void CWD(int bits = 16); | 494 | void CWD(int bits = 16); |
| 499 | inline void CDQ() {CWD(32);} | 495 | void CDQ() {CWD(32);} |
| 500 | inline void CQO() {CWD(64);} | 496 | void CQO() {CWD(64);} |
| 501 | void CBW(int bits = 8); | 497 | void CBW(int bits = 8); |
| 502 | inline void CWDE() {CBW(16);} | 498 | void CWDE() {CBW(16);} |
| 503 | inline void CDQE() {CBW(32);} | 499 | void CDQE() {CBW(32);} |
| 504 | 500 | ||
| 505 | // Load effective address | 501 | // Load effective address |
| 506 | void LEA(int bits, X64Reg dest, OpArg src); | 502 | void LEA(int bits, X64Reg dest, OpArg src); |
| 507 | 503 | ||
| 508 | // Integer arithmetic | 504 | // Integer arithmetic |
| 509 | void NEG (int bits, OpArg src); | 505 | void NEG(int bits, const OpArg& src); |
| 510 | void ADD (int bits, const OpArg &a1, const OpArg &a2); | 506 | void ADD(int bits, const OpArg& a1, const OpArg& a2); |
| 511 | void ADC (int bits, const OpArg &a1, const OpArg &a2); | 507 | void ADC(int bits, const OpArg& a1, const OpArg& a2); |
| 512 | void SUB (int bits, const OpArg &a1, const OpArg &a2); | 508 | void SUB(int bits, const OpArg& a1, const OpArg& a2); |
| 513 | void SBB (int bits, const OpArg &a1, const OpArg &a2); | 509 | void SBB(int bits, const OpArg& a1, const OpArg& a2); |
| 514 | void AND (int bits, const OpArg &a1, const OpArg &a2); | 510 | void AND(int bits, const OpArg& a1, const OpArg& a2); |
| 515 | void CMP (int bits, const OpArg &a1, const OpArg &a2); | 511 | void CMP(int bits, const OpArg& a1, const OpArg& a2); |
| 516 | 512 | ||
| 517 | // Bit operations | 513 | // Bit operations |
| 518 | void NOT (int bits, OpArg src); | 514 | void NOT (int bits, const OpArg& src); |
| 519 | void OR (int bits, const OpArg &a1, const OpArg &a2); | 515 | void OR(int bits, const OpArg& a1, const OpArg& a2); |
| 520 | void XOR (int bits, const OpArg &a1, const OpArg &a2); | 516 | void XOR(int bits, const OpArg& a1, const OpArg& a2); |
| 521 | void MOV (int bits, const OpArg &a1, const OpArg &a2); | 517 | void MOV(int bits, const OpArg& a1, const OpArg& a2); |
| 522 | void TEST(int bits, const OpArg &a1, const OpArg &a2); | 518 | void TEST(int bits, const OpArg& a1, const OpArg& a2); |
| 523 | 519 | ||
| 524 | // Are these useful at all? Consider removing. | 520 | // Are these useful at all? Consider removing. |
| 525 | void XCHG(int bits, const OpArg &a1, const OpArg &a2); | 521 | void XCHG(int bits, const OpArg& a1, const OpArg& a2); |
| 526 | void XCHG_AHAL(); | 522 | void XCHG_AHAL(); |
| 527 | 523 | ||
| 528 | // Byte swapping (32 and 64-bit only). | 524 | // Byte swapping (32 and 64-bit only). |
| @@ -536,13 +532,13 @@ public: | |||
| 536 | void MOVBE(int dbits, const OpArg& dest, const OpArg& src); | 532 | void MOVBE(int dbits, const OpArg& dest, const OpArg& src); |
| 537 | 533 | ||
| 538 | // Available only on AMD >= Phenom or Intel >= Haswell | 534 | // Available only on AMD >= Phenom or Intel >= Haswell |
| 539 | void LZCNT(int bits, X64Reg dest, OpArg src); | 535 | void LZCNT(int bits, X64Reg dest, const OpArg& src); |
| 540 | // Note: this one is actually part of BMI1 | 536 | // Note: this one is actually part of BMI1 |
| 541 | void TZCNT(int bits, X64Reg dest, OpArg src); | 537 | void TZCNT(int bits, X64Reg dest, const OpArg& src); |
| 542 | 538 | ||
| 543 | // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) | 539 | // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64) |
| 544 | void STMXCSR(OpArg memloc); | 540 | void STMXCSR(const OpArg& memloc); |
| 545 | void LDMXCSR(OpArg memloc); | 541 | void LDMXCSR(const OpArg& memloc); |
| 546 | 542 | ||
| 547 | // Prefixes | 543 | // Prefixes |
| 548 | void LOCK(); | 544 | void LOCK(); |
| @@ -569,259 +565,243 @@ public: | |||
| 569 | x87_FPUBusy = 0x8000, | 565 | x87_FPUBusy = 0x8000, |
| 570 | }; | 566 | }; |
| 571 | 567 | ||
| 572 | void FLD(int bits, OpArg src); | 568 | void FLD(int bits, const OpArg& src); |
| 573 | void FST(int bits, OpArg dest); | 569 | void FST(int bits, const OpArg& dest); |
| 574 | void FSTP(int bits, OpArg dest); | 570 | void FSTP(int bits, const OpArg& dest); |
| 575 | void FNSTSW_AX(); | 571 | void FNSTSW_AX(); |
| 576 | void FWAIT(); | 572 | void FWAIT(); |
| 577 | 573 | ||
| 578 | // SSE/SSE2: Floating point arithmetic | 574 | // SSE/SSE2: Floating point arithmetic |
| 579 | void ADDSS(X64Reg regOp, OpArg arg); | 575 | void ADDSS(X64Reg regOp, const OpArg& arg); |
| 580 | void ADDSD(X64Reg regOp, OpArg arg); | 576 | void ADDSD(X64Reg regOp, const OpArg& arg); |
| 581 | void SUBSS(X64Reg regOp, OpArg arg); | 577 | void SUBSS(X64Reg regOp, const OpArg& arg); |
| 582 | void SUBSD(X64Reg regOp, OpArg arg); | 578 | void SUBSD(X64Reg regOp, const OpArg& arg); |
| 583 | void MULSS(X64Reg regOp, OpArg arg); | 579 | void MULSS(X64Reg regOp, const OpArg& arg); |
| 584 | void MULSD(X64Reg regOp, OpArg arg); | 580 | void MULSD(X64Reg regOp, const OpArg& arg); |
| 585 | void DIVSS(X64Reg regOp, OpArg arg); | 581 | void DIVSS(X64Reg regOp, const OpArg& arg); |
| 586 | void DIVSD(X64Reg regOp, OpArg arg); | 582 | void DIVSD(X64Reg regOp, const OpArg& arg); |
| 587 | void MINSS(X64Reg regOp, OpArg arg); | 583 | void MINSS(X64Reg regOp, const OpArg& arg); |
| 588 | void MINSD(X64Reg regOp, OpArg arg); | 584 | void MINSD(X64Reg regOp, const OpArg& arg); |
| 589 | void MAXSS(X64Reg regOp, OpArg arg); | 585 | void MAXSS(X64Reg regOp, const OpArg& arg); |
| 590 | void MAXSD(X64Reg regOp, OpArg arg); | 586 | void MAXSD(X64Reg regOp, const OpArg& arg); |
| 591 | void SQRTSS(X64Reg regOp, OpArg arg); | 587 | void SQRTSS(X64Reg regOp, const OpArg& arg); |
| 592 | void SQRTSD(X64Reg regOp, OpArg arg); | 588 | void SQRTSD(X64Reg regOp, const OpArg& arg); |
| 593 | void RSQRTSS(X64Reg regOp, OpArg arg); | 589 | void RCPSS(X64Reg regOp, const OpArg& arg); |
| 590 | void RSQRTSS(X64Reg regOp, const OpArg& arg); | ||
| 594 | 591 | ||
| 595 | // SSE/SSE2: Floating point bitwise (yes) | 592 | // SSE/SSE2: Floating point bitwise (yes) |
| 596 | void CMPSS(X64Reg regOp, OpArg arg, u8 compare); | 593 | void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); |
| 597 | void CMPSD(X64Reg regOp, OpArg arg, u8 compare); | 594 | void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); |
| 598 | 595 | ||
| 599 | inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); } | 596 | void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } |
| 600 | inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); } | 597 | void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } |
| 601 | inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); } | 598 | void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } |
| 602 | inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); } | 599 | void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } |
| 603 | inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); } | 600 | void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } |
| 604 | inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); } | 601 | void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } |
| 605 | inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); } | 602 | void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } |
| 606 | 603 | ||
| 607 | // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) | 604 | // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) |
| 608 | void ADDPS(X64Reg regOp, OpArg arg); | 605 | void ADDPS(X64Reg regOp, const OpArg& arg); |
| 609 | void ADDPD(X64Reg regOp, OpArg arg); | 606 | void ADDPD(X64Reg regOp, const OpArg& arg); |
| 610 | void SUBPS(X64Reg regOp, OpArg arg); | 607 | void SUBPS(X64Reg regOp, const OpArg& arg); |
| 611 | void SUBPD(X64Reg regOp, OpArg arg); | 608 | void SUBPD(X64Reg regOp, const OpArg& arg); |
| 612 | void CMPPS(X64Reg regOp, OpArg arg, u8 compare); | 609 | void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare); |
| 613 | void CMPPD(X64Reg regOp, OpArg arg, u8 compare); | 610 | void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare); |
| 614 | void MULPS(X64Reg regOp, OpArg arg); | 611 | void MULPS(X64Reg regOp, const OpArg& arg); |
| 615 | void MULPD(X64Reg regOp, OpArg arg); | 612 | void MULPD(X64Reg regOp, const OpArg& arg); |
| 616 | void DIVPS(X64Reg regOp, OpArg arg); | 613 | void DIVPS(X64Reg regOp, const OpArg& arg); |
| 617 | void DIVPD(X64Reg regOp, OpArg arg); | 614 | void DIVPD(X64Reg regOp, const OpArg& arg); |
| 618 | void MINPS(X64Reg regOp, OpArg arg); | 615 | void MINPS(X64Reg regOp, const OpArg& arg); |
| 619 | void MINPD(X64Reg regOp, OpArg arg); | 616 | void MINPD(X64Reg regOp, const OpArg& arg); |
| 620 | void MAXPS(X64Reg regOp, OpArg arg); | 617 | void MAXPS(X64Reg regOp, const OpArg& arg); |
| 621 | void MAXPD(X64Reg regOp, OpArg arg); | 618 | void MAXPD(X64Reg regOp, const OpArg& arg); |
| 622 | void SQRTPS(X64Reg regOp, OpArg arg); | 619 | void SQRTPS(X64Reg regOp, const OpArg& arg); |
| 623 | void SQRTPD(X64Reg regOp, OpArg arg); | 620 | void SQRTPD(X64Reg regOp, const OpArg& arg); |
| 624 | void RCPPS(X64Reg regOp, OpArg arg); | 621 | void RCPPS(X64Reg regOp, const OpArg& arg); |
| 625 | void RSQRTPS(X64Reg regOp, OpArg arg); | 622 | void RSQRTPS(X64Reg regOp, const OpArg& arg); |
| 626 | 623 | ||
| 627 | // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) | 624 | // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double) |
| 628 | void ANDPS(X64Reg regOp, OpArg arg); | 625 | void ANDPS(X64Reg regOp, const OpArg& arg); |
| 629 | void ANDPD(X64Reg regOp, OpArg arg); | 626 | void ANDPD(X64Reg regOp, const OpArg& arg); |
| 630 | void ANDNPS(X64Reg regOp, OpArg arg); | 627 | void ANDNPS(X64Reg regOp, const OpArg& arg); |
| 631 | void ANDNPD(X64Reg regOp, OpArg arg); | 628 | void ANDNPD(X64Reg regOp, const OpArg& arg); |
| 632 | void ORPS(X64Reg regOp, OpArg arg); | 629 | void ORPS(X64Reg regOp, const OpArg& arg); |
| 633 | void ORPD(X64Reg regOp, OpArg arg); | 630 | void ORPD(X64Reg regOp, const OpArg& arg); |
| 634 | void XORPS(X64Reg regOp, OpArg arg); | 631 | void XORPS(X64Reg regOp, const OpArg& arg); |
| 635 | void XORPD(X64Reg regOp, OpArg arg); | 632 | void XORPD(X64Reg regOp, const OpArg& arg); |
| 636 | 633 | ||
| 637 | // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. | 634 | // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation. |
| 638 | void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle); | 635 | void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle); |
| 639 | void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle); | 636 | void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle); |
| 640 | 637 | ||
| 641 | // SSE/SSE2: Useful alternative to shuffle in some cases. | 638 | // SSE/SSE2: Useful alternative to shuffle in some cases. |
| 642 | void MOVDDUP(X64Reg regOp, OpArg arg); | 639 | void MOVDDUP(X64Reg regOp, const OpArg& arg); |
| 643 | |||
| 644 | // TODO: Actually implement | ||
| 645 | #if 0 | ||
| 646 | // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products... | ||
| 647 | void ADDSUBPS(X64Reg dest, OpArg src); | ||
| 648 | void ADDSUBPD(X64Reg dest, OpArg src); | ||
| 649 | void HADDPD(X64Reg dest, OpArg src); | ||
| 650 | void HSUBPS(X64Reg dest, OpArg src); | ||
| 651 | void HSUBPD(X64Reg dest, OpArg src); | ||
| 652 | |||
| 653 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". | ||
| 654 | void DPPD(X64Reg dest, OpArg src, u8 arg); | ||
| 655 | |||
| 656 | // These are probably useful for VFPU emulation. | ||
| 657 | void INSERTPS(X64Reg dest, OpArg src, u8 arg); | ||
| 658 | void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); | ||
| 659 | #endif | ||
| 660 | 640 | ||
| 661 | // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. | 641 | // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. |
| 662 | void HADDPS(X64Reg dest, OpArg src); | 642 | void HADDPS(X64Reg dest, const OpArg& src); |
| 663 | 643 | ||
| 664 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". | 644 | // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". |
| 665 | void DPPS(X64Reg dest, OpArg src, u8 arg); | 645 | void DPPS(X64Reg dest, const OpArg& src, u8 arg); |
| 666 | 646 | ||
| 667 | void UNPCKLPS(X64Reg dest, OpArg src); | 647 | void UNPCKLPS(X64Reg dest, const OpArg& src); |
| 668 | void UNPCKHPS(X64Reg dest, OpArg src); | 648 | void UNPCKHPS(X64Reg dest, const OpArg& src); |
| 669 | void UNPCKLPD(X64Reg dest, OpArg src); | 649 | void UNPCKLPD(X64Reg dest, const OpArg& src); |
| 670 | void UNPCKHPD(X64Reg dest, OpArg src); | 650 | void UNPCKHPD(X64Reg dest, const OpArg& src); |
| 671 | 651 | ||
| 672 | // SSE/SSE2: Compares. | 652 | // SSE/SSE2: Compares. |
| 673 | void COMISS(X64Reg regOp, OpArg arg); | 653 | void COMISS(X64Reg regOp, const OpArg& arg); |
| 674 | void COMISD(X64Reg regOp, OpArg arg); | 654 | void COMISD(X64Reg regOp, const OpArg& arg); |
| 675 | void UCOMISS(X64Reg regOp, OpArg arg); | 655 | void UCOMISS(X64Reg regOp, const OpArg& arg); |
| 676 | void UCOMISD(X64Reg regOp, OpArg arg); | 656 | void UCOMISD(X64Reg regOp, const OpArg& arg); |
| 677 | 657 | ||
| 678 | // SSE/SSE2: Moves. Use the right data type for your data, in most cases. | 658 | // SSE/SSE2: Moves. Use the right data type for your data, in most cases. |
| 679 | void MOVAPS(X64Reg regOp, OpArg arg); | 659 | void MOVAPS(X64Reg regOp, const OpArg& arg); |
| 680 | void MOVAPD(X64Reg regOp, OpArg arg); | 660 | void MOVAPD(X64Reg regOp, const OpArg& arg); |
| 681 | void MOVAPS(OpArg arg, X64Reg regOp); | 661 | void MOVAPS(const OpArg& arg, X64Reg regOp); |
| 682 | void MOVAPD(OpArg arg, X64Reg regOp); | 662 | void MOVAPD(const OpArg& arg, X64Reg regOp); |
| 683 | 663 | ||
| 684 | void MOVUPS(X64Reg regOp, OpArg arg); | 664 | void MOVUPS(X64Reg regOp, const OpArg& arg); |
| 685 | void MOVUPD(X64Reg regOp, OpArg arg); | 665 | void MOVUPD(X64Reg regOp, const OpArg& arg); |
| 686 | void MOVUPS(OpArg arg, X64Reg regOp); | 666 | void MOVUPS(const OpArg& arg, X64Reg regOp); |
| 687 | void MOVUPD(OpArg arg, X64Reg regOp); | 667 | void MOVUPD(const OpArg& arg, X64Reg regOp); |
| 688 | 668 | ||
| 689 | void MOVDQA(X64Reg regOp, OpArg arg); | 669 | void MOVDQA(X64Reg regOp, const OpArg& arg); |
| 690 | void MOVDQA(OpArg arg, X64Reg regOp); | 670 | void MOVDQA(const OpArg& arg, X64Reg regOp); |
| 691 | void MOVDQU(X64Reg regOp, OpArg arg); | 671 | void MOVDQU(X64Reg regOp, const OpArg& arg); |
| 692 | void MOVDQU(OpArg arg, X64Reg regOp); | 672 | void MOVDQU(const OpArg& arg, X64Reg regOp); |
| 693 | 673 | ||
| 694 | void MOVSS(X64Reg regOp, OpArg arg); | 674 | void MOVSS(X64Reg regOp, const OpArg& arg); |
| 695 | void MOVSD(X64Reg regOp, OpArg arg); | 675 | void MOVSD(X64Reg regOp, const OpArg& arg); |
| 696 | void MOVSS(OpArg arg, X64Reg regOp); | 676 | void MOVSS(const OpArg& arg, X64Reg regOp); |
| 697 | void MOVSD(OpArg arg, X64Reg regOp); | 677 | void MOVSD(const OpArg& arg, X64Reg regOp); |
| 698 | 678 | ||
| 699 | void MOVLPS(X64Reg regOp, OpArg arg); | 679 | void MOVLPS(X64Reg regOp, const OpArg& arg); |
| 700 | void MOVLPD(X64Reg regOp, OpArg arg); | 680 | void MOVLPD(X64Reg regOp, const OpArg& arg); |
| 701 | void MOVLPS(OpArg arg, X64Reg regOp); | 681 | void MOVLPS(const OpArg& arg, X64Reg regOp); |
| 702 | void MOVLPD(OpArg arg, X64Reg regOp); | 682 | void MOVLPD(const OpArg& arg, X64Reg regOp); |
| 703 | 683 | ||
| 704 | void MOVHPS(X64Reg regOp, OpArg arg); | 684 | void MOVHPS(X64Reg regOp, const OpArg& arg); |
| 705 | void MOVHPD(X64Reg regOp, OpArg arg); | 685 | void MOVHPD(X64Reg regOp, const OpArg& arg); |
| 706 | void MOVHPS(OpArg arg, X64Reg regOp); | 686 | void MOVHPS(const OpArg& arg, X64Reg regOp); |
| 707 | void MOVHPD(OpArg arg, X64Reg regOp); | 687 | void MOVHPD(const OpArg& arg, X64Reg regOp); |
| 708 | 688 | ||
| 709 | void MOVHLPS(X64Reg regOp1, X64Reg regOp2); | 689 | void MOVHLPS(X64Reg regOp1, X64Reg regOp2); |
| 710 | void MOVLHPS(X64Reg regOp1, X64Reg regOp2); | 690 | void MOVLHPS(X64Reg regOp1, X64Reg regOp2); |
| 711 | 691 | ||
| 712 | void MOVD_xmm(X64Reg dest, const OpArg &arg); | 692 | void MOVD_xmm(X64Reg dest, const OpArg& arg); |
| 713 | void MOVQ_xmm(X64Reg dest, OpArg arg); | 693 | void MOVQ_xmm(X64Reg dest, OpArg arg); |
| 714 | void MOVD_xmm(const OpArg &arg, X64Reg src); | 694 | void MOVD_xmm(const OpArg& arg, X64Reg src); |
| 715 | void MOVQ_xmm(OpArg arg, X64Reg src); | 695 | void MOVQ_xmm(OpArg arg, X64Reg src); |
| 716 | 696 | ||
| 717 | // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. | 697 | // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. |
| 718 | void MOVMSKPS(X64Reg dest, OpArg arg); | 698 | void MOVMSKPS(X64Reg dest, const OpArg& arg); |
| 719 | void MOVMSKPD(X64Reg dest, OpArg arg); | 699 | void MOVMSKPD(X64Reg dest, const OpArg& arg); |
| 720 | 700 | ||
| 721 | // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. | 701 | // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. |
| 722 | void MASKMOVDQU(X64Reg dest, X64Reg src); | 702 | void MASKMOVDQU(X64Reg dest, X64Reg src); |
| 723 | void LDDQU(X64Reg dest, OpArg src); | 703 | void LDDQU(X64Reg dest, const OpArg& src); |
| 724 | 704 | ||
| 725 | // SSE/SSE2: Data type conversions. | 705 | // SSE/SSE2: Data type conversions. |
| 726 | void CVTPS2PD(X64Reg dest, OpArg src); | 706 | void CVTPS2PD(X64Reg dest, const OpArg& src); |
| 727 | void CVTPD2PS(X64Reg dest, OpArg src); | 707 | void CVTPD2PS(X64Reg dest, const OpArg& src); |
| 728 | void CVTSS2SD(X64Reg dest, OpArg src); | 708 | void CVTSS2SD(X64Reg dest, const OpArg& src); |
| 729 | void CVTSI2SS(X64Reg dest, OpArg src); | 709 | void CVTSI2SS(X64Reg dest, const OpArg& src); |
| 730 | void CVTSD2SS(X64Reg dest, OpArg src); | 710 | void CVTSD2SS(X64Reg dest, const OpArg& src); |
| 731 | void CVTSI2SD(X64Reg dest, OpArg src); | 711 | void CVTSI2SD(X64Reg dest, const OpArg& src); |
| 732 | void CVTDQ2PD(X64Reg regOp, OpArg arg); | 712 | void CVTDQ2PD(X64Reg regOp, const OpArg& arg); |
| 733 | void CVTPD2DQ(X64Reg regOp, OpArg arg); | 713 | void CVTPD2DQ(X64Reg regOp, const OpArg& arg); |
| 734 | void CVTDQ2PS(X64Reg regOp, OpArg arg); | 714 | void CVTDQ2PS(X64Reg regOp, const OpArg& arg); |
| 735 | void CVTPS2DQ(X64Reg regOp, OpArg arg); | 715 | void CVTPS2DQ(X64Reg regOp, const OpArg& arg); |
| 736 | 716 | ||
| 737 | void CVTTPS2DQ(X64Reg regOp, OpArg arg); | 717 | void CVTTPS2DQ(X64Reg regOp, const OpArg& arg); |
| 738 | void CVTTPD2DQ(X64Reg regOp, OpArg arg); | 718 | void CVTTPD2DQ(X64Reg regOp, const OpArg& arg); |
| 739 | 719 | ||
| 740 | // Destinations are X64 regs (rax, rbx, ...) for these instructions. | 720 | // Destinations are X64 regs (rax, rbx, ...) for these instructions. |
| 741 | void CVTSS2SI(X64Reg xregdest, OpArg src); | 721 | void CVTSS2SI(X64Reg xregdest, const OpArg& src); |
| 742 | void CVTSD2SI(X64Reg xregdest, OpArg src); | 722 | void CVTSD2SI(X64Reg xregdest, const OpArg& src); |
| 743 | void CVTTSS2SI(X64Reg xregdest, OpArg arg); | 723 | void CVTTSS2SI(X64Reg xregdest, const OpArg& arg); |
| 744 | void CVTTSD2SI(X64Reg xregdest, OpArg arg); | 724 | void CVTTSD2SI(X64Reg xregdest, const OpArg& arg); |
| 745 | 725 | ||
| 746 | // SSE2: Packed integer instructions | 726 | // SSE2: Packed integer instructions |
| 747 | void PACKSSDW(X64Reg dest, OpArg arg); | 727 | void PACKSSDW(X64Reg dest, const OpArg& arg); |
| 748 | void PACKSSWB(X64Reg dest, OpArg arg); | 728 | void PACKSSWB(X64Reg dest, const OpArg& arg); |
| 749 | void PACKUSDW(X64Reg dest, OpArg arg); | 729 | void PACKUSDW(X64Reg dest, const OpArg& arg); |
| 750 | void PACKUSWB(X64Reg dest, OpArg arg); | 730 | void PACKUSWB(X64Reg dest, const OpArg& arg); |
| 751 | 731 | ||
| 752 | void PUNPCKLBW(X64Reg dest, const OpArg &arg); | 732 | void PUNPCKLBW(X64Reg dest, const OpArg &arg); |
| 753 | void PUNPCKLWD(X64Reg dest, const OpArg &arg); | 733 | void PUNPCKLWD(X64Reg dest, const OpArg &arg); |
| 754 | void PUNPCKLDQ(X64Reg dest, const OpArg &arg); | 734 | void PUNPCKLDQ(X64Reg dest, const OpArg &arg); |
| 755 | void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); | 735 | void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); |
| 756 | 736 | ||
| 757 | void PTEST(X64Reg dest, OpArg arg); | 737 | void PTEST(X64Reg dest, const OpArg& arg); |
| 758 | void PAND(X64Reg dest, OpArg arg); | 738 | void PAND(X64Reg dest, const OpArg& arg); |
| 759 | void PANDN(X64Reg dest, OpArg arg); | 739 | void PANDN(X64Reg dest, const OpArg& arg); |
| 760 | void PXOR(X64Reg dest, OpArg arg); | 740 | void PXOR(X64Reg dest, const OpArg& arg); |
| 761 | void POR(X64Reg dest, OpArg arg); | 741 | void POR(X64Reg dest, const OpArg& arg); |
| 762 | 742 | ||
| 763 | void PADDB(X64Reg dest, OpArg arg); | 743 | void PADDB(X64Reg dest, const OpArg& arg); |
| 764 | void PADDW(X64Reg dest, OpArg arg); | 744 | void PADDW(X64Reg dest, const OpArg& arg); |
| 765 | void PADDD(X64Reg dest, OpArg arg); | 745 | void PADDD(X64Reg dest, const OpArg& arg); |
| 766 | void PADDQ(X64Reg dest, OpArg arg); | 746 | void PADDQ(X64Reg dest, const OpArg& arg); |
| 767 | 747 | ||
| 768 | void PADDSB(X64Reg dest, OpArg arg); | 748 | void PADDSB(X64Reg dest, const OpArg& arg); |
| 769 | void PADDSW(X64Reg dest, OpArg arg); | 749 | void PADDSW(X64Reg dest, const OpArg& arg); |
| 770 | void PADDUSB(X64Reg dest, OpArg arg); | 750 | void PADDUSB(X64Reg dest, const OpArg& arg); |
| 771 | void PADDUSW(X64Reg dest, OpArg arg); | 751 | void PADDUSW(X64Reg dest, const OpArg& arg); |
| 772 | 752 | ||
| 773 | void PSUBB(X64Reg dest, OpArg arg); | 753 | void PSUBB(X64Reg dest, const OpArg& arg); |
| 774 | void PSUBW(X64Reg dest, OpArg arg); | 754 | void PSUBW(X64Reg dest, const OpArg& arg); |
| 775 | void PSUBD(X64Reg dest, OpArg arg); | 755 | void PSUBD(X64Reg dest, const OpArg& arg); |
| 776 | void PSUBQ(X64Reg dest, OpArg arg); | 756 | void PSUBQ(X64Reg dest, const OpArg& arg); |
| 777 | 757 | ||
| 778 | void PSUBSB(X64Reg dest, OpArg arg); | 758 | void PSUBSB(X64Reg dest, const OpArg& arg); |
| 779 | void PSUBSW(X64Reg dest, OpArg arg); | 759 | void PSUBSW(X64Reg dest, const OpArg& arg); |
| 780 | void PSUBUSB(X64Reg dest, OpArg arg); | 760 | void PSUBUSB(X64Reg dest, const OpArg& arg); |
| 781 | void PSUBUSW(X64Reg dest, OpArg arg); | 761 | void PSUBUSW(X64Reg dest, const OpArg& arg); |
| 782 | 762 | ||
| 783 | void PAVGB(X64Reg dest, OpArg arg); | 763 | void PAVGB(X64Reg dest, const OpArg& arg); |
| 784 | void PAVGW(X64Reg dest, OpArg arg); | 764 | void PAVGW(X64Reg dest, const OpArg& arg); |
| 785 | 765 | ||
| 786 | void PCMPEQB(X64Reg dest, OpArg arg); | 766 | void PCMPEQB(X64Reg dest, const OpArg& arg); |
| 787 | void PCMPEQW(X64Reg dest, OpArg arg); | 767 | void PCMPEQW(X64Reg dest, const OpArg& arg); |
| 788 | void PCMPEQD(X64Reg dest, OpArg arg); | 768 | void PCMPEQD(X64Reg dest, const OpArg& arg); |
| 789 | 769 | ||
| 790 | void PCMPGTB(X64Reg dest, OpArg arg); | 770 | void PCMPGTB(X64Reg dest, const OpArg& arg); |
| 791 | void PCMPGTW(X64Reg dest, OpArg arg); | 771 | void PCMPGTW(X64Reg dest, const OpArg& arg); |
| 792 | void PCMPGTD(X64Reg dest, OpArg arg); | 772 | void PCMPGTD(X64Reg dest, const OpArg& arg); |
| 793 | 773 | ||
| 794 | void PEXTRW(X64Reg dest, OpArg arg, u8 subreg); | 774 | void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg); |
| 795 | void PINSRW(X64Reg dest, OpArg arg, u8 subreg); | 775 | void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg); |
| 796 | 776 | ||
| 797 | void PMADDWD(X64Reg dest, OpArg arg); | 777 | void PMADDWD(X64Reg dest, const OpArg& arg); |
| 798 | void PSADBW(X64Reg dest, OpArg arg); | 778 | void PSADBW(X64Reg dest, const OpArg& arg); |
| 799 | 779 | ||
| 800 | void PMAXSW(X64Reg dest, OpArg arg); | 780 | void PMAXSW(X64Reg dest, const OpArg& arg); |
| 801 | void PMAXUB(X64Reg dest, OpArg arg); | 781 | void PMAXUB(X64Reg dest, const OpArg& arg); |
| 802 | void PMINSW(X64Reg dest, OpArg arg); | 782 | void PMINSW(X64Reg dest, const OpArg& arg); |
| 803 | void PMINUB(X64Reg dest, OpArg arg); | 783 | void PMINUB(X64Reg dest, const OpArg& arg); |
| 804 | // SSE4: More MAX/MIN instructions. | 784 | // SSE4: More MAX/MIN instructions. |
| 805 | void PMINSB(X64Reg dest, OpArg arg); | 785 | void PMINSB(X64Reg dest, const OpArg& arg); |
| 806 | void PMINSD(X64Reg dest, OpArg arg); | 786 | void PMINSD(X64Reg dest, const OpArg& arg); |
| 807 | void PMINUW(X64Reg dest, OpArg arg); | 787 | void PMINUW(X64Reg dest, const OpArg& arg); |
| 808 | void PMINUD(X64Reg dest, OpArg arg); | 788 | void PMINUD(X64Reg dest, const OpArg& arg); |
| 809 | void PMAXSB(X64Reg dest, OpArg arg); | 789 | void PMAXSB(X64Reg dest, const OpArg& arg); |
| 810 | void PMAXSD(X64Reg dest, OpArg arg); | 790 | void PMAXSD(X64Reg dest, const OpArg& arg); |
| 811 | void PMAXUW(X64Reg dest, OpArg arg); | 791 | void PMAXUW(X64Reg dest, const OpArg& arg); |
| 812 | void PMAXUD(X64Reg dest, OpArg arg); | 792 | void PMAXUD(X64Reg dest, const OpArg& arg); |
| 813 | 793 | ||
| 814 | void PMOVMSKB(X64Reg dest, OpArg arg); | 794 | void PMOVMSKB(X64Reg dest, const OpArg& arg); |
| 815 | void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle); | 795 | void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 816 | void PSHUFB(X64Reg dest, OpArg arg); | 796 | void PSHUFB(X64Reg dest, const OpArg& arg); |
| 817 | 797 | ||
| 818 | void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle); | 798 | void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 819 | void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle); | 799 | void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle); |
| 820 | 800 | ||
| 821 | void PSRLW(X64Reg reg, int shift); | 801 | void PSRLW(X64Reg reg, int shift); |
| 822 | void PSRLD(X64Reg reg, int shift); | 802 | void PSRLD(X64Reg reg, int shift); |
| 823 | void PSRLQ(X64Reg reg, int shift); | 803 | void PSRLQ(X64Reg reg, int shift); |
| 824 | void PSRLQ(X64Reg reg, OpArg arg); | 804 | void PSRLQ(X64Reg reg, const OpArg& arg); |
| 825 | void PSRLDQ(X64Reg reg, int shift); | 805 | void PSRLDQ(X64Reg reg, int shift); |
| 826 | 806 | ||
| 827 | void PSLLW(X64Reg reg, int shift); | 807 | void PSLLW(X64Reg reg, int shift); |
| @@ -833,198 +813,198 @@ public: | |||
| 833 | void PSRAD(X64Reg reg, int shift); | 813 | void PSRAD(X64Reg reg, int shift); |
| 834 | 814 | ||
| 835 | // SSE4: data type conversions | 815 | // SSE4: data type conversions |
| 836 | void PMOVSXBW(X64Reg dest, OpArg arg); | 816 | void PMOVSXBW(X64Reg dest, const OpArg& arg); |
| 837 | void PMOVSXBD(X64Reg dest, OpArg arg); | 817 | void PMOVSXBD(X64Reg dest, const OpArg& arg); |
| 838 | void PMOVSXBQ(X64Reg dest, OpArg arg); | 818 | void PMOVSXBQ(X64Reg dest, const OpArg& arg); |
| 839 | void PMOVSXWD(X64Reg dest, OpArg arg); | 819 | void PMOVSXWD(X64Reg dest, const OpArg& arg); |
| 840 | void PMOVSXWQ(X64Reg dest, OpArg arg); | 820 | void PMOVSXWQ(X64Reg dest, const OpArg& arg); |
| 841 | void PMOVSXDQ(X64Reg dest, OpArg arg); | 821 | void PMOVSXDQ(X64Reg dest, const OpArg& arg); |
| 842 | void PMOVZXBW(X64Reg dest, OpArg arg); | 822 | void PMOVZXBW(X64Reg dest, const OpArg& arg); |
| 843 | void PMOVZXBD(X64Reg dest, OpArg arg); | 823 | void PMOVZXBD(X64Reg dest, const OpArg& arg); |
| 844 | void PMOVZXBQ(X64Reg dest, OpArg arg); | 824 | void PMOVZXBQ(X64Reg dest, const OpArg& arg); |
| 845 | void PMOVZXWD(X64Reg dest, OpArg arg); | 825 | void PMOVZXWD(X64Reg dest, const OpArg& arg); |
| 846 | void PMOVZXWQ(X64Reg dest, OpArg arg); | 826 | void PMOVZXWQ(X64Reg dest, const OpArg& arg); |
| 847 | void PMOVZXDQ(X64Reg dest, OpArg arg); | 827 | void PMOVZXDQ(X64Reg dest, const OpArg& arg); |
| 848 | 828 | ||
| 849 | // SSE4: variable blend instructions (xmm0 implicit argument) | 829 | // SSE4: variable blend instructions (xmm0 implicit argument) |
| 850 | void PBLENDVB(X64Reg dest, OpArg arg); | 830 | void PBLENDVB(X64Reg dest, const OpArg& arg); |
| 851 | void BLENDVPS(X64Reg dest, OpArg arg); | 831 | void BLENDVPS(X64Reg dest, const OpArg& arg); |
| 852 | void BLENDVPD(X64Reg dest, OpArg arg); | 832 | void BLENDVPD(X64Reg dest, const OpArg& arg); |
| 853 | void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); | 833 | void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend); |
| 854 | void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); | 834 | void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend); |
| 855 | 835 | ||
| 856 | // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) | 836 | // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.) |
| 857 | void ROUNDSS(X64Reg dest, OpArg arg, u8 mode); | 837 | void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode); |
| 858 | void ROUNDSD(X64Reg dest, OpArg arg, u8 mode); | 838 | void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode); |
| 859 | void ROUNDPS(X64Reg dest, OpArg arg, u8 mode); | 839 | void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); |
| 860 | void ROUNDPD(X64Reg dest, OpArg arg, u8 mode); | 840 | void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); |
| 861 | 841 | ||
| 862 | inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } | 842 | void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } |
| 863 | inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } | 843 | void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } |
| 864 | inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); } | 844 | void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } |
| 865 | inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); } | 845 | void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } |
| 866 | 846 | ||
| 867 | inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } | 847 | void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } |
| 868 | inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } | 848 | void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } |
| 869 | inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); } | 849 | void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } |
| 870 | inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); } | 850 | void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } |
| 871 | 851 | ||
| 872 | inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } | 852 | void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } |
| 873 | inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } | 853 | void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } |
| 874 | inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); } | 854 | void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } |
| 875 | inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); } | 855 | void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } |
| 876 | 856 | ||
| 877 | inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } | 857 | void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } |
| 878 | inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } | 858 | void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } |
| 879 | inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); } | 859 | void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } |
| 880 | inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); } | 860 | void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } |
| 881 | 861 | ||
| 882 | // AVX | 862 | // AVX |
| 883 | void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 863 | void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 884 | void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 864 | void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 885 | void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 865 | void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 886 | void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 866 | void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 887 | void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 867 | void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 888 | void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 868 | void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 889 | void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 869 | void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 890 | void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 870 | void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 891 | void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 871 | void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 892 | void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle); | 872 | void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle); |
| 893 | void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 873 | void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 894 | void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 874 | void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 895 | 875 | ||
| 896 | void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 876 | void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 897 | void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 877 | void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 898 | void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 878 | void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 899 | void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 879 | void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 900 | void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 880 | void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 901 | void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 881 | void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 902 | void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 882 | void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 903 | void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 883 | void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 904 | 884 | ||
| 905 | void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 885 | void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 906 | void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 886 | void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 907 | void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 887 | void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 908 | void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 888 | void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 909 | 889 | ||
| 910 | // FMA3 | 890 | // FMA3 |
| 911 | void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 891 | void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 912 | void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 892 | void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 913 | void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 893 | void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 914 | void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 894 | void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 915 | void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 895 | void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 916 | void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 896 | void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 917 | void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 897 | void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 918 | void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 898 | void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 919 | void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 899 | void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 920 | void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 900 | void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 921 | void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 901 | void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 922 | void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 902 | void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 923 | void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 903 | void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 924 | void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 904 | void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 925 | void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 905 | void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 926 | void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 906 | void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 927 | void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 907 | void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 928 | void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 908 | void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 929 | void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 909 | void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 930 | void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 910 | void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 931 | void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 911 | void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 932 | void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 912 | void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 933 | void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 913 | void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 934 | void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 914 | void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 935 | void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 915 | void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 936 | void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 916 | void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 937 | void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 917 | void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 938 | void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 918 | void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 939 | void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 919 | void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 940 | void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 920 | void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 941 | void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 921 | void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 942 | void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 922 | void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 943 | void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 923 | void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 944 | void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 924 | void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 945 | void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 925 | void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 946 | void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 926 | void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 947 | void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 927 | void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 948 | void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 928 | void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 949 | void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 929 | void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 950 | void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 930 | void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 951 | void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 931 | void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 952 | void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 932 | void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 953 | void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 933 | void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 954 | void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 934 | void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 955 | void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 935 | void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 956 | void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 936 | void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 957 | void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 937 | void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 958 | void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 938 | void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 959 | void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 939 | void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 960 | void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 940 | void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 961 | void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 941 | void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 962 | void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 942 | void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 963 | void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 943 | void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 964 | void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 944 | void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 965 | void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 945 | void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 966 | void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 946 | void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 967 | void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 947 | void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 968 | void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 948 | void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 969 | void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 949 | void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 970 | void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg); | 950 | void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 971 | 951 | ||
| 972 | // VEX GPR instructions | 952 | // VEX GPR instructions |
| 973 | void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 953 | void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 974 | void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 954 | void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 975 | void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 955 | void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 976 | void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); | 956 | void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate); |
| 977 | void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 957 | void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 978 | void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 958 | void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 979 | void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 959 | void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 980 | void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 960 | void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 981 | void BLSR(int bits, X64Reg regOp, OpArg arg); | 961 | void BLSR(int bits, X64Reg regOp, const OpArg& arg); |
| 982 | void BLSMSK(int bits, X64Reg regOp, OpArg arg); | 962 | void BLSMSK(int bits, X64Reg regOp, const OpArg& arg); |
| 983 | void BLSI(int bits, X64Reg regOp, OpArg arg); | 963 | void BLSI(int bits, X64Reg regOp, const OpArg& arg); |
| 984 | void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); | 964 | void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2); |
| 985 | void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); | 965 | void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg); |
| 986 | 966 | ||
| 987 | void RDTSC(); | 967 | void RDTSC(); |
| 988 | 968 | ||
| 989 | // Utility functions | 969 | // Utility functions |
| 990 | // The difference between this and CALL is that this aligns the stack | 970 | // The difference between this and CALL is that this aligns the stack |
| 991 | // where appropriate. | 971 | // where appropriate. |
| 992 | void ABI_CallFunction(const void *func); | 972 | void ABI_CallFunction(const void* func); |
| 993 | template <typename T> | 973 | template <typename T> |
| 994 | void ABI_CallFunction(T (*func)()) { | 974 | void ABI_CallFunction(T (*func)()) { |
| 995 | ABI_CallFunction((const void *)func); | 975 | ABI_CallFunction((const void*)func); |
| 996 | } | 976 | } |
| 997 | 977 | ||
| 998 | void ABI_CallFunction(const u8 *func) { | 978 | void ABI_CallFunction(const u8* func) { |
| 999 | ABI_CallFunction((const void *)func); | 979 | ABI_CallFunction((const void*)func); |
| 1000 | } | 980 | } |
| 1001 | void ABI_CallFunctionC16(const void *func, u16 param1); | 981 | void ABI_CallFunctionC16(const void* func, u16 param1); |
| 1002 | void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2); | 982 | void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); |
| 1003 | 983 | ||
| 1004 | 984 | ||
| 1005 | // These only support u32 parameters, but that's enough for a lot of uses. | 985 | // These only support u32 parameters, but that's enough for a lot of uses. |
| 1006 | // These will destroy the 1 or 2 first "parameter regs". | 986 | // These will destroy the 1 or 2 first "parameter regs". |
| 1007 | void ABI_CallFunctionC(const void *func, u32 param1); | 987 | void ABI_CallFunctionC(const void* func, u32 param1); |
| 1008 | void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2); | 988 | void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2); |
| 1009 | void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3); | 989 | void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3); |
| 1010 | void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3); | 990 | void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3); |
| 1011 | void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4); | 991 | void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4); |
| 1012 | void ABI_CallFunctionP(const void *func, void *param1); | 992 | void ABI_CallFunctionP(const void* func, void* param1); |
| 1013 | void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2); | 993 | void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2); |
| 1014 | void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3); | 994 | void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3); |
| 1015 | void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3); | 995 | void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3); |
| 1016 | void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2); | 996 | void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2); |
| 1017 | void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3); | 997 | void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3); |
| 1018 | void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1); | 998 | void ABI_CallFunctionA(const void* func, const OpArg& arg1); |
| 1019 | void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2); | 999 | void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2); |
| 1020 | 1000 | ||
| 1021 | // Pass a register as a parameter. | 1001 | // Pass a register as a parameter. |
| 1022 | void ABI_CallFunctionR(const void *func, X64Reg reg1); | 1002 | void ABI_CallFunctionR(const void* func, X64Reg reg1); |
| 1023 | void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2); | 1003 | void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2); |
| 1024 | 1004 | ||
| 1025 | template <typename Tr, typename T1> | 1005 | template <typename Tr, typename T1> |
| 1026 | void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { | 1006 | void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) { |
| 1027 | ABI_CallFunctionC((const void *)func, param1); | 1007 | ABI_CallFunctionC((const void*)func, param1); |
| 1028 | } | 1008 | } |
| 1029 | 1009 | ||
| 1030 | // A function that doesn't have any control over what it will do to regs, | 1010 | // A function that doesn't have any control over what it will do to regs, |
| @@ -1048,9 +1028,9 @@ public: | |||
| 1048 | void ABI_EmitEpilogue(int maxCallParams); | 1028 | void ABI_EmitEpilogue(int maxCallParams); |
| 1049 | 1029 | ||
| 1050 | #ifdef _M_IX86 | 1030 | #ifdef _M_IX86 |
| 1051 | inline int ABI_GetNumXMMRegs() { return 8; } | 1031 | static int ABI_GetNumXMMRegs() { return 8; } |
| 1052 | #else | 1032 | #else |
| 1053 | inline int ABI_GetNumXMMRegs() { return 16; } | 1033 | static int ABI_GetNumXMMRegs() { return 16; } |
| 1054 | #endif | 1034 | #endif |
| 1055 | }; // class XEmitter | 1035 | }; // class XEmitter |
| 1056 | 1036 | ||
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6cc60fd58..c17290b9b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -29,6 +29,7 @@ set(SRCS | |||
| 29 | hle/kernel/address_arbiter.cpp | 29 | hle/kernel/address_arbiter.cpp |
| 30 | hle/kernel/event.cpp | 30 | hle/kernel/event.cpp |
| 31 | hle/kernel/kernel.cpp | 31 | hle/kernel/kernel.cpp |
| 32 | hle/kernel/memory.cpp | ||
| 32 | hle/kernel/mutex.cpp | 33 | hle/kernel/mutex.cpp |
| 33 | hle/kernel/process.cpp | 34 | hle/kernel/process.cpp |
| 34 | hle/kernel/resource_limit.cpp | 35 | hle/kernel/resource_limit.cpp |
| @@ -115,7 +116,6 @@ set(SRCS | |||
| 115 | loader/loader.cpp | 116 | loader/loader.cpp |
| 116 | loader/ncch.cpp | 117 | loader/ncch.cpp |
| 117 | tracer/recorder.cpp | 118 | tracer/recorder.cpp |
| 118 | mem_map.cpp | ||
| 119 | memory.cpp | 119 | memory.cpp |
| 120 | settings.cpp | 120 | settings.cpp |
| 121 | system.cpp | 121 | system.cpp |
| @@ -157,6 +157,7 @@ set(HEADERS | |||
| 157 | hle/kernel/address_arbiter.h | 157 | hle/kernel/address_arbiter.h |
| 158 | hle/kernel/event.h | 158 | hle/kernel/event.h |
| 159 | hle/kernel/kernel.h | 159 | hle/kernel/kernel.h |
| 160 | hle/kernel/memory.h | ||
| 160 | hle/kernel/mutex.h | 161 | hle/kernel/mutex.h |
| 161 | hle/kernel/process.h | 162 | hle/kernel/process.h |
| 162 | hle/kernel/resource_limit.h | 163 | hle/kernel/resource_limit.h |
| @@ -245,7 +246,6 @@ set(HEADERS | |||
| 245 | loader/ncch.h | 246 | loader/ncch.h |
| 246 | tracer/recorder.h | 247 | tracer/recorder.h |
| 247 | tracer/citrace.h | 248 | tracer/citrace.h |
| 248 | mem_map.h | ||
| 249 | memory.h | 249 | memory.h |
| 250 | memory_setup.h | 250 | memory_setup.h |
| 251 | settings.h | 251 | settings.h |
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp index ccb2eb0eb..0491717dc 100644 --- a/src/core/arm/skyeye_common/armstate.cpp +++ b/src/core/arm/skyeye_common/armstate.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/swap.h" | 5 | #include "common/swap.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/mem_map.h" | ||
| 8 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 9 | #include "core/arm/skyeye_common/armstate.h" | 8 | #include "core/arm/skyeye_common/armstate.h" |
| 10 | #include "core/arm/skyeye_common/vfp/vfp.h" | 9 | #include "core/arm/skyeye_common/vfp/vfp.h" |
diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp index d31fb9449..883713e86 100644 --- a/src/core/arm/skyeye_common/armsupp.cpp +++ b/src/core/arm/skyeye_common/armsupp.cpp | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | 17 | ||
| 18 | #include "common/logging/log.h" | 18 | #include "common/logging/log.h" |
| 19 | 19 | ||
| 20 | #include "core/mem_map.h" | ||
| 21 | #include "core/arm/skyeye_common/arm_regformat.h" | 20 | #include "core/arm/skyeye_common/arm_regformat.h" |
| 22 | #include "core/arm/skyeye_common/armstate.h" | 21 | #include "core/arm/skyeye_common/armstate.h" |
| 23 | #include "core/arm/skyeye_common/armsupp.h" | 22 | #include "core/arm/skyeye_common/armsupp.h" |
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp index aea936d2d..b1a72dc0c 100644 --- a/src/core/hle/config_mem.cpp +++ b/src/core/hle/config_mem.cpp | |||
| @@ -25,10 +25,6 @@ void Init() { | |||
| 25 | config_mem.sys_core_ver = 0x2; | 25 | config_mem.sys_core_ver = 0x2; |
| 26 | config_mem.unit_info = 0x1; // Bit 0 set for Retail | 26 | config_mem.unit_info = 0x1; // Bit 0 set for Retail |
| 27 | config_mem.prev_firm = 0; | 27 | config_mem.prev_firm = 0; |
| 28 | config_mem.app_mem_type = 0x2; // Default app mem type is 0 | ||
| 29 | config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB) | ||
| 30 | config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB | ||
| 31 | config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc); | ||
| 32 | config_mem.firm_unk = 0; | 28 | config_mem.firm_unk = 0; |
| 33 | config_mem.firm_version_rev = 0; | 29 | config_mem.firm_version_rev = 0; |
| 34 | config_mem.firm_version_min = 0x40; | 30 | config_mem.firm_version_min = 0x40; |
| @@ -36,7 +32,4 @@ void Init() { | |||
| 36 | config_mem.firm_sys_core_ver = 0x2; | 32 | config_mem.firm_sys_core_ver = 0x2; |
| 37 | } | 33 | } |
| 38 | 34 | ||
| 39 | void Shutdown() { | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace | 35 | } // namespace |
diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h index 9825a09e8..24a1254f2 100644 --- a/src/core/hle/config_mem.h +++ b/src/core/hle/config_mem.h | |||
| @@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory | |||
| 52 | extern ConfigMemDef config_mem; | 52 | extern ConfigMemDef config_mem; |
| 53 | 53 | ||
| 54 | void Init(); | 54 | void Init(); |
| 55 | void Shutdown(); | ||
| 56 | 55 | ||
| 57 | } // namespace | 56 | } // namespace |
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h index 1a0518926..5846a161b 100644 --- a/src/core/hle/function_wrappers.h +++ b/src/core/hle/function_wrappers.h | |||
| @@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() { | |||
| 172 | FuncReturn(func(PARAM(0), param1, param2).raw); | 172 | FuncReturn(func(PARAM(0), param1, param2).raw); |
| 173 | } | 173 | } |
| 174 | 174 | ||
| 175 | template<ResultCode func(s64*, Handle, u32)> void Wrap() { | ||
| 176 | s64 param_1 = 0; | ||
| 177 | u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw; | ||
| 178 | Core::g_app_core->SetReg(1, (u32)param_1); | ||
| 179 | Core::g_app_core->SetReg(2, (u32)(param_1 >> 32)); | ||
| 180 | FuncReturn(retval); | ||
| 181 | } | ||
| 182 | |||
| 175 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 183 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 176 | // Function wrappers that return type u32 | 184 | // Function wrappers that return type u32 |
| 177 | 185 | ||
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index cd0a400dc..331b1b22a 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp | |||
| @@ -34,8 +34,6 @@ void Reschedule(const char *reason) { | |||
| 34 | 34 | ||
| 35 | void Init() { | 35 | void Init() { |
| 36 | Service::Init(); | 36 | Service::Init(); |
| 37 | ConfigMem::Init(); | ||
| 38 | SharedPage::Init(); | ||
| 39 | 37 | ||
| 40 | g_reschedule = false; | 38 | g_reschedule = false; |
| 41 | 39 | ||
| @@ -43,8 +41,6 @@ void Init() { | |||
| 43 | } | 41 | } |
| 44 | 42 | ||
| 45 | void Shutdown() { | 43 | void Shutdown() { |
| 46 | ConfigMem::Shutdown(); | ||
| 47 | SharedPage::Shutdown(); | ||
| 48 | Service::Shutdown(); | 44 | Service::Shutdown(); |
| 49 | 45 | ||
| 50 | LOG_DEBUG(Kernel, "shutdown OK"); | 46 | LOG_DEBUG(Kernel, "shutdown OK"); |
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 5711c0405..7a401a965 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -7,11 +7,14 @@ | |||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | 9 | ||
| 10 | #include "core/hle/config_mem.h" | ||
| 10 | #include "core/hle/kernel/kernel.h" | 11 | #include "core/hle/kernel/kernel.h" |
| 11 | #include "core/hle/kernel/resource_limit.h" | 12 | #include "core/hle/kernel/memory.h" |
| 12 | #include "core/hle/kernel/process.h" | 13 | #include "core/hle/kernel/process.h" |
| 14 | #include "core/hle/kernel/resource_limit.h" | ||
| 13 | #include "core/hle/kernel/thread.h" | 15 | #include "core/hle/kernel/thread.h" |
| 14 | #include "core/hle/kernel/timer.h" | 16 | #include "core/hle/kernel/timer.h" |
| 17 | #include "core/hle/shared_page.h" | ||
| 15 | 18 | ||
| 16 | namespace Kernel { | 19 | namespace Kernel { |
| 17 | 20 | ||
| @@ -119,6 +122,13 @@ void HandleTable::Clear() { | |||
| 119 | 122 | ||
| 120 | /// Initialize the kernel | 123 | /// Initialize the kernel |
| 121 | void Init() { | 124 | void Init() { |
| 125 | ConfigMem::Init(); | ||
| 126 | SharedPage::Init(); | ||
| 127 | |||
| 128 | // TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead | ||
| 129 | // For now it defaults to the one with a largest allocation to the app | ||
| 130 | Kernel::MemoryInit(2); // Allocates 96MB to the application | ||
| 131 | |||
| 122 | Kernel::ResourceLimitsInit(); | 132 | Kernel::ResourceLimitsInit(); |
| 123 | Kernel::ThreadingInit(); | 133 | Kernel::ThreadingInit(); |
| 124 | Kernel::TimersInit(); | 134 | Kernel::TimersInit(); |
| @@ -131,11 +141,14 @@ void Init() { | |||
| 131 | 141 | ||
| 132 | /// Shutdown the kernel | 142 | /// Shutdown the kernel |
| 133 | void Shutdown() { | 143 | void Shutdown() { |
| 144 | g_handle_table.Clear(); // Free all kernel objects | ||
| 145 | |||
| 134 | Kernel::ThreadingShutdown(); | 146 | Kernel::ThreadingShutdown(); |
| 147 | g_current_process = nullptr; | ||
| 148 | |||
| 135 | Kernel::TimersShutdown(); | 149 | Kernel::TimersShutdown(); |
| 136 | Kernel::ResourceLimitsShutdown(); | 150 | Kernel::ResourceLimitsShutdown(); |
| 137 | g_handle_table.Clear(); // Free all kernel objects | 151 | Kernel::MemoryShutdown(); |
| 138 | g_current_process = nullptr; | ||
| 139 | } | 152 | } |
| 140 | 153 | ||
| 141 | } // namespace | 154 | } // namespace |
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp new file mode 100644 index 000000000..e4fc5f3c4 --- /dev/null +++ b/src/core/hle/kernel/memory.cpp | |||
| @@ -0,0 +1,136 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <memory> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | |||
| 13 | #include "core/hle/config_mem.h" | ||
| 14 | #include "core/hle/kernel/memory.h" | ||
| 15 | #include "core/hle/kernel/vm_manager.h" | ||
| 16 | #include "core/hle/result.h" | ||
| 17 | #include "core/hle/shared_page.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/memory_setup.h" | ||
| 20 | |||
| 21 | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 22 | |||
| 23 | namespace Kernel { | ||
| 24 | |||
| 25 | static MemoryRegionInfo memory_regions[3]; | ||
| 26 | |||
| 27 | /// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system | ||
| 28 | /// memory configuration type. | ||
| 29 | static const u32 memory_region_sizes[8][3] = { | ||
| 30 | // Old 3DS layouts | ||
| 31 | {0x04000000, 0x02C00000, 0x01400000}, // 0 | ||
| 32 | { /* This appears to be unused. */ }, // 1 | ||
| 33 | {0x06000000, 0x00C00000, 0x01400000}, // 2 | ||
| 34 | {0x05000000, 0x01C00000, 0x01400000}, // 3 | ||
| 35 | {0x04800000, 0x02400000, 0x01400000}, // 4 | ||
| 36 | {0x02000000, 0x04C00000, 0x01400000}, // 5 | ||
| 37 | |||
| 38 | // New 3DS layouts | ||
| 39 | {0x07C00000, 0x06400000, 0x02000000}, // 6 | ||
| 40 | {0x0B200000, 0x02E00000, 0x02000000}, // 7 | ||
| 41 | }; | ||
| 42 | |||
| 43 | void MemoryInit(u32 mem_type) { | ||
| 44 | // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead. | ||
| 45 | ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!"); | ||
| 46 | ASSERT(mem_type != 1); | ||
| 47 | |||
| 48 | // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with | ||
| 49 | // the sizes specified in the memory_region_sizes table. | ||
| 50 | VAddr base = 0; | ||
| 51 | for (int i = 0; i < 3; ++i) { | ||
| 52 | memory_regions[i].base = base; | ||
| 53 | memory_regions[i].size = memory_region_sizes[mem_type][i]; | ||
| 54 | memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>(); | ||
| 55 | |||
| 56 | base += memory_regions[i].size; | ||
| 57 | } | ||
| 58 | |||
| 59 | // We must've allocated the entire FCRAM by the end | ||
| 60 | ASSERT(base == Memory::FCRAM_SIZE); | ||
| 61 | |||
| 62 | using ConfigMem::config_mem; | ||
| 63 | config_mem.app_mem_type = mem_type; | ||
| 64 | // app_mem_alloc does not always match the configured size for memory_regions[0]: in case the | ||
| 65 | // n3DS type override is in effect it reports the size the game expects, not the real one. | ||
| 66 | config_mem.app_mem_alloc = memory_region_sizes[mem_type][0]; | ||
| 67 | config_mem.sys_mem_alloc = memory_regions[1].size; | ||
| 68 | config_mem.base_mem_alloc = memory_regions[2].size; | ||
| 69 | } | ||
| 70 | |||
| 71 | void MemoryShutdown() { | ||
| 72 | for (auto& region : memory_regions) { | ||
| 73 | region.base = 0; | ||
| 74 | region.size = 0; | ||
| 75 | region.linear_heap_memory = nullptr; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) { | ||
| 80 | switch (region) { | ||
| 81 | case MemoryRegion::APPLICATION: | ||
| 82 | return &memory_regions[0]; | ||
| 83 | case MemoryRegion::SYSTEM: | ||
| 84 | return &memory_regions[1]; | ||
| 85 | case MemoryRegion::BASE: | ||
| 86 | return &memory_regions[2]; | ||
| 87 | default: | ||
| 88 | UNREACHABLE(); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | } | ||
| 93 | |||
| 94 | namespace Memory { | ||
| 95 | |||
| 96 | namespace { | ||
| 97 | |||
| 98 | struct MemoryArea { | ||
| 99 | u32 base; | ||
| 100 | u32 size; | ||
| 101 | const char* name; | ||
| 102 | }; | ||
| 103 | |||
| 104 | // We don't declare the IO regions in here since its handled by other means. | ||
| 105 | static MemoryArea memory_areas[] = { | ||
| 106 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory | ||
| 107 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | ||
| 108 | {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory | ||
| 109 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | ||
| 110 | }; | ||
| 111 | |||
| 112 | } | ||
| 113 | |||
| 114 | void Init() { | ||
| 115 | InitMemoryMap(); | ||
| 116 | LOG_DEBUG(HW_Memory, "initialized OK"); | ||
| 117 | } | ||
| 118 | |||
| 119 | void InitLegacyAddressSpace(Kernel::VMManager& address_space) { | ||
| 120 | using namespace Kernel; | ||
| 121 | |||
| 122 | for (MemoryArea& area : memory_areas) { | ||
| 123 | auto block = std::make_shared<std::vector<u8>>(area.size); | ||
| 124 | address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); | ||
| 125 | } | ||
| 126 | |||
| 127 | auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, | ||
| 128 | (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); | ||
| 129 | address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); | ||
| 130 | |||
| 131 | auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, | ||
| 132 | (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); | ||
| 133 | address_space.Reprotect(shared_page_vma, VMAPermission::Read); | ||
| 134 | } | ||
| 135 | |||
| 136 | } // namespace | ||
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h new file mode 100644 index 000000000..36690b091 --- /dev/null +++ b/src/core/hle/kernel/memory.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | #include "core/hle/kernel/process.h" | ||
| 12 | |||
| 13 | namespace Kernel { | ||
| 14 | |||
| 15 | class VMManager; | ||
| 16 | |||
| 17 | struct MemoryRegionInfo { | ||
| 18 | u32 base; // Not an address, but offset from start of FCRAM | ||
| 19 | u32 size; | ||
| 20 | |||
| 21 | std::shared_ptr<std::vector<u8>> linear_heap_memory; | ||
| 22 | }; | ||
| 23 | |||
| 24 | void MemoryInit(u32 mem_type); | ||
| 25 | void MemoryShutdown(); | ||
| 26 | MemoryRegionInfo* GetMemoryRegion(MemoryRegion region); | ||
| 27 | |||
| 28 | } | ||
| 29 | |||
| 30 | namespace Memory { | ||
| 31 | |||
| 32 | void Init(); | ||
| 33 | void InitLegacyAddressSpace(Kernel::VMManager& address_space); | ||
| 34 | |||
| 35 | } // namespace | ||
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index a7892c652..124047a53 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -7,11 +7,11 @@ | |||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "common/make_unique.h" | 8 | #include "common/make_unique.h" |
| 9 | 9 | ||
| 10 | #include "core/hle/kernel/memory.h" | ||
| 10 | #include "core/hle/kernel/process.h" | 11 | #include "core/hle/kernel/process.h" |
| 11 | #include "core/hle/kernel/resource_limit.h" | 12 | #include "core/hle/kernel/resource_limit.h" |
| 12 | #include "core/hle/kernel/thread.h" | 13 | #include "core/hle/kernel/thread.h" |
| 13 | #include "core/hle/kernel/vm_manager.h" | 14 | #include "core/hle/kernel/vm_manager.h" |
| 14 | #include "core/mem_map.h" | ||
| 15 | #include "core/memory.h" | 15 | #include "core/memory.h" |
| 16 | 16 | ||
| 17 | namespace Kernel { | 17 | namespace Kernel { |
| @@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) { | |||
| 36 | process->codeset = std::move(code_set); | 36 | process->codeset = std::move(code_set); |
| 37 | process->flags.raw = 0; | 37 | process->flags.raw = 0; |
| 38 | process->flags.memory_region = MemoryRegion::APPLICATION; | 38 | process->flags.memory_region = MemoryRegion::APPLICATION; |
| 39 | process->address_space = Common::make_unique<VMManager>(); | 39 | Memory::InitLegacyAddressSpace(process->vm_manager); |
| 40 | Memory::InitLegacyAddressSpace(*process->address_space); | ||
| 41 | 40 | ||
| 42 | return process; | 41 | return process; |
| 43 | } | 42 | } |
| @@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { | |||
| 93 | mapping.unk_flag = false; | 92 | mapping.unk_flag = false; |
| 94 | } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF | 93 | } else if ((type & 0xFE0) == 0xFC0) { // 0x01FF |
| 95 | // Kernel version | 94 | // Kernel version |
| 96 | int minor = descriptor & 0xFF; | 95 | kernel_version = descriptor & 0xFFFF; |
| 97 | int major = (descriptor >> 8) & 0xFF; | 96 | |
| 98 | LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor); | 97 | int minor = kernel_version & 0xFF; |
| 98 | int major = (kernel_version >> 8) & 0xFF; | ||
| 99 | LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor); | ||
| 99 | } else { | 100 | } else { |
| 100 | LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor); | 101 | LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor); |
| 101 | } | 102 | } |
| @@ -103,20 +104,153 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) { | |||
| 103 | } | 104 | } |
| 104 | 105 | ||
| 105 | void Process::Run(s32 main_thread_priority, u32 stack_size) { | 106 | void Process::Run(s32 main_thread_priority, u32 stack_size) { |
| 107 | memory_region = GetMemoryRegion(flags.memory_region); | ||
| 108 | |||
| 106 | auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { | 109 | auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { |
| 107 | auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory, | 110 | auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory, |
| 108 | segment.offset, segment.size, memory_state).Unwrap(); | 111 | segment.offset, segment.size, memory_state).Unwrap(); |
| 109 | address_space->Reprotect(vma, permissions); | 112 | vm_manager.Reprotect(vma, permissions); |
| 113 | misc_memory_used += segment.size; | ||
| 110 | }; | 114 | }; |
| 111 | 115 | ||
| 116 | // Map CodeSet segments | ||
| 112 | MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code); | 117 | MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code); |
| 113 | MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); | 118 | MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code); |
| 114 | MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); | 119 | MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private); |
| 115 | 120 | ||
| 116 | address_space->LogLayout(); | 121 | // Allocate and map stack |
| 122 | vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size, | ||
| 123 | std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked | ||
| 124 | ).Unwrap(); | ||
| 125 | misc_memory_used += stack_size; | ||
| 126 | |||
| 127 | vm_manager.LogLayout(Log::Level::Debug); | ||
| 117 | Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); | 128 | Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); |
| 118 | } | 129 | } |
| 119 | 130 | ||
| 131 | VAddr Process::GetLinearHeapBase() const { | ||
| 132 | return (kernel_version < 0x22C ? Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_SIZE) | ||
| 133 | + memory_region->base; | ||
| 134 | } | ||
| 135 | |||
| 136 | VAddr Process::GetLinearHeapLimit() const { | ||
| 137 | return GetLinearHeapBase() + memory_region->size; | ||
| 138 | } | ||
| 139 | |||
| 140 | ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) { | ||
| 141 | if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { | ||
| 142 | return ERR_INVALID_ADDRESS; | ||
| 143 | } | ||
| 144 | |||
| 145 | if (heap_memory == nullptr) { | ||
| 146 | // Initialize heap | ||
| 147 | heap_memory = std::make_shared<std::vector<u8>>(); | ||
| 148 | heap_start = heap_end = target; | ||
| 149 | } | ||
| 150 | |||
| 151 | // If necessary, expand backing vector to cover new heap extents. | ||
| 152 | if (target < heap_start) { | ||
| 153 | heap_memory->insert(begin(*heap_memory), heap_start - target, 0); | ||
| 154 | heap_start = target; | ||
| 155 | vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); | ||
| 156 | } | ||
| 157 | if (target + size > heap_end) { | ||
| 158 | heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0); | ||
| 159 | heap_end = target + size; | ||
| 160 | vm_manager.RefreshMemoryBlockMappings(heap_memory.get()); | ||
| 161 | } | ||
| 162 | ASSERT(heap_end - heap_start == heap_memory->size()); | ||
| 163 | |||
| 164 | CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private)); | ||
| 165 | vm_manager.Reprotect(vma, perms); | ||
| 166 | |||
| 167 | heap_used += size; | ||
| 168 | |||
| 169 | return MakeResult<VAddr>(heap_end - size); | ||
| 170 | } | ||
| 171 | |||
| 172 | ResultCode Process::HeapFree(VAddr target, u32 size) { | ||
| 173 | if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) { | ||
| 174 | return ERR_INVALID_ADDRESS; | ||
| 175 | } | ||
| 176 | |||
| 177 | ResultCode result = vm_manager.UnmapRange(target, size); | ||
| 178 | if (result.IsError()) return result; | ||
| 179 | |||
| 180 | heap_used -= size; | ||
| 181 | |||
| 182 | return RESULT_SUCCESS; | ||
| 183 | } | ||
| 184 | |||
| 185 | ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) { | ||
| 186 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 187 | |||
| 188 | VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size(); | ||
| 189 | // Games and homebrew only ever seem to pass 0 here (which lets the kernel decide the address), | ||
| 190 | // but explicit addresses are also accepted and respected. | ||
| 191 | if (target == 0) { | ||
| 192 | target = heap_end; | ||
| 193 | } | ||
| 194 | |||
| 195 | if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || | ||
| 196 | target > heap_end || target + size < target) { | ||
| 197 | |||
| 198 | return ERR_INVALID_ADDRESS; | ||
| 199 | } | ||
| 200 | |||
| 201 | // Expansion of the linear heap is only allowed if you do an allocation immediately at its | ||
| 202 | // end. It's possible to free gaps in the middle of the heap and then reallocate them later, | ||
| 203 | // but expansions are only allowed at the end. | ||
| 204 | if (target == heap_end) { | ||
| 205 | linheap_memory->insert(linheap_memory->end(), size, 0); | ||
| 206 | vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); | ||
| 207 | } | ||
| 208 | |||
| 209 | // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the | ||
| 210 | // same region. It is unknown if or how the 3DS kernel checks against this. | ||
| 211 | size_t offset = target - GetLinearHeapBase(); | ||
| 212 | CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous)); | ||
| 213 | vm_manager.Reprotect(vma, perms); | ||
| 214 | |||
| 215 | linear_heap_used += size; | ||
| 216 | |||
| 217 | return MakeResult<VAddr>(target); | ||
| 218 | } | ||
| 219 | |||
| 220 | ResultCode Process::LinearFree(VAddr target, u32 size) { | ||
| 221 | auto& linheap_memory = memory_region->linear_heap_memory; | ||
| 222 | |||
| 223 | if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() || | ||
| 224 | target + size < target) { | ||
| 225 | |||
| 226 | return ERR_INVALID_ADDRESS; | ||
| 227 | } | ||
| 228 | |||
| 229 | VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size(); | ||
| 230 | if (target + size > heap_end) { | ||
| 231 | return ERR_INVALID_ADDRESS_STATE; | ||
| 232 | } | ||
| 233 | |||
| 234 | ResultCode result = vm_manager.UnmapRange(target, size); | ||
| 235 | if (result.IsError()) return result; | ||
| 236 | |||
| 237 | linear_heap_used -= size; | ||
| 238 | |||
| 239 | if (target + size == heap_end) { | ||
| 240 | // End of linear heap has been freed, so check what's the last allocated block in it and | ||
| 241 | // reduce the size. | ||
| 242 | auto vma = vm_manager.FindVMA(target); | ||
| 243 | ASSERT(vma != vm_manager.vma_map.end()); | ||
| 244 | ASSERT(vma->second.type == VMAType::Free); | ||
| 245 | VAddr new_end = vma->second.base; | ||
| 246 | if (new_end >= GetLinearHeapBase()) { | ||
| 247 | linheap_memory->resize(new_end - GetLinearHeapBase()); | ||
| 248 | } | ||
| 249 | } | ||
| 250 | |||
| 251 | return RESULT_SUCCESS; | ||
| 252 | } | ||
| 253 | |||
| 120 | Kernel::Process::Process() {} | 254 | Kernel::Process::Process() {} |
| 121 | Kernel::Process::~Process() {} | 255 | Kernel::Process::~Process() {} |
| 122 | 256 | ||
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83d3aceae..60e17f251 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | 16 | ||
| 17 | #include "core/hle/kernel/kernel.h" | 17 | #include "core/hle/kernel/kernel.h" |
| 18 | #include "core/hle/kernel/vm_manager.h" | ||
| 18 | 19 | ||
| 19 | namespace Kernel { | 20 | namespace Kernel { |
| 20 | 21 | ||
| @@ -48,7 +49,7 @@ union ProcessFlags { | |||
| 48 | }; | 49 | }; |
| 49 | 50 | ||
| 50 | class ResourceLimit; | 51 | class ResourceLimit; |
| 51 | class VMManager; | 52 | struct MemoryRegionInfo; |
| 52 | 53 | ||
| 53 | struct CodeSet final : public Object { | 54 | struct CodeSet final : public Object { |
| 54 | static SharedPtr<CodeSet> Create(std::string name, u64 program_id); | 55 | static SharedPtr<CodeSet> Create(std::string name, u64 program_id); |
| @@ -104,14 +105,12 @@ public: | |||
| 104 | /// processes access to specific I/O regions and device memory. | 105 | /// processes access to specific I/O regions and device memory. |
| 105 | boost::container::static_vector<AddressMapping, 8> address_mappings; | 106 | boost::container::static_vector<AddressMapping, 8> address_mappings; |
| 106 | ProcessFlags flags; | 107 | ProcessFlags flags; |
| 108 | /// Kernel compatibility version for this process | ||
| 109 | u16 kernel_version = 0; | ||
| 107 | 110 | ||
| 108 | /// The id of this process | 111 | /// The id of this process |
| 109 | u32 process_id = next_process_id++; | 112 | u32 process_id = next_process_id++; |
| 110 | 113 | ||
| 111 | /// Bitmask of the used TLS slots | ||
| 112 | std::bitset<300> used_tls_slots; | ||
| 113 | std::unique_ptr<VMManager> address_space; | ||
| 114 | |||
| 115 | /** | 114 | /** |
| 116 | * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them | 115 | * Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them |
| 117 | * to this process. | 116 | * to this process. |
| @@ -123,6 +122,36 @@ public: | |||
| 123 | */ | 122 | */ |
| 124 | void Run(s32 main_thread_priority, u32 stack_size); | 123 | void Run(s32 main_thread_priority, u32 stack_size); |
| 125 | 124 | ||
| 125 | |||
| 126 | /////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 127 | // Memory Management | ||
| 128 | |||
| 129 | VMManager vm_manager; | ||
| 130 | |||
| 131 | // Memory used to back the allocations in the regular heap. A single vector is used to cover | ||
| 132 | // the entire virtual address space extents that bound the allocations, including any holes. | ||
| 133 | // This makes deallocation and reallocation of holes fast and keeps process memory contiguous | ||
| 134 | // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. | ||
| 135 | std::shared_ptr<std::vector<u8>> heap_memory; | ||
| 136 | // The left/right bounds of the address space covered by heap_memory. | ||
| 137 | VAddr heap_start = 0, heap_end = 0; | ||
| 138 | |||
| 139 | u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0; | ||
| 140 | |||
| 141 | MemoryRegionInfo* memory_region = nullptr; | ||
| 142 | |||
| 143 | /// Bitmask of the used TLS slots | ||
| 144 | std::bitset<300> used_tls_slots; | ||
| 145 | |||
| 146 | VAddr GetLinearHeapBase() const; | ||
| 147 | VAddr GetLinearHeapLimit() const; | ||
| 148 | |||
| 149 | ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms); | ||
| 150 | ResultCode HeapFree(VAddr target, u32 size); | ||
| 151 | |||
| 152 | ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms); | ||
| 153 | ResultCode LinearFree(VAddr target, u32 size); | ||
| 154 | |||
| 126 | private: | 155 | private: |
| 127 | Process(); | 156 | Process(); |
| 128 | ~Process() override; | 157 | ~Process() override; |
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index 94b3e3298..67dde08c2 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | 8 | ||
| 9 | #include "core/mem_map.h" | ||
| 10 | #include "core/hle/kernel/resource_limit.h" | 9 | #include "core/hle/kernel/resource_limit.h" |
| 11 | 10 | ||
| 12 | namespace Kernel { | 11 | namespace Kernel { |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 29ea6d531..c10126513 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -117,6 +117,7 @@ void Thread::Stop() { | |||
| 117 | wait_objects.clear(); | 117 | wait_objects.clear(); |
| 118 | 118 | ||
| 119 | Kernel::g_current_process->used_tls_slots[tls_index] = false; | 119 | Kernel::g_current_process->used_tls_slots[tls_index] = false; |
| 120 | g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE; | ||
| 120 | 121 | ||
| 121 | HLE::Reschedule(__func__); | 122 | HLE::Reschedule(__func__); |
| 122 | } | 123 | } |
| @@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, | |||
| 414 | } | 415 | } |
| 415 | 416 | ||
| 416 | ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); | 417 | ASSERT_MSG(thread->tls_index != -1, "Out of TLS space"); |
| 418 | g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE; | ||
| 417 | 419 | ||
| 418 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used | 420 | // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used |
| 419 | // to initialize the context | 421 | // to initialize the context |
| @@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { | |||
| 504 | } | 506 | } |
| 505 | 507 | ||
| 506 | VAddr Thread::GetTLSAddress() const { | 508 | VAddr Thread::GetTLSAddress() const { |
| 507 | return Memory::TLS_AREA_VADDR + tls_index * 0x200; | 509 | return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE; |
| 508 | } | 510 | } |
| 509 | 511 | ||
| 510 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 512 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 205cc7b53..2610acf76 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -11,6 +11,15 @@ | |||
| 11 | 11 | ||
| 12 | namespace Kernel { | 12 | namespace Kernel { |
| 13 | 13 | ||
| 14 | static const char* GetMemoryStateName(MemoryState state) { | ||
| 15 | static const char* names[] = { | ||
| 16 | "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased", | ||
| 17 | "Alias", "AliasCode", "Locked", | ||
| 18 | }; | ||
| 19 | |||
| 20 | return names[(int)state]; | ||
| 21 | } | ||
| 22 | |||
| 14 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | 23 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { |
| 15 | ASSERT(base + size == next.base); | 24 | ASSERT(base + size == next.base); |
| 16 | if (permissions != next.permissions || | 25 | if (permissions != next.permissions || |
| @@ -51,11 +60,15 @@ void VMManager::Reset() { | |||
| 51 | } | 60 | } |
| 52 | 61 | ||
| 53 | VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { | 62 | VMManager::VMAHandle VMManager::FindVMA(VAddr target) const { |
| 54 | return std::prev(vma_map.upper_bound(target)); | 63 | if (target >= MAX_ADDRESS) { |
| 64 | return vma_map.end(); | ||
| 65 | } else { | ||
| 66 | return std::prev(vma_map.upper_bound(target)); | ||
| 67 | } | ||
| 55 | } | 68 | } |
| 56 | 69 | ||
| 57 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, | 70 | ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, |
| 58 | std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) { | 71 | std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) { |
| 59 | ASSERT(block != nullptr); | 72 | ASSERT(block != nullptr); |
| 60 | ASSERT(offset + size <= block->size()); | 73 | ASSERT(offset + size <= block->size()); |
| 61 | 74 | ||
| @@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u3 | |||
| 106 | return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); | 119 | return MakeResult<VMAHandle>(MergeAdjacent(vma_handle)); |
| 107 | } | 120 | } |
| 108 | 121 | ||
| 109 | void VMManager::Unmap(VMAHandle vma_handle) { | 122 | VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) { |
| 110 | VMAIter iter = StripIterConstness(vma_handle); | 123 | VirtualMemoryArea& vma = vma_handle->second; |
| 111 | |||
| 112 | VirtualMemoryArea& vma = iter->second; | ||
| 113 | vma.type = VMAType::Free; | 124 | vma.type = VMAType::Free; |
| 114 | vma.permissions = VMAPermission::None; | 125 | vma.permissions = VMAPermission::None; |
| 115 | vma.meminfo_state = MemoryState::Free; | 126 | vma.meminfo_state = MemoryState::Free; |
| @@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) { | |||
| 121 | 132 | ||
| 122 | UpdatePageTableForVMA(vma); | 133 | UpdatePageTableForVMA(vma); |
| 123 | 134 | ||
| 124 | MergeAdjacent(iter); | 135 | return MergeAdjacent(vma_handle); |
| 136 | } | ||
| 137 | |||
| 138 | ResultCode VMManager::UnmapRange(VAddr target, u32 size) { | ||
| 139 | CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); | ||
| 140 | VAddr target_end = target + size; | ||
| 141 | |||
| 142 | VMAIter end = vma_map.end(); | ||
| 143 | // The comparison against the end of the range must be done using addresses since VMAs can be | ||
| 144 | // merged during this process, causing invalidation of the iterators. | ||
| 145 | while (vma != end && vma->second.base < target_end) { | ||
| 146 | vma = std::next(Unmap(vma)); | ||
| 147 | } | ||
| 148 | |||
| 149 | ASSERT(FindVMA(target)->second.size >= size); | ||
| 150 | return RESULT_SUCCESS; | ||
| 125 | } | 151 | } |
| 126 | 152 | ||
| 127 | void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { | 153 | VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) { |
| 128 | VMAIter iter = StripIterConstness(vma_handle); | 154 | VMAIter iter = StripIterConstness(vma_handle); |
| 129 | 155 | ||
| 130 | VirtualMemoryArea& vma = iter->second; | 156 | VirtualMemoryArea& vma = iter->second; |
| 131 | vma.permissions = new_perms; | 157 | vma.permissions = new_perms; |
| 132 | UpdatePageTableForVMA(vma); | 158 | UpdatePageTableForVMA(vma); |
| 133 | 159 | ||
| 134 | MergeAdjacent(iter); | 160 | return MergeAdjacent(iter); |
| 161 | } | ||
| 162 | |||
| 163 | ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) { | ||
| 164 | CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size)); | ||
| 165 | VAddr target_end = target + size; | ||
| 166 | |||
| 167 | VMAIter end = vma_map.end(); | ||
| 168 | // The comparison against the end of the range must be done using addresses since VMAs can be | ||
| 169 | // merged during this process, causing invalidation of the iterators. | ||
| 170 | while (vma != end && vma->second.base < target_end) { | ||
| 171 | vma = std::next(StripIterConstness(Reprotect(vma, new_perms))); | ||
| 172 | } | ||
| 173 | |||
| 174 | return RESULT_SUCCESS; | ||
| 135 | } | 175 | } |
| 136 | 176 | ||
| 137 | void VMManager::LogLayout() const { | 177 | void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) { |
| 178 | // If this ever proves to have a noticeable performance impact, allow users of the function to | ||
| 179 | // specify a specific range of addresses to limit the scan to. | ||
| 138 | for (const auto& p : vma_map) { | 180 | for (const auto& p : vma_map) { |
| 139 | const VirtualMemoryArea& vma = p.second; | 181 | const VirtualMemoryArea& vma = p.second; |
| 140 | LOG_DEBUG(Kernel, "%08X - %08X size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size, | 182 | if (block == vma.backing_block.get()) { |
| 183 | UpdatePageTableForVMA(vma); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | void VMManager::LogLayout(Log::Level log_level) const { | ||
| 189 | for (const auto& p : vma_map) { | ||
| 190 | const VirtualMemoryArea& vma = p.second; | ||
| 191 | LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X size: %8X %c%c%c %s", | ||
| 192 | vma.base, vma.base + vma.size, vma.size, | ||
| 141 | (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-', | 193 | (u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-', |
| 142 | (u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-', | 194 | (u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-', |
| 143 | (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-'); | 195 | (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state)); |
| 144 | } | 196 | } |
| 145 | } | 197 | } |
| 146 | 198 | ||
| @@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) { | |||
| 151 | } | 203 | } |
| 152 | 204 | ||
| 153 | ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { | 205 | ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { |
| 154 | ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size); | 206 | ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size); |
| 155 | ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base); | 207 | ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base); |
| 156 | 208 | ||
| 157 | VMAIter vma_handle = StripIterConstness(FindVMA(base)); | 209 | VMAIter vma_handle = StripIterConstness(FindVMA(base)); |
| 158 | if (vma_handle == vma_map.end()) { | 210 | if (vma_handle == vma_map.end()) { |
| 159 | // Target address is outside the range managed by the kernel | 211 | // Target address is outside the range managed by the kernel |
| 160 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, | 212 | return ERR_INVALID_ADDRESS; |
| 161 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5 | ||
| 162 | } | 213 | } |
| 163 | 214 | ||
| 164 | VirtualMemoryArea& vma = vma_handle->second; | 215 | VirtualMemoryArea& vma = vma_handle->second; |
| 165 | if (vma.type != VMAType::Free) { | 216 | if (vma.type != VMAType::Free) { |
| 166 | // Region is already allocated | 217 | // Region is already allocated |
| 167 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, | 218 | return ERR_INVALID_ADDRESS_STATE; |
| 168 | ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 | ||
| 169 | } | 219 | } |
| 170 | 220 | ||
| 171 | u32 start_in_vma = base - vma.base; | 221 | u32 start_in_vma = base - vma.base; |
| @@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { | |||
| 173 | 223 | ||
| 174 | if (end_in_vma > vma.size) { | 224 | if (end_in_vma > vma.size) { |
| 175 | // Requested allocation doesn't fit inside VMA | 225 | // Requested allocation doesn't fit inside VMA |
| 176 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, | 226 | return ERR_INVALID_ADDRESS_STATE; |
| 177 | ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5 | ||
| 178 | } | 227 | } |
| 179 | 228 | ||
| 180 | if (end_in_vma != vma.size) { | 229 | if (end_in_vma != vma.size) { |
| @@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) { | |||
| 189 | return MakeResult<VMAIter>(vma_handle); | 238 | return MakeResult<VMAIter>(vma_handle); |
| 190 | } | 239 | } |
| 191 | 240 | ||
| 241 | ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) { | ||
| 242 | ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size); | ||
| 243 | ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target); | ||
| 244 | |||
| 245 | VAddr target_end = target + size; | ||
| 246 | ASSERT(target_end >= target); | ||
| 247 | ASSERT(target_end <= MAX_ADDRESS); | ||
| 248 | ASSERT(size > 0); | ||
| 249 | |||
| 250 | VMAIter begin_vma = StripIterConstness(FindVMA(target)); | ||
| 251 | VMAIter i_end = vma_map.lower_bound(target_end); | ||
| 252 | for (auto i = begin_vma; i != i_end; ++i) { | ||
| 253 | if (i->second.type == VMAType::Free) { | ||
| 254 | return ERR_INVALID_ADDRESS_STATE; | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | if (target != begin_vma->second.base) { | ||
| 259 | begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base); | ||
| 260 | } | ||
| 261 | |||
| 262 | VMAIter end_vma = StripIterConstness(FindVMA(target_end)); | ||
| 263 | if (end_vma != vma_map.end() && target_end != end_vma->second.base) { | ||
| 264 | end_vma = SplitVMA(end_vma, target_end - end_vma->second.base); | ||
| 265 | } | ||
| 266 | |||
| 267 | return MakeResult<VMAIter>(begin_vma); | ||
| 268 | } | ||
| 269 | |||
| 192 | VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) { | 270 | VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) { |
| 193 | VirtualMemoryArea& old_vma = vma_handle->second; | 271 | VirtualMemoryArea& old_vma = vma_handle->second; |
| 194 | VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA | 272 | VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA |
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index b3795a94a..4e95f1f0c 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -14,6 +14,14 @@ | |||
| 14 | 14 | ||
| 15 | namespace Kernel { | 15 | namespace Kernel { |
| 16 | 16 | ||
| 17 | const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5 | ||
| 18 | ErrorDescription::InvalidAddress, ErrorModule::OS, | ||
| 19 | ErrorSummary::InvalidArgument, ErrorLevel::Usage}; | ||
| 20 | |||
| 21 | const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5 | ||
| 22 | ErrorDescription::InvalidAddress, ErrorModule::OS, | ||
| 23 | ErrorSummary::InvalidState, ErrorLevel::Usage}; | ||
| 24 | |||
| 17 | enum class VMAType : u8 { | 25 | enum class VMAType : u8 { |
| 18 | /// VMA represents an unmapped region of the address space. | 26 | /// VMA represents an unmapped region of the address space. |
| 19 | Free, | 27 | Free, |
| @@ -75,7 +83,7 @@ struct VirtualMemoryArea { | |||
| 75 | /// Memory block backing this VMA. | 83 | /// Memory block backing this VMA. |
| 76 | std::shared_ptr<std::vector<u8>> backing_block = nullptr; | 84 | std::shared_ptr<std::vector<u8>> backing_block = nullptr; |
| 77 | /// Offset into the backing_memory the mapping starts from. | 85 | /// Offset into the backing_memory the mapping starts from. |
| 78 | u32 offset = 0; | 86 | size_t offset = 0; |
| 79 | 87 | ||
| 80 | // Settings for type = BackingMemory | 88 | // Settings for type = BackingMemory |
| 81 | /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed. | 89 | /// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed. |
| @@ -141,7 +149,7 @@ public: | |||
| 141 | * @param state MemoryState tag to attach to the VMA. | 149 | * @param state MemoryState tag to attach to the VMA. |
| 142 | */ | 150 | */ |
| 143 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 151 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, |
| 144 | u32 offset, u32 size, MemoryState state); | 152 | size_t offset, u32 size, MemoryState state); |
| 145 | 153 | ||
| 146 | /** | 154 | /** |
| 147 | * Maps an unmanaged host memory pointer at a given address. | 155 | * Maps an unmanaged host memory pointer at a given address. |
| @@ -163,14 +171,23 @@ public: | |||
| 163 | */ | 171 | */ |
| 164 | ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state); | 172 | ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state); |
| 165 | 173 | ||
| 166 | /// Unmaps the given VMA. | 174 | /// Unmaps a range of addresses, splitting VMAs as necessary. |
| 167 | void Unmap(VMAHandle vma); | 175 | ResultCode UnmapRange(VAddr target, u32 size); |
| 168 | 176 | ||
| 169 | /// Changes the permissions of the given VMA. | 177 | /// Changes the permissions of the given VMA. |
| 170 | void Reprotect(VMAHandle vma, VMAPermission new_perms); | 178 | VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms); |
| 179 | |||
| 180 | /// Changes the permissions of a range of addresses, splitting VMAs as necessary. | ||
| 181 | ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms); | ||
| 182 | |||
| 183 | /** | ||
| 184 | * Scans all VMAs and updates the page table range of any that use the given vector as backing | ||
| 185 | * memory. This should be called after any operation that causes reallocation of the vector. | ||
| 186 | */ | ||
| 187 | void RefreshMemoryBlockMappings(const std::vector<u8>* block); | ||
| 171 | 188 | ||
| 172 | /// Dumps the address space layout to the log, for debugging | 189 | /// Dumps the address space layout to the log, for debugging |
| 173 | void LogLayout() const; | 190 | void LogLayout(Log::Level log_level) const; |
| 174 | 191 | ||
| 175 | private: | 192 | private: |
| 176 | using VMAIter = decltype(vma_map)::iterator; | 193 | using VMAIter = decltype(vma_map)::iterator; |
| @@ -178,6 +195,9 @@ private: | |||
| 178 | /// Converts a VMAHandle to a mutable VMAIter. | 195 | /// Converts a VMAHandle to a mutable VMAIter. |
| 179 | VMAIter StripIterConstness(const VMAHandle& iter); | 196 | VMAIter StripIterConstness(const VMAHandle& iter); |
| 180 | 197 | ||
| 198 | /// Unmaps the given VMA. | ||
| 199 | VMAIter Unmap(VMAIter vma); | ||
| 200 | |||
| 181 | /** | 201 | /** |
| 182 | * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing | 202 | * Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing |
| 183 | * the appropriate error checking. | 203 | * the appropriate error checking. |
| @@ -185,6 +205,12 @@ private: | |||
| 185 | ResultVal<VMAIter> CarveVMA(VAddr base, u32 size); | 205 | ResultVal<VMAIter> CarveVMA(VAddr base, u32 size); |
| 186 | 206 | ||
| 187 | /** | 207 | /** |
| 208 | * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each | ||
| 209 | * end of the range. | ||
| 210 | */ | ||
| 211 | ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size); | ||
| 212 | |||
| 213 | /** | ||
| 188 | * Splits a VMA in two, at the specified offset. | 214 | * Splits a VMA in two, at the specified offset. |
| 189 | * @returns the right side of the split, with the original iterator becoming the left side. | 215 | * @returns the right side of the split, with the original iterator becoming the left side. |
| 190 | */ | 216 | */ |
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp index 35402341b..6a2fdea2b 100644 --- a/src/core/hle/service/apt/apt.cpp +++ b/src/core/hle/service/apt/apt.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "core/hle/hle.h" | 16 | #include "core/hle/hle.h" |
| 17 | #include "core/hle/kernel/event.h" | 17 | #include "core/hle/kernel/event.h" |
| 18 | #include "core/hle/kernel/mutex.h" | 18 | #include "core/hle/kernel/mutex.h" |
| 19 | #include "core/hle/kernel/process.h" | ||
| 19 | #include "core/hle/kernel/shared_memory.h" | 20 | #include "core/hle/kernel/shared_memory.h" |
| 20 | #include "core/hle/kernel/thread.h" | 21 | #include "core/hle/kernel/thread.h" |
| 21 | 22 | ||
| @@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock; | |||
| 37 | static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event | 38 | static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event |
| 38 | static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event | 39 | static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event |
| 39 | 40 | ||
| 40 | static std::vector<u8> shared_font; | 41 | static std::shared_ptr<std::vector<u8>> shared_font; |
| 41 | 42 | ||
| 42 | static u32 cpu_percent; ///< CPU time available to the running application | 43 | static u32 cpu_percent; ///< CPU time available to the running application |
| 43 | 44 | ||
| @@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) { | |||
| 74 | void GetSharedFont(Service::Interface* self) { | 75 | void GetSharedFont(Service::Interface* self) { |
| 75 | u32* cmd_buff = Kernel::GetCommandBuffer(); | 76 | u32* cmd_buff = Kernel::GetCommandBuffer(); |
| 76 | 77 | ||
| 77 | if (!shared_font.empty()) { | 78 | if (shared_font != nullptr) { |
| 78 | // TODO(bunnei): This function shouldn't copy the shared font every time it's called. | 79 | // TODO(yuriks): This is a hack to keep this working right now even with our completely |
| 79 | // Instead, it should probably map the shared font as RO memory. We don't currently have | 80 | // broken shared memory system. |
| 80 | // an easy way to do this, but the copy should be sufficient for now. | 81 | shared_font_mem->base_address = SHARED_FONT_VADDR; |
| 81 | memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size()); | 82 | Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->base_address, |
| 83 | shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared); | ||
| 82 | 84 | ||
| 83 | cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); | 85 | cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2); |
| 84 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error | 86 | cmd_buff[1] = RESULT_SUCCESS.raw; // No error |
| @@ -391,7 +393,6 @@ void Init() { | |||
| 391 | // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file | 393 | // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file |
| 392 | // "shared_font.bin" in the Citra "sysdata" directory. | 394 | // "shared_font.bin" in the Citra "sysdata" directory. |
| 393 | 395 | ||
| 394 | shared_font.clear(); | ||
| 395 | std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; | 396 | std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT; |
| 396 | 397 | ||
| 397 | FileUtil::CreateFullPath(filepath); // Create path if not already created | 398 | FileUtil::CreateFullPath(filepath); // Create path if not already created |
| @@ -399,8 +400,8 @@ void Init() { | |||
| 399 | 400 | ||
| 400 | if (file.IsOpen()) { | 401 | if (file.IsOpen()) { |
| 401 | // Read shared font data | 402 | // Read shared font data |
| 402 | shared_font.resize((size_t)file.GetSize()); | 403 | shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize()); |
| 403 | file.ReadBytes(shared_font.data(), (size_t)file.GetSize()); | 404 | file.ReadBytes(shared_font->data(), shared_font->size()); |
| 404 | 405 | ||
| 405 | // Create shared font memory object | 406 | // Create shared font memory object |
| 406 | using Kernel::MemoryPermission; | 407 | using Kernel::MemoryPermission; |
| @@ -424,7 +425,7 @@ void Init() { | |||
| 424 | } | 425 | } |
| 425 | 426 | ||
| 426 | void Shutdown() { | 427 | void Shutdown() { |
| 427 | shared_font.clear(); | 428 | shared_font = nullptr; |
| 428 | shared_font_mem = nullptr; | 429 | shared_font_mem = nullptr; |
| 429 | lock = nullptr; | 430 | lock = nullptr; |
| 430 | notification_event = nullptr; | 431 | notification_event = nullptr; |
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index e93c1b436..fde508a13 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/bit_field.h" | 5 | #include "common/bit_field.h" |
| 6 | 6 | ||
| 7 | #include "core/mem_map.h" | ||
| 8 | #include "core/memory.h" | 7 | #include "core/memory.h" |
| 9 | #include "core/hle/kernel/event.h" | 8 | #include "core/hle/kernel/event.h" |
| 10 | #include "core/hle/kernel/shared_memory.h" | 9 | #include "core/hle/kernel/shared_memory.h" |
| @@ -418,7 +417,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 418 | 417 | ||
| 419 | case CommandId::SET_DISPLAY_TRANSFER: | 418 | case CommandId::SET_DISPLAY_TRANSFER: |
| 420 | { | 419 | { |
| 421 | auto& params = command.image_copy; | 420 | auto& params = command.display_transfer; |
| 422 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 421 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |
| 423 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 422 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 424 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 423 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |
| @@ -433,17 +432,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 433 | // TODO: Check if texture copies are implemented correctly.. | 432 | // TODO: Check if texture copies are implemented correctly.. |
| 434 | case CommandId::SET_TEXTURE_COPY: | 433 | case CommandId::SET_TEXTURE_COPY: |
| 435 | { | 434 | { |
| 436 | auto& params = command.image_copy; | 435 | auto& params = command.texture_copy; |
| 437 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 436 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), |
| 438 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 437 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 439 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 438 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), |
| 440 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | 439 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |
| 441 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); | 440 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), |
| 442 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); | 441 | params.size); |
| 443 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); | 442 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), |
| 444 | 443 | params.in_width_gap); | |
| 445 | // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? | 444 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), |
| 446 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); | 445 | params.out_width_gap); |
| 446 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), | ||
| 447 | params.flags); | ||
| 448 | |||
| 449 | // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter. | ||
| 450 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||
| 447 | break; | 451 | break; |
| 448 | } | 452 | } |
| 449 | 453 | ||
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index c89d0a467..8bcb30ad1 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h | |||
| @@ -127,7 +127,16 @@ struct Command { | |||
| 127 | u32 in_buffer_size; | 127 | u32 in_buffer_size; |
| 128 | u32 out_buffer_size; | 128 | u32 out_buffer_size; |
| 129 | u32 flags; | 129 | u32 flags; |
| 130 | } image_copy; | 130 | } display_transfer; |
| 131 | |||
| 132 | struct { | ||
| 133 | u32 in_buffer_address; | ||
| 134 | u32 out_buffer_address; | ||
| 135 | u32 size; | ||
| 136 | u32 in_width_gap; | ||
| 137 | u32 out_width_gap; | ||
| 138 | u32 flags; | ||
| 139 | } texture_copy; | ||
| 131 | 140 | ||
| 132 | u8 raw_data[0x1C]; | 141 | u8 raw_data[0x1C]; |
| 133 | }; | 142 | }; |
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp index 6e7dafaad..6b1b71fe4 100644 --- a/src/core/hle/service/y2r_u.cpp +++ b/src/core/hle/service/y2r_u.cpp | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "core/hle/kernel/event.h" | 10 | #include "core/hle/kernel/event.h" |
| 11 | #include "core/hle/service/y2r_u.h" | 11 | #include "core/hle/service/y2r_u.h" |
| 12 | #include "core/hw/y2r.h" | 12 | #include "core/hw/y2r.h" |
| 13 | #include "core/mem_map.h" | ||
| 14 | 13 | ||
| 15 | #include "video_core/renderer_base.h" | 14 | #include "video_core/renderer_base.h" |
| 16 | #include "video_core/utils.h" | 15 | #include "video_core/utils.h" |
diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp index 26d87c7e2..50c5bc01b 100644 --- a/src/core/hle/shared_page.cpp +++ b/src/core/hle/shared_page.cpp | |||
| @@ -18,7 +18,4 @@ void Init() { | |||
| 18 | shared_page.running_hw = 0x1; // product | 18 | shared_page.running_hw = 0x1; // product |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | void Shutdown() { | ||
| 22 | } | ||
| 23 | |||
| 24 | } // namespace | 21 | } // namespace |
diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h index db6a5340b..379bb7b63 100644 --- a/src/core/hle/shared_page.h +++ b/src/core/hle/shared_page.h | |||
| @@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st | |||
| 54 | extern SharedPageDef shared_page; | 54 | extern SharedPageDef shared_page; |
| 55 | 55 | ||
| 56 | void Init(); | 56 | void Init(); |
| 57 | void Shutdown(); | ||
| 58 | 57 | ||
| 59 | } // namespace | 58 | } // namespace |
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index bb64fdfb7..89ac45a6f 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp | |||
| @@ -10,11 +10,11 @@ | |||
| 10 | #include "common/symbols.h" | 10 | #include "common/symbols.h" |
| 11 | 11 | ||
| 12 | #include "core/core_timing.h" | 12 | #include "core/core_timing.h" |
| 13 | #include "core/mem_map.h" | ||
| 14 | #include "core/arm/arm_interface.h" | 13 | #include "core/arm/arm_interface.h" |
| 15 | 14 | ||
| 16 | #include "core/hle/kernel/address_arbiter.h" | 15 | #include "core/hle/kernel/address_arbiter.h" |
| 17 | #include "core/hle/kernel/event.h" | 16 | #include "core/hle/kernel/event.h" |
| 17 | #include "core/hle/kernel/memory.h" | ||
| 18 | #include "core/hle/kernel/mutex.h" | 18 | #include "core/hle/kernel/mutex.h" |
| 19 | #include "core/hle/kernel/process.h" | 19 | #include "core/hle/kernel/process.h" |
| 20 | #include "core/hle/kernel/resource_limit.h" | 20 | #include "core/hle/kernel/resource_limit.h" |
| @@ -41,32 +41,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel, | |||
| 41 | const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS, | 41 | const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS, |
| 42 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E | 42 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E |
| 43 | 43 | ||
| 44 | const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1 | ||
| 45 | ErrorDescription::MisalignedAddress, ErrorModule::OS, | ||
| 46 | ErrorSummary::InvalidArgument, ErrorLevel::Usage}; | ||
| 47 | const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2 | ||
| 48 | ErrorDescription::MisalignedSize, ErrorModule::OS, | ||
| 49 | ErrorSummary::InvalidArgument, ErrorLevel::Usage}; | ||
| 50 | const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE | ||
| 51 | ErrorDescription::InvalidCombination, ErrorModule::OS, | ||
| 52 | ErrorSummary::InvalidArgument, ErrorLevel::Usage}; | ||
| 53 | |||
| 44 | enum ControlMemoryOperation { | 54 | enum ControlMemoryOperation { |
| 45 | MEMORY_OPERATION_HEAP = 0x00000003, | 55 | MEMOP_FREE = 1, |
| 46 | MEMORY_OPERATION_GSP_HEAP = 0x00010003, | 56 | MEMOP_RESERVE = 2, // This operation seems to be unsupported in the kernel |
| 57 | MEMOP_COMMIT = 3, | ||
| 58 | MEMOP_MAP = 4, | ||
| 59 | MEMOP_UNMAP = 5, | ||
| 60 | MEMOP_PROTECT = 6, | ||
| 61 | MEMOP_OPERATION_MASK = 0xFF, | ||
| 62 | |||
| 63 | MEMOP_REGION_APP = 0x100, | ||
| 64 | MEMOP_REGION_SYSTEM = 0x200, | ||
| 65 | MEMOP_REGION_BASE = 0x300, | ||
| 66 | MEMOP_REGION_MASK = 0xF00, | ||
| 67 | |||
| 68 | MEMOP_LINEAR = 0x10000, | ||
| 47 | }; | 69 | }; |
| 48 | 70 | ||
| 49 | /// Map application or GSP heap memory | 71 | /// Map application or GSP heap memory |
| 50 | static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) { | 72 | static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) { |
| 51 | LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X", | 73 | using namespace Kernel; |
| 74 | |||
| 75 | LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X", | ||
| 52 | operation, addr0, addr1, size, permissions); | 76 | operation, addr0, addr1, size, permissions); |
| 53 | 77 | ||
| 54 | switch (operation) { | 78 | if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 0) { |
| 79 | return ERR_MISALIGNED_ADDRESS; | ||
| 80 | } | ||
| 81 | if ((size & Memory::PAGE_MASK) != 0) { | ||
| 82 | return ERR_MISALIGNED_SIZE; | ||
| 83 | } | ||
| 84 | |||
| 85 | u32 region = operation & MEMOP_REGION_MASK; | ||
| 86 | operation &= ~MEMOP_REGION_MASK; | ||
| 87 | |||
| 88 | if (region != 0) { | ||
| 89 | LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region); | ||
| 90 | } | ||
| 91 | |||
| 92 | if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) { | ||
| 93 | return ERR_INVALID_COMBINATION; | ||
| 94 | } | ||
| 95 | VMAPermission vma_permissions = (VMAPermission)permissions; | ||
| 96 | |||
| 97 | auto& process = *g_current_process; | ||
| 98 | |||
| 99 | switch (operation & MEMOP_OPERATION_MASK) { | ||
| 100 | case MEMOP_FREE: | ||
| 101 | { | ||
| 102 | if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) { | ||
| 103 | ResultCode result = process.HeapFree(addr0, size); | ||
| 104 | if (result.IsError()) return result; | ||
| 105 | } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) { | ||
| 106 | ResultCode result = process.LinearFree(addr0, size); | ||
| 107 | if (result.IsError()) return result; | ||
| 108 | } else { | ||
| 109 | return ERR_INVALID_ADDRESS; | ||
| 110 | } | ||
| 111 | *out_addr = addr0; | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | |||
| 115 | case MEMOP_COMMIT: | ||
| 116 | { | ||
| 117 | if (operation & MEMOP_LINEAR) { | ||
| 118 | CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions)); | ||
| 119 | } else { | ||
| 120 | CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions)); | ||
| 121 | } | ||
| 122 | break; | ||
| 123 | } | ||
| 55 | 124 | ||
| 56 | // Map normal heap memory | 125 | case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented |
| 57 | case MEMORY_OPERATION_HEAP: | 126 | { |
| 58 | *out_addr = Memory::MapBlock_Heap(size, operation, permissions); | 127 | CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions)); |
| 59 | break; | 128 | break; |
| 129 | } | ||
| 130 | |||
| 131 | case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented | ||
| 132 | { | ||
| 133 | ResultCode result = process.HeapFree(addr0, size); | ||
| 134 | if (result.IsError()) return result; | ||
| 135 | break; | ||
| 136 | } | ||
| 60 | 137 | ||
| 61 | // Map GSP heap memory | 138 | case MEMOP_PROTECT: |
| 62 | case MEMORY_OPERATION_GSP_HEAP: | 139 | { |
| 63 | *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions); | 140 | ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions); |
| 141 | if (result.IsError()) return result; | ||
| 64 | break; | 142 | break; |
| 143 | } | ||
| 65 | 144 | ||
| 66 | // Unknown ControlMemory operation | ||
| 67 | default: | 145 | default: |
| 68 | LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation); | 146 | LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation); |
| 147 | return ERR_INVALID_COMBINATION; | ||
| 69 | } | 148 | } |
| 149 | |||
| 150 | process.vm_manager.LogLayout(Log::Level::Trace); | ||
| 151 | |||
| 70 | return RESULT_SUCCESS; | 152 | return RESULT_SUCCESS; |
| 71 | } | 153 | } |
| 72 | 154 | ||
| @@ -537,9 +619,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf | |||
| 537 | if (process == nullptr) | 619 | if (process == nullptr) |
| 538 | return ERR_INVALID_HANDLE; | 620 | return ERR_INVALID_HANDLE; |
| 539 | 621 | ||
| 540 | auto vma = process->address_space->FindVMA(addr); | 622 | auto vma = process->vm_manager.FindVMA(addr); |
| 541 | 623 | ||
| 542 | if (vma == process->address_space->vma_map.end()) | 624 | if (vma == Kernel::g_current_process->vm_manager.vma_map.end()) |
| 543 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); | 625 | return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage); |
| 544 | 626 | ||
| 545 | memory_info->base_address = vma->second.base; | 627 | memory_info->base_address = vma->second.base; |
| @@ -692,6 +774,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32 | |||
| 692 | return RESULT_SUCCESS; | 774 | return RESULT_SUCCESS; |
| 693 | } | 775 | } |
| 694 | 776 | ||
| 777 | static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) { | ||
| 778 | LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type); | ||
| 779 | |||
| 780 | using Kernel::Process; | ||
| 781 | Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle); | ||
| 782 | if (process == nullptr) | ||
| 783 | return ERR_INVALID_HANDLE; | ||
| 784 | |||
| 785 | switch (type) { | ||
| 786 | case 0: | ||
| 787 | case 2: | ||
| 788 | // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure | ||
| 789 | // what's the difference between them. | ||
| 790 | *out = process->heap_used + process->linear_heap_used + process->misc_memory_used; | ||
| 791 | break; | ||
| 792 | case 1: | ||
| 793 | case 3: | ||
| 794 | case 4: | ||
| 795 | case 5: | ||
| 796 | case 6: | ||
| 797 | case 7: | ||
| 798 | case 8: | ||
| 799 | // These are valid, but not implemented yet | ||
| 800 | LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type); | ||
| 801 | break; | ||
| 802 | case 20: | ||
| 803 | *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase(); | ||
| 804 | break; | ||
| 805 | default: | ||
| 806 | LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type); | ||
| 807 | |||
| 808 | if (type >= 21 && type <= 23) { | ||
| 809 | return ResultCode( // 0xE0E01BF4 | ||
| 810 | ErrorDescription::NotImplemented, ErrorModule::OS, | ||
| 811 | ErrorSummary::InvalidArgument, ErrorLevel::Usage); | ||
| 812 | } else { | ||
| 813 | return ResultCode( // 0xD8E007ED | ||
| 814 | ErrorDescription::InvalidEnumValue, ErrorModule::Kernel, | ||
| 815 | ErrorSummary::InvalidArgument, ErrorLevel::Permanent); | ||
| 816 | } | ||
| 817 | break; | ||
| 818 | } | ||
| 819 | |||
| 820 | return RESULT_SUCCESS; | ||
| 821 | } | ||
| 822 | |||
| 695 | namespace { | 823 | namespace { |
| 696 | struct FunctionDef { | 824 | struct FunctionDef { |
| 697 | using Func = void(); | 825 | using Func = void(); |
| @@ -746,7 +874,7 @@ static const FunctionDef SVC_Table[] = { | |||
| 746 | {0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"}, | 874 | {0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"}, |
| 747 | {0x29, nullptr, "GetHandleInfo"}, | 875 | {0x29, nullptr, "GetHandleInfo"}, |
| 748 | {0x2A, nullptr, "GetSystemInfo"}, | 876 | {0x2A, nullptr, "GetSystemInfo"}, |
| 749 | {0x2B, nullptr, "GetProcessInfo"}, | 877 | {0x2B, HLE::Wrap<GetProcessInfo>, "GetProcessInfo"}, |
| 750 | {0x2C, nullptr, "GetThreadInfo"}, | 878 | {0x2C, nullptr, "GetThreadInfo"}, |
| 751 | {0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"}, | 879 | {0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"}, |
| 752 | {0x2E, nullptr, "SendSyncRequest1"}, | 880 | {0x2E, nullptr, "SendSyncRequest1"}, |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 3ccbc03b2..68ae38289 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <numeric> | ||
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | #include "common/color.h" | 9 | #include "common/color.h" |
| @@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) { | |||
| 158 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | 159 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); |
| 159 | u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); | 160 | u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); |
| 160 | 161 | ||
| 162 | if (config.is_texture_copy) { | ||
| 163 | u32 input_width = config.texture_copy.input_width * 16; | ||
| 164 | u32 input_gap = config.texture_copy.input_gap * 16; | ||
| 165 | u32 output_width = config.texture_copy.output_width * 16; | ||
| 166 | u32 output_gap = config.texture_copy.output_gap * 16; | ||
| 167 | |||
| 168 | size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); | ||
| 169 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); | ||
| 170 | |||
| 171 | u32 remaining_size = config.texture_copy.size; | ||
| 172 | u32 remaining_input = input_width; | ||
| 173 | u32 remaining_output = output_width; | ||
| 174 | while (remaining_size > 0) { | ||
| 175 | u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); | ||
| 176 | |||
| 177 | std::memcpy(dst_pointer, src_pointer, copy_size); | ||
| 178 | src_pointer += copy_size; | ||
| 179 | dst_pointer += copy_size; | ||
| 180 | |||
| 181 | remaining_input -= copy_size; | ||
| 182 | remaining_output -= copy_size; | ||
| 183 | remaining_size -= copy_size; | ||
| 184 | |||
| 185 | if (remaining_input == 0) { | ||
| 186 | remaining_input = input_width; | ||
| 187 | src_pointer += input_gap; | ||
| 188 | } | ||
| 189 | if (remaining_output == 0) { | ||
| 190 | remaining_output = output_width; | ||
| 191 | dst_pointer += output_gap; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | ||
| 196 | config.texture_copy.size, | ||
| 197 | config.GetPhysicalInputAddress(), input_width, input_gap, | ||
| 198 | config.GetPhysicalOutputAddress(), output_width, output_gap, | ||
| 199 | config.flags); | ||
| 200 | |||
| 201 | size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); | ||
| 202 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); | ||
| 203 | |||
| 204 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | |||
| 161 | if (config.scaling > config.ScaleXY) { | 208 | if (config.scaling > config.ScaleXY) { |
| 162 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); | 209 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); |
| 163 | UNIMPLEMENTED(); | 210 | UNIMPLEMENTED(); |
| 164 | break; | 211 | break; |
| 165 | } | 212 | } |
| 166 | 213 | ||
| 167 | if (config.output_tiled && | 214 | if (config.input_linear && config.scaling != config.NoScale) { |
| 168 | (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { | ||
| 169 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | 215 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); |
| 170 | UNIMPLEMENTED(); | 216 | UNIMPLEMENTED(); |
| 171 | break; | 217 | break; |
| @@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) { | |||
| 182 | 228 | ||
| 183 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); | 229 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); |
| 184 | 230 | ||
| 185 | if (config.raw_copy) { | ||
| 186 | // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions | ||
| 187 | // TODO(Subv): Verify if raw copies perform scaling | ||
| 188 | memcpy(dst_pointer, src_pointer, output_size); | ||
| 189 | |||
| 190 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", | ||
| 191 | output_size, | ||
| 192 | config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), | ||
| 193 | config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), | ||
| 194 | config.output_format.Value(), config.flags); | ||
| 195 | |||
| 196 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 197 | |||
| 198 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); | ||
| 199 | break; | ||
| 200 | } | ||
| 201 | |||
| 202 | for (u32 y = 0; y < output_height; ++y) { | 231 | for (u32 y = 0; y < output_height; ++y) { |
| 203 | for (u32 x = 0; x < output_width; ++x) { | 232 | for (u32 x = 0; x < output_width; ++x) { |
| 204 | Math::Vec4<u8> src_color; | 233 | Math::Vec4<u8> src_color; |
| @@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) { | |||
| 220 | u32 src_offset; | 249 | u32 src_offset; |
| 221 | u32 dst_offset; | 250 | u32 dst_offset; |
| 222 | 251 | ||
| 223 | if (config.output_tiled) { | 252 | if (config.input_linear) { |
| 224 | if (!config.dont_swizzle) { | 253 | if (!config.dont_swizzle) { |
| 225 | // Interpret the input as linear and the output as tiled | 254 | // Interpret the input as linear and the output as tiled |
| 226 | u32 coarse_y = y & ~7; | 255 | u32 coarse_y = y & ~7; |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index daad506fe..2e3a9f779 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -201,12 +201,14 @@ struct Regs { | |||
| 201 | u32 flags; | 201 | u32 flags; |
| 202 | 202 | ||
| 203 | BitField< 0, 1, u32> flip_vertically; // flips input data vertically | 203 | BitField< 0, 1, u32> flip_vertically; // flips input data vertically |
| 204 | BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format | 204 | BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format |
| 205 | BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing | 205 | BitField< 2, 1, u32> crop_input_lines; |
| 206 | BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields | ||
| 206 | BitField< 5, 1, u32> dont_swizzle; | 207 | BitField< 5, 1, u32> dont_swizzle; |
| 207 | BitField< 8, 3, PixelFormat> input_format; | 208 | BitField< 8, 3, PixelFormat> input_format; |
| 208 | BitField<12, 3, PixelFormat> output_format; | 209 | BitField<12, 3, PixelFormat> output_format; |
| 209 | 210 | /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. | |
| 211 | BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented | ||
| 210 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer | 212 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer |
| 211 | }; | 213 | }; |
| 212 | 214 | ||
| @@ -214,10 +216,30 @@ struct Regs { | |||
| 214 | 216 | ||
| 215 | // it seems that writing to this field triggers the display transfer | 217 | // it seems that writing to this field triggers the display transfer |
| 216 | u32 trigger; | 218 | u32 trigger; |
| 219 | |||
| 220 | INSERT_PADDING_WORDS(0x1); | ||
| 221 | |||
| 222 | struct { | ||
| 223 | u32 size; | ||
| 224 | |||
| 225 | union { | ||
| 226 | u32 input_size; | ||
| 227 | |||
| 228 | BitField< 0, 16, u32> input_width; | ||
| 229 | BitField<16, 16, u32> input_gap; | ||
| 230 | }; | ||
| 231 | |||
| 232 | union { | ||
| 233 | u32 output_size; | ||
| 234 | |||
| 235 | BitField< 0, 16, u32> output_width; | ||
| 236 | BitField<16, 16, u32> output_gap; | ||
| 237 | }; | ||
| 238 | } texture_copy; | ||
| 217 | } display_transfer_config; | 239 | } display_transfer_config; |
| 218 | ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); | 240 | ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); |
| 219 | 241 | ||
| 220 | INSERT_PADDING_WORDS(0x331); | 242 | INSERT_PADDING_WORDS(0x32D); |
| 221 | 243 | ||
| 222 | struct { | 244 | struct { |
| 223 | // command list size (in bytes) | 245 | // command list size (in bytes) |
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp deleted file mode 100644 index cbe993fbe..000000000 --- a/src/core/mem_map.cpp +++ /dev/null | |||
| @@ -1,163 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <memory> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | |||
| 13 | #include "core/hle/config_mem.h" | ||
| 14 | #include "core/hle/kernel/vm_manager.h" | ||
| 15 | #include "core/hle/result.h" | ||
| 16 | #include "core/hle/shared_page.h" | ||
| 17 | #include "core/mem_map.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "core/memory_setup.h" | ||
| 20 | |||
| 21 | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||
| 22 | |||
| 23 | namespace Memory { | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | struct MemoryArea { | ||
| 28 | u32 base; | ||
| 29 | u32 size; | ||
| 30 | const char* name; | ||
| 31 | }; | ||
| 32 | |||
| 33 | // We don't declare the IO regions in here since its handled by other means. | ||
| 34 | static MemoryArea memory_areas[] = { | ||
| 35 | {HEAP_VADDR, HEAP_SIZE, "Heap"}, // Application heap (main memory) | ||
| 36 | {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory | ||
| 37 | {LINEAR_HEAP_VADDR, LINEAR_HEAP_SIZE, "Linear Heap"}, // Linear heap (main memory) | ||
| 38 | {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) | ||
| 39 | {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory | ||
| 40 | {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory | ||
| 41 | }; | ||
| 42 | |||
| 43 | /// Represents a block of memory mapped by ControlMemory/MapMemoryBlock | ||
| 44 | struct MemoryBlock { | ||
| 45 | MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) { | ||
| 46 | } | ||
| 47 | u32 handle; | ||
| 48 | u32 base_address; | ||
| 49 | u32 address; | ||
| 50 | u32 size; | ||
| 51 | u32 operation; | ||
| 52 | u32 permissions; | ||
| 53 | |||
| 54 | const u32 GetVirtualAddress() const{ | ||
| 55 | return base_address + address; | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | |||
| 59 | static std::map<u32, MemoryBlock> heap_map; | ||
| 60 | static std::map<u32, MemoryBlock> heap_linear_map; | ||
| 61 | |||
| 62 | } | ||
| 63 | |||
| 64 | u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) { | ||
| 65 | MemoryBlock block; | ||
| 66 | |||
| 67 | block.base_address = HEAP_VADDR; | ||
| 68 | block.size = size; | ||
| 69 | block.operation = operation; | ||
| 70 | block.permissions = permissions; | ||
| 71 | |||
| 72 | if (heap_map.size() > 0) { | ||
| 73 | const MemoryBlock last_block = heap_map.rbegin()->second; | ||
| 74 | block.address = last_block.address + last_block.size; | ||
| 75 | } | ||
| 76 | heap_map[block.GetVirtualAddress()] = block; | ||
| 77 | |||
| 78 | return block.GetVirtualAddress(); | ||
| 79 | } | ||
| 80 | |||
| 81 | u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) { | ||
| 82 | MemoryBlock block; | ||
| 83 | |||
| 84 | block.base_address = LINEAR_HEAP_VADDR; | ||
| 85 | block.size = size; | ||
| 86 | block.operation = operation; | ||
| 87 | block.permissions = permissions; | ||
| 88 | |||
| 89 | if (heap_linear_map.size() > 0) { | ||
| 90 | const MemoryBlock last_block = heap_linear_map.rbegin()->second; | ||
| 91 | block.address = last_block.address + last_block.size; | ||
| 92 | } | ||
| 93 | heap_linear_map[block.GetVirtualAddress()] = block; | ||
| 94 | |||
| 95 | return block.GetVirtualAddress(); | ||
| 96 | } | ||
| 97 | |||
| 98 | PAddr VirtualToPhysicalAddress(const VAddr addr) { | ||
| 99 | if (addr == 0) { | ||
| 100 | return 0; | ||
| 101 | } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) { | ||
| 102 | return addr - VRAM_VADDR + VRAM_PADDR; | ||
| 103 | } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { | ||
| 104 | return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; | ||
| 105 | } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { | ||
| 106 | return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; | ||
| 107 | } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { | ||
| 108 | return addr - IO_AREA_VADDR + IO_AREA_PADDR; | ||
| 109 | } | ||
| 110 | |||
| 111 | LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr); | ||
| 112 | // To help with debugging, set bit on address so that it's obviously invalid. | ||
| 113 | return addr | 0x80000000; | ||
| 114 | } | ||
| 115 | |||
| 116 | VAddr PhysicalToVirtualAddress(const PAddr addr) { | ||
| 117 | if (addr == 0) { | ||
| 118 | return 0; | ||
| 119 | } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { | ||
| 120 | return addr - VRAM_PADDR + VRAM_VADDR; | ||
| 121 | } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { | ||
| 122 | return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR; | ||
| 123 | } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { | ||
| 124 | return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; | ||
| 125 | } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { | ||
| 126 | return addr - IO_AREA_PADDR + IO_AREA_VADDR; | ||
| 127 | } | ||
| 128 | |||
| 129 | LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr); | ||
| 130 | // To help with debugging, set bit on address so that it's obviously invalid. | ||
| 131 | return addr | 0x80000000; | ||
| 132 | } | ||
| 133 | |||
| 134 | void Init() { | ||
| 135 | InitMemoryMap(); | ||
| 136 | LOG_DEBUG(HW_Memory, "initialized OK"); | ||
| 137 | } | ||
| 138 | |||
| 139 | void InitLegacyAddressSpace(Kernel::VMManager& address_space) { | ||
| 140 | using namespace Kernel; | ||
| 141 | |||
| 142 | for (MemoryArea& area : memory_areas) { | ||
| 143 | auto block = std::make_shared<std::vector<u8>>(area.size); | ||
| 144 | address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap(); | ||
| 145 | } | ||
| 146 | |||
| 147 | auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR, | ||
| 148 | (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom(); | ||
| 149 | address_space.Reprotect(cfg_mem_vma, VMAPermission::Read); | ||
| 150 | |||
| 151 | auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, | ||
| 152 | (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); | ||
| 153 | address_space.Reprotect(shared_page_vma, VMAPermission::Read); | ||
| 154 | } | ||
| 155 | |||
| 156 | void Shutdown() { | ||
| 157 | heap_map.clear(); | ||
| 158 | heap_linear_map.clear(); | ||
| 159 | |||
| 160 | LOG_DEBUG(HW_Memory, "shutdown OK"); | ||
| 161 | } | ||
| 162 | |||
| 163 | } // namespace | ||
diff --git a/src/core/mem_map.h b/src/core/mem_map.h deleted file mode 100644 index 229ef82c5..000000000 --- a/src/core/mem_map.h +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2014 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | class VMManager; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Memory { | ||
| 14 | |||
| 15 | void Init(); | ||
| 16 | void InitLegacyAddressSpace(Kernel::VMManager& address_space); | ||
| 17 | void Shutdown(); | ||
| 18 | |||
| 19 | /** | ||
| 20 | * Maps a block of memory on the heap | ||
| 21 | * @param size Size of block in bytes | ||
| 22 | * @param operation Memory map operation type | ||
| 23 | * @param permissions Memory allocation permissions | ||
| 24 | */ | ||
| 25 | u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions); | ||
| 26 | |||
| 27 | /** | ||
| 28 | * Maps a block of memory on the GSP heap | ||
| 29 | * @param size Size of block in bytes | ||
| 30 | * @param operation Memory map operation type | ||
| 31 | * @param permissions Control memory permissions | ||
| 32 | */ | ||
| 33 | u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions); | ||
| 34 | |||
| 35 | /** | ||
| 36 | * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical | ||
| 37 | * address. This should be used by services to translate addresses for use by the hardware. | ||
| 38 | */ | ||
| 39 | PAddr VirtualToPhysicalAddress(VAddr addr); | ||
| 40 | |||
| 41 | /** | ||
| 42 | * Undoes a mapping performed by VirtualToPhysicalAddress(). | ||
| 43 | */ | ||
| 44 | VAddr PhysicalToVirtualAddress(PAddr addr); | ||
| 45 | |||
| 46 | } // namespace | ||
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 1f66bb27d..cde390b8a 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 11 | 11 | ||
| 12 | #include "core/mem_map.h" | 12 | #include "core/hle/kernel/process.h" |
| 13 | #include "core/memory.h" | 13 | #include "core/memory.h" |
| 14 | #include "core/memory_setup.h" | 14 | #include "core/memory_setup.h" |
| 15 | 15 | ||
| @@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) { | |||
| 198 | Write8(addr + offset, data[offset]); | 198 | Write8(addr + offset, data[offset]); |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | PAddr VirtualToPhysicalAddress(const VAddr addr) { | ||
| 202 | if (addr == 0) { | ||
| 203 | return 0; | ||
| 204 | } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) { | ||
| 205 | return addr - VRAM_VADDR + VRAM_PADDR; | ||
| 206 | } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) { | ||
| 207 | return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR; | ||
| 208 | } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) { | ||
| 209 | return addr - DSP_RAM_VADDR + DSP_RAM_PADDR; | ||
| 210 | } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) { | ||
| 211 | return addr - IO_AREA_VADDR + IO_AREA_PADDR; | ||
| 212 | } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) { | ||
| 213 | return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR; | ||
| 214 | } | ||
| 215 | |||
| 216 | LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr); | ||
| 217 | // To help with debugging, set bit on address so that it's obviously invalid. | ||
| 218 | return addr | 0x80000000; | ||
| 219 | } | ||
| 220 | |||
| 221 | VAddr PhysicalToVirtualAddress(const PAddr addr) { | ||
| 222 | if (addr == 0) { | ||
| 223 | return 0; | ||
| 224 | } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) { | ||
| 225 | return addr - VRAM_PADDR + VRAM_VADDR; | ||
| 226 | } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) { | ||
| 227 | return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase(); | ||
| 228 | } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) { | ||
| 229 | return addr - DSP_RAM_PADDR + DSP_RAM_VADDR; | ||
| 230 | } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) { | ||
| 231 | return addr - IO_AREA_PADDR + IO_AREA_VADDR; | ||
| 232 | } | ||
| 233 | |||
| 234 | LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr); | ||
| 235 | // To help with debugging, set bit on address so that it's obviously invalid. | ||
| 236 | return addr | 0x80000000; | ||
| 237 | } | ||
| 238 | |||
| 201 | } // namespace | 239 | } // namespace |
diff --git a/src/core/memory.h b/src/core/memory.h index 418609de0..5af72b7a7 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -15,6 +15,8 @@ namespace Memory { | |||
| 15 | * be mapped. | 15 | * be mapped. |
| 16 | */ | 16 | */ |
| 17 | const u32 PAGE_SIZE = 0x1000; | 17 | const u32 PAGE_SIZE = 0x1000; |
| 18 | const u32 PAGE_MASK = PAGE_SIZE - 1; | ||
| 19 | const int PAGE_BITS = 12; | ||
| 18 | 20 | ||
| 19 | /// Physical memory regions as seen from the ARM11 | 21 | /// Physical memory regions as seen from the ARM11 |
| 20 | enum : PAddr { | 22 | enum : PAddr { |
| @@ -103,8 +105,15 @@ enum : VAddr { | |||
| 103 | // hardcoded value. | 105 | // hardcoded value. |
| 104 | /// Area where TLS (Thread-Local Storage) buffers are allocated. | 106 | /// Area where TLS (Thread-Local Storage) buffers are allocated. |
| 105 | TLS_AREA_VADDR = 0x1FF82000, | 107 | TLS_AREA_VADDR = 0x1FF82000, |
| 106 | TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads | 108 | TLS_ENTRY_SIZE = 0x200, |
| 109 | TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size | ||
| 107 | TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, | 110 | TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE, |
| 111 | |||
| 112 | |||
| 113 | /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS. | ||
| 114 | NEW_LINEAR_HEAP_VADDR = 0x30000000, | ||
| 115 | NEW_LINEAR_HEAP_SIZE = 0x10000000, | ||
| 116 | NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE, | ||
| 108 | }; | 117 | }; |
| 109 | 118 | ||
| 110 | u8 Read8(VAddr addr); | 119 | u8 Read8(VAddr addr); |
| @@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size); | |||
| 122 | u8* GetPointer(VAddr virtual_address); | 131 | u8* GetPointer(VAddr virtual_address); |
| 123 | 132 | ||
| 124 | /** | 133 | /** |
| 134 | * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical | ||
| 135 | * address. This should be used by services to translate addresses for use by the hardware. | ||
| 136 | */ | ||
| 137 | PAddr VirtualToPhysicalAddress(VAddr addr); | ||
| 138 | |||
| 139 | /** | ||
| 140 | * Undoes a mapping performed by VirtualToPhysicalAddress(). | ||
| 141 | */ | ||
| 142 | VAddr PhysicalToVirtualAddress(PAddr addr); | ||
| 143 | |||
| 144 | /** | ||
| 125 | * Gets a pointer to the memory region beginning at the specified physical address. | 145 | * Gets a pointer to the memory region beginning at the specified physical address. |
| 126 | * | 146 | * |
| 127 | * @note This is currently implemented using PhysicalToVirtualAddress(). | 147 | * @note This is currently implemented using PhysicalToVirtualAddress(). |
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h index 361bfc816..84ff30120 100644 --- a/src/core/memory_setup.h +++ b/src/core/memory_setup.h | |||
| @@ -10,9 +10,6 @@ | |||
| 10 | 10 | ||
| 11 | namespace Memory { | 11 | namespace Memory { |
| 12 | 12 | ||
| 13 | const u32 PAGE_MASK = PAGE_SIZE - 1; | ||
| 14 | const int PAGE_BITS = 12; | ||
| 15 | |||
| 16 | void InitMemoryMap(); | 13 | void InitMemoryMap(); |
| 17 | 14 | ||
| 18 | /** | 15 | /** |
diff --git a/src/core/system.cpp b/src/core/system.cpp index 561ff82f0..3cd84bf5e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp | |||
| @@ -4,11 +4,11 @@ | |||
| 4 | 4 | ||
| 5 | #include "core/core.h" | 5 | #include "core/core.h" |
| 6 | #include "core/core_timing.h" | 6 | #include "core/core_timing.h" |
| 7 | #include "core/mem_map.h" | ||
| 8 | #include "core/system.h" | 7 | #include "core/system.h" |
| 9 | #include "core/hw/hw.h" | 8 | #include "core/hw/hw.h" |
| 10 | #include "core/hle/hle.h" | 9 | #include "core/hle/hle.h" |
| 11 | #include "core/hle/kernel/kernel.h" | 10 | #include "core/hle/kernel/kernel.h" |
| 11 | #include "core/hle/kernel/memory.h" | ||
| 12 | 12 | ||
| 13 | #include "video_core/video_core.h" | 13 | #include "video_core/video_core.h" |
| 14 | 14 | ||
| @@ -29,7 +29,6 @@ void Shutdown() { | |||
| 29 | HLE::Shutdown(); | 29 | HLE::Shutdown(); |
| 30 | Kernel::Shutdown(); | 30 | Kernel::Shutdown(); |
| 31 | HW::Shutdown(); | 31 | HW::Shutdown(); |
| 32 | Memory::Shutdown(); | ||
| 33 | CoreTiming::Shutdown(); | 32 | CoreTiming::Shutdown(); |
| 34 | Core::Shutdown(); | 33 | Core::Shutdown(); |
| 35 | } | 34 | } |
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index e14de0768..ae5a30441 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp | |||
| @@ -197,12 +197,19 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 197 | 197 | ||
| 198 | case OpCode::Id::DP3: | 198 | case OpCode::Id::DP3: |
| 199 | case OpCode::Id::DP4: | 199 | case OpCode::Id::DP4: |
| 200 | case OpCode::Id::DPH: | ||
| 201 | case OpCode::Id::DPHI: | ||
| 200 | { | 202 | { |
| 201 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 203 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 202 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | 204 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); |
| 203 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 205 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 206 | |||
| 207 | OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode(); | ||
| 208 | if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI) | ||
| 209 | src1[3] = float24::FromFloat32(1.0f); | ||
| 210 | |||
| 204 | float24 dot = float24::FromFloat32(0.f); | 211 | float24 dot = float24::FromFloat32(0.f); |
| 205 | int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | 212 | int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4; |
| 206 | for (int i = 0; i < num_components; ++i) | 213 | for (int i = 0; i < num_components; ++i) |
| 207 | dot = dot + src1[i] * src2[i]; | 214 | dot = dot + src1[i] * src2[i]; |
| 208 | 215 | ||
| @@ -221,13 +228,12 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 221 | { | 228 | { |
| 222 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 229 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 223 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 230 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 231 | float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32()); | ||
| 224 | for (int i = 0; i < 4; ++i) { | 232 | for (int i = 0; i < 4; ++i) { |
| 225 | if (!swizzle.DestComponentEnabled(i)) | 233 | if (!swizzle.DestComponentEnabled(i)) |
| 226 | continue; | 234 | continue; |
| 227 | 235 | ||
| 228 | // TODO: Be stable against division by zero! | 236 | dest[i] = rcp_res; |
| 229 | // TODO: I think this might be wrong... we should only use one component here | ||
| 230 | dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | ||
| 231 | } | 237 | } |
| 232 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 238 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 233 | break; | 239 | break; |
| @@ -238,13 +244,12 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 238 | { | 244 | { |
| 239 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 245 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| 240 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | 246 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); |
| 247 | float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32())); | ||
| 241 | for (int i = 0; i < 4; ++i) { | 248 | for (int i = 0; i < 4; ++i) { |
| 242 | if (!swizzle.DestComponentEnabled(i)) | 249 | if (!swizzle.DestComponentEnabled(i)) |
| 243 | continue; | 250 | continue; |
| 244 | 251 | ||
| 245 | // TODO: Be stable against division by zero! | 252 | dest[i] = rsq_res; |
| 246 | // TODO: I think this might be wrong... we should only use one component here | ||
| 247 | dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | ||
| 248 | } | 253 | } |
| 249 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | 254 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); |
| 250 | break; | 255 | break; |
| @@ -278,6 +283,20 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 278 | break; | 283 | break; |
| 279 | } | 284 | } |
| 280 | 285 | ||
| 286 | case OpCode::Id::SGE: | ||
| 287 | case OpCode::Id::SGEI: | ||
| 288 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 289 | Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||
| 290 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 291 | for (int i = 0; i < 4; ++i) { | ||
| 292 | if (!swizzle.DestComponentEnabled(i)) | ||
| 293 | continue; | ||
| 294 | |||
| 295 | dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||
| 296 | } | ||
| 297 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 298 | break; | ||
| 299 | |||
| 281 | case OpCode::Id::SLT: | 300 | case OpCode::Id::SLT: |
| 282 | case OpCode::Id::SLTI: | 301 | case OpCode::Id::SLTI: |
| 283 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | 302 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); |
| @@ -334,6 +353,42 @@ void RunInterpreter(UnitState<Debug>& state) { | |||
| 334 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | 353 | Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); |
| 335 | break; | 354 | break; |
| 336 | 355 | ||
| 356 | case OpCode::Id::EX2: | ||
| 357 | { | ||
| 358 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 359 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 360 | |||
| 361 | // EX2 only takes first component exp2 and writes it to all dest components | ||
| 362 | float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32())); | ||
| 363 | for (int i = 0; i < 4; ++i) { | ||
| 364 | if (!swizzle.DestComponentEnabled(i)) | ||
| 365 | continue; | ||
| 366 | |||
| 367 | dest[i] = ex2_res; | ||
| 368 | } | ||
| 369 | |||
| 370 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | |||
| 374 | case OpCode::Id::LG2: | ||
| 375 | { | ||
| 376 | Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||
| 377 | Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||
| 378 | |||
| 379 | // LG2 only takes the first component log2 and writes it to all dest components | ||
| 380 | float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32())); | ||
| 381 | for (int i = 0; i < 4; ++i) { | ||
| 382 | if (!swizzle.DestComponentEnabled(i)) | ||
| 383 | continue; | ||
| 384 | |||
| 385 | dest[i] = lg2_res; | ||
| 386 | } | ||
| 387 | |||
| 388 | Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||
| 389 | break; | ||
| 390 | } | ||
| 391 | |||
| 337 | default: | 392 | default: |
| 338 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", | 393 | LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", |
| 339 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | 394 | (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); |
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 836942c6b..cc66fc8d6 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp | |||
| @@ -23,14 +23,14 @@ const JitFunction instr_table[64] = { | |||
| 23 | &JitCompiler::Compile_ADD, // add | 23 | &JitCompiler::Compile_ADD, // add |
| 24 | &JitCompiler::Compile_DP3, // dp3 | 24 | &JitCompiler::Compile_DP3, // dp3 |
| 25 | &JitCompiler::Compile_DP4, // dp4 | 25 | &JitCompiler::Compile_DP4, // dp4 |
| 26 | nullptr, // dph | 26 | &JitCompiler::Compile_DPH, // dph |
| 27 | nullptr, // unknown | 27 | nullptr, // unknown |
| 28 | nullptr, // ex2 | 28 | &JitCompiler::Compile_EX2, // ex2 |
| 29 | nullptr, // lg2 | 29 | &JitCompiler::Compile_LG2, // lg2 |
| 30 | nullptr, // unknown | 30 | nullptr, // unknown |
| 31 | &JitCompiler::Compile_MUL, // mul | 31 | &JitCompiler::Compile_MUL, // mul |
| 32 | nullptr, // lge | 32 | &JitCompiler::Compile_SGE, // sge |
| 33 | nullptr, // slt | 33 | &JitCompiler::Compile_SLT, // slt |
| 34 | &JitCompiler::Compile_FLR, // flr | 34 | &JitCompiler::Compile_FLR, // flr |
| 35 | &JitCompiler::Compile_MAX, // max | 35 | &JitCompiler::Compile_MAX, // max |
| 36 | &JitCompiler::Compile_MIN, // min | 36 | &JitCompiler::Compile_MIN, // min |
| @@ -44,10 +44,10 @@ const JitFunction instr_table[64] = { | |||
| 44 | nullptr, // unknown | 44 | nullptr, // unknown |
| 45 | nullptr, // unknown | 45 | nullptr, // unknown |
| 46 | nullptr, // unknown | 46 | nullptr, // unknown |
| 47 | nullptr, // dphi | 47 | &JitCompiler::Compile_DPH, // dphi |
| 48 | nullptr, // unknown | 48 | nullptr, // unknown |
| 49 | nullptr, // sgei | 49 | &JitCompiler::Compile_SGE, // sgei |
| 50 | &JitCompiler::Compile_SLTI, // slti | 50 | &JitCompiler::Compile_SLT, // slti |
| 51 | nullptr, // unknown | 51 | nullptr, // unknown |
| 52 | nullptr, // unknown | 52 | nullptr, // unknown |
| 53 | nullptr, // unknown | 53 | nullptr, // unknown |
| @@ -280,6 +280,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { | |||
| 280 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); | 280 | CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | void JitCompiler::Compile_PushCallerSavedXMM() { | ||
| 284 | #ifndef _WIN32 | ||
| 285 | SUB(64, R(RSP), Imm8(2 * 16)); | ||
| 286 | MOVUPS(MDisp(RSP, 16), ONE); | ||
| 287 | MOVUPS(MDisp(RSP, 0), NEGBIT); | ||
| 288 | #endif | ||
| 289 | } | ||
| 290 | |||
| 291 | void JitCompiler::Compile_PopCallerSavedXMM() { | ||
| 292 | #ifndef _WIN32 | ||
| 293 | MOVUPS(NEGBIT, MDisp(RSP, 0)); | ||
| 294 | MOVUPS(ONE, MDisp(RSP, 16)); | ||
| 295 | ADD(64, R(RSP), Imm8(2 * 16)); | ||
| 296 | #endif | ||
| 297 | } | ||
| 298 | |||
| 283 | void JitCompiler::Compile_ADD(Instruction instr) { | 299 | void JitCompiler::Compile_ADD(Instruction instr) { |
| 284 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 300 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 285 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 301 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| @@ -331,6 +347,71 @@ void JitCompiler::Compile_DP4(Instruction instr) { | |||
| 331 | Compile_DestEnable(instr, SRC1); | 347 | Compile_DestEnable(instr, SRC1); |
| 332 | } | 348 | } |
| 333 | 349 | ||
| 350 | void JitCompiler::Compile_DPH(Instruction instr) { | ||
| 351 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) { | ||
| 352 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 353 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 354 | } else { | ||
| 355 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 356 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 357 | } | ||
| 358 | |||
| 359 | if (Common::GetCPUCaps().sse4_1) { | ||
| 360 | // Set 4th component to 1.0 | ||
| 361 | BLENDPS(SRC1, R(ONE), 0x8); // 0b1000 | ||
| 362 | DPPS(SRC1, R(SRC2), 0xff); | ||
| 363 | } else { | ||
| 364 | // Reverse to set the 4th component to 1.0 | ||
| 365 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); | ||
| 366 | MOVSS(SRC1, R(ONE)); | ||
| 367 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); | ||
| 368 | |||
| 369 | MULPS(SRC1, R(SRC2)); | ||
| 370 | |||
| 371 | MOVAPS(SRC2, R(SRC1)); | ||
| 372 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY | ||
| 373 | ADDPS(SRC1, R(SRC2)); | ||
| 374 | |||
| 375 | MOVAPS(SRC2, R(SRC1)); | ||
| 376 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX | ||
| 377 | ADDPS(SRC1, R(SRC2)); | ||
| 378 | } | ||
| 379 | |||
| 380 | Compile_DestEnable(instr, SRC1); | ||
| 381 | } | ||
| 382 | |||
| 383 | void JitCompiler::Compile_EX2(Instruction instr) { | ||
| 384 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 385 | MOVSS(XMM0, R(SRC1)); | ||
| 386 | |||
| 387 | // The following will actually break the stack alignment | ||
| 388 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 389 | Compile_PushCallerSavedXMM(); | ||
| 390 | ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); | ||
| 391 | Compile_PopCallerSavedXMM(); | ||
| 392 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 393 | |||
| 394 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | ||
| 395 | MOVAPS(SRC1, R(XMM0)); | ||
| 396 | Compile_DestEnable(instr, SRC1); | ||
| 397 | } | ||
| 398 | |||
| 399 | void JitCompiler::Compile_LG2(Instruction instr) { | ||
| 400 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 401 | MOVSS(XMM0, R(SRC1)); | ||
| 402 | |||
| 403 | // The following will actually break the stack alignment | ||
| 404 | ABI_PushAllCallerSavedRegsAndAdjustStack(); | ||
| 405 | Compile_PushCallerSavedXMM(); | ||
| 406 | ABI_CallFunction(reinterpret_cast<const void*>(log2f)); | ||
| 407 | Compile_PopCallerSavedXMM(); | ||
| 408 | ABI_PopAllCallerSavedRegsAndAdjustStack(); | ||
| 409 | |||
| 410 | SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); | ||
| 411 | MOVAPS(SRC1, R(XMM0)); | ||
| 412 | Compile_DestEnable(instr, SRC1); | ||
| 413 | } | ||
| 414 | |||
| 334 | void JitCompiler::Compile_MUL(Instruction instr) { | 415 | void JitCompiler::Compile_MUL(Instruction instr) { |
| 335 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 416 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 336 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | 417 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); |
| @@ -338,6 +419,36 @@ void JitCompiler::Compile_MUL(Instruction instr) { | |||
| 338 | Compile_DestEnable(instr, SRC1); | 419 | Compile_DestEnable(instr, SRC1); |
| 339 | } | 420 | } |
| 340 | 421 | ||
| 422 | void JitCompiler::Compile_SGE(Instruction instr) { | ||
| 423 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) { | ||
| 424 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 425 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 426 | } else { | ||
| 427 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 428 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 429 | } | ||
| 430 | |||
| 431 | CMPPS(SRC1, R(SRC2), CMP_NLT); | ||
| 432 | ANDPS(SRC1, R(ONE)); | ||
| 433 | |||
| 434 | Compile_DestEnable(instr, SRC1); | ||
| 435 | } | ||
| 436 | |||
| 437 | void JitCompiler::Compile_SLT(Instruction instr) { | ||
| 438 | if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) { | ||
| 439 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 440 | Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2); | ||
| 441 | } else { | ||
| 442 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | ||
| 443 | Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); | ||
| 444 | } | ||
| 445 | |||
| 446 | CMPPS(SRC1, R(SRC2), CMP_LT); | ||
| 447 | ANDPS(SRC1, R(ONE)); | ||
| 448 | |||
| 449 | Compile_DestEnable(instr, SRC1); | ||
| 450 | } | ||
| 451 | |||
| 341 | void JitCompiler::Compile_FLR(Instruction instr) { | 452 | void JitCompiler::Compile_FLR(Instruction instr) { |
| 342 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 453 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 343 | 454 | ||
| @@ -415,22 +526,13 @@ void JitCompiler::Compile_MOV(Instruction instr) { | |||
| 415 | Compile_DestEnable(instr, SRC1); | 526 | Compile_DestEnable(instr, SRC1); |
| 416 | } | 527 | } |
| 417 | 528 | ||
| 418 | void JitCompiler::Compile_SLTI(Instruction instr) { | ||
| 419 | Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1); | ||
| 420 | Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2); | ||
| 421 | |||
| 422 | CMPSS(SRC1, R(SRC2), CMP_LT); | ||
| 423 | ANDPS(SRC1, R(ONE)); | ||
| 424 | |||
| 425 | Compile_DestEnable(instr, SRC1); | ||
| 426 | } | ||
| 427 | |||
| 428 | void JitCompiler::Compile_RCP(Instruction instr) { | 529 | void JitCompiler::Compile_RCP(Instruction instr) { |
| 429 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 530 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 430 | 531 | ||
| 431 | // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica | 532 | // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica |
| 432 | // performs this operation more accurately. This should be checked on hardware. | 533 | // performs this operation more accurately. This should be checked on hardware. |
| 433 | RCPPS(SRC1, R(SRC1)); | 534 | RCPSS(SRC1, R(SRC1)); |
| 535 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX | ||
| 434 | 536 | ||
| 435 | Compile_DestEnable(instr, SRC1); | 537 | Compile_DestEnable(instr, SRC1); |
| 436 | } | 538 | } |
| @@ -438,9 +540,10 @@ void JitCompiler::Compile_RCP(Instruction instr) { | |||
| 438 | void JitCompiler::Compile_RSQ(Instruction instr) { | 540 | void JitCompiler::Compile_RSQ(Instruction instr) { |
| 439 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); | 541 | Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); |
| 440 | 542 | ||
| 441 | // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica | 543 | // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica |
| 442 | // performs this operation more accurately. This should be checked on hardware. | 544 | // performs this operation more accurately. This should be checked on hardware. |
| 443 | RSQRTPS(SRC1, R(SRC1)); | 545 | RSQRTSS(SRC1, R(SRC1)); |
| 546 | SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX | ||
| 444 | 547 | ||
| 445 | Compile_DestEnable(instr, SRC1); | 548 | Compile_DestEnable(instr, SRC1); |
| 446 | } | 549 | } |
| @@ -646,12 +749,12 @@ CompiledShader* JitCompiler::Compile() { | |||
| 646 | // Used to set a register to one | 749 | // Used to set a register to one |
| 647 | static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; | 750 | static const __m128 one = { 1.f, 1.f, 1.f, 1.f }; |
| 648 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); | 751 | MOV(PTRBITS, R(RAX), ImmPtr(&one)); |
| 649 | MOVAPS(ONE, MDisp(RAX, 0)); | 752 | MOVAPS(ONE, MatR(RAX)); |
| 650 | 753 | ||
| 651 | // Used to negate registers | 754 | // Used to negate registers |
| 652 | static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; | 755 | static const __m128 neg = { -0.f, -0.f, -0.f, -0.f }; |
| 653 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); | 756 | MOV(PTRBITS, R(RAX), ImmPtr(&neg)); |
| 654 | MOVAPS(NEGBIT, MDisp(RAX, 0)); | 757 | MOVAPS(NEGBIT, MatR(RAX)); |
| 655 | 758 | ||
| 656 | looping = false; | 759 | looping = false; |
| 657 | 760 | ||
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index b88f2a0d2..fbe19fe93 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h | |||
| @@ -37,7 +37,12 @@ public: | |||
| 37 | void Compile_ADD(Instruction instr); | 37 | void Compile_ADD(Instruction instr); |
| 38 | void Compile_DP3(Instruction instr); | 38 | void Compile_DP3(Instruction instr); |
| 39 | void Compile_DP4(Instruction instr); | 39 | void Compile_DP4(Instruction instr); |
| 40 | void Compile_DPH(Instruction instr); | ||
| 41 | void Compile_EX2(Instruction instr); | ||
| 42 | void Compile_LG2(Instruction instr); | ||
| 40 | void Compile_MUL(Instruction instr); | 43 | void Compile_MUL(Instruction instr); |
| 44 | void Compile_SGE(Instruction instr); | ||
| 45 | void Compile_SLT(Instruction instr); | ||
| 41 | void Compile_FLR(Instruction instr); | 46 | void Compile_FLR(Instruction instr); |
| 42 | void Compile_MAX(Instruction instr); | 47 | void Compile_MAX(Instruction instr); |
| 43 | void Compile_MIN(Instruction instr); | 48 | void Compile_MIN(Instruction instr); |
| @@ -45,7 +50,6 @@ public: | |||
| 45 | void Compile_RSQ(Instruction instr); | 50 | void Compile_RSQ(Instruction instr); |
| 46 | void Compile_MOVA(Instruction instr); | 51 | void Compile_MOVA(Instruction instr); |
| 47 | void Compile_MOV(Instruction instr); | 52 | void Compile_MOV(Instruction instr); |
| 48 | void Compile_SLTI(Instruction instr); | ||
| 49 | void Compile_NOP(Instruction instr); | 53 | void Compile_NOP(Instruction instr); |
| 50 | void Compile_END(Instruction instr); | 54 | void Compile_END(Instruction instr); |
| 51 | void Compile_CALL(Instruction instr); | 55 | void Compile_CALL(Instruction instr); |
| @@ -67,6 +71,9 @@ private: | |||
| 67 | void Compile_EvaluateCondition(Instruction instr); | 71 | void Compile_EvaluateCondition(Instruction instr); |
| 68 | void Compile_UniformCondition(Instruction instr); | 72 | void Compile_UniformCondition(Instruction instr); |
| 69 | 73 | ||
| 74 | void Compile_PushCallerSavedXMM(); | ||
| 75 | void Compile_PopCallerSavedXMM(); | ||
| 76 | |||
| 70 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. | 77 | /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. |
| 71 | unsigned* offset_ptr = nullptr; | 78 | unsigned* offset_ptr = nullptr; |
| 72 | 79 | ||