-rw-r--r--  src/common/x64/xbyak_abi.h | 16
-rw-r--r--  src/core/hid/emulated_controller.cpp | 1
-rw-r--r--  src/core/hid/motion_input.cpp | 4
-rw-r--r--  src/core/hle/kernel/k_page_table.cpp | 204
-rw-r--r--  src/core/hle/kernel/k_page_table.h | 11
-rw-r--r--  src/core/hle/kernel/k_priority_queue.h | 2
-rw-r--r--  src/core/hle/kernel/k_scheduler.cpp | 45
-rw-r--r--  src/core/hle/kernel/svc.cpp | 4
-rw-r--r--  src/input_common/input_poller.cpp | 13
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.cpp | 12
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | 18
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 2
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp | 2
-rw-r--r--  src/video_core/macro/macro.cpp | 13
-rw-r--r--  src/video_core/macro/macro.h | 2
-rw-r--r--  src/video_core/macro/macro_hle.cpp | 34
-rw-r--r--  src/video_core/macro/macro_hle.h | 21
-rw-r--r--  src/video_core/macro/macro_interpreter.cpp | 92
-rw-r--r--  src/video_core/macro/macro_interpreter.h | 78
-rw-r--r--  src/video_core/macro/macro_jit_x64.cpp | 104
-rw-r--r--  src/video_core/macro/macro_jit_x64.h | 71
-rw-r--r--  src/yuzu/configuration/configure_input_player.cpp | 12
22 files changed, 385 insertions, 376 deletions
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 0ddf9b83e..87b3d63a4 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -37,12 +37,12 @@ constexpr Xbyak::Reg IndexToReg(size_t reg_index) {
37 } 37 }
38} 38}
39 39
40inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { 40constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 std::bitset<32> bits; 41 size_t bits = 0;
42 for (const Xbyak::Reg& reg : regs) { 42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true; 43 bits |= size_t{1} << RegToIndex(reg);
44 } 44 }
45 return bits; 45 return {bits};
46} 46}
47 47
48constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF); 48constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
@@ -57,7 +57,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; 57constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; 58constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59 59
60const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ 60constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs 61 // GPRs
62 Xbyak::util::rcx, 62 Xbyak::util::rcx,
63 Xbyak::util::rdx, 63 Xbyak::util::rdx,
@@ -74,7 +74,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
74 Xbyak::util::xmm5, 74 Xbyak::util::xmm5,
75}); 75});
76 76
77const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ 77constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs 78 // GPRs
79 Xbyak::util::rbx, 79 Xbyak::util::rbx,
80 Xbyak::util::rsi, 80 Xbyak::util::rsi,
@@ -108,7 +108,7 @@ constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; 108constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; 109constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110 110
111const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ 111constexpr inline std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs 112 // GPRs
113 Xbyak::util::rcx, 113 Xbyak::util::rcx,
114 Xbyak::util::rdx, 114 Xbyak::util::rdx,
@@ -137,7 +137,7 @@ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
137 Xbyak::util::xmm15, 137 Xbyak::util::xmm15,
138}); 138});
139 139
140const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ 140constexpr inline std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs 141 // GPRs
142 Xbyak::util::rbx, 142 Xbyak::util::rbx,
143 Xbyak::util::rbp, 143 Xbyak::util::rbp,
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index d12037b11..a7cdf45e6 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -749,6 +749,7 @@ void EmulatedController::SetMotion(const Common::Input::CallbackStatus& callback
749 raw_status.gyro.y.value, 749 raw_status.gyro.y.value,
750 raw_status.gyro.z.value, 750 raw_status.gyro.z.value,
751 }); 751 });
752 emulated.SetGyroThreshold(raw_status.gyro.x.properties.threshold);
752 emulated.UpdateRotation(raw_status.delta_timestamp); 753 emulated.UpdateRotation(raw_status.delta_timestamp);
753 emulated.UpdateOrientation(raw_status.delta_timestamp); 754 emulated.UpdateOrientation(raw_status.delta_timestamp);
754 force_update_motion = raw_status.force_update; 755 force_update_motion = raw_status.force_update;
diff --git a/src/core/hid/motion_input.cpp b/src/core/hid/motion_input.cpp
index 6e126be19..05042fd99 100644
--- a/src/core/hid/motion_input.cpp
+++ b/src/core/hid/motion_input.cpp
@@ -10,7 +10,7 @@ namespace Core::HID {
10MotionInput::MotionInput() { 10MotionInput::MotionInput() {
11 // Initialize PID constants with default values 11 // Initialize PID constants with default values
12 SetPID(0.3f, 0.005f, 0.0f); 12 SetPID(0.3f, 0.005f, 0.0f);
13 SetGyroThreshold(0.00005f); 13 SetGyroThreshold(0.007f);
14} 14}
15 15
16void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) { 16void MotionInput::SetPID(f32 new_kp, f32 new_ki, f32 new_kd) {
@@ -31,7 +31,7 @@ void MotionInput::SetGyroscope(const Common::Vec3f& gyroscope) {
31 gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f); 31 gyro_bias = (gyro_bias * 0.9999f) + (gyroscope * 0.0001f);
32 } 32 }
33 33
34 if (gyro.Length2() < gyro_threshold) { 34 if (gyro.Length() < gyro_threshold) {
35 gyro = {}; 35 gyro = {};
36 } else { 36 } else {
37 only_accelerometer = false; 37 only_accelerometer = false;
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index b650ea31d..2ebbc0819 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -276,22 +276,23 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
276 276
277ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state, 277ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemoryState state,
278 KMemoryPermission perm) { 278 KMemoryPermission perm) {
279 std::lock_guard lock{page_table_lock};
280
281 const u64 size{num_pages * PageSize}; 279 const u64 size{num_pages * PageSize};
282 280
283 if (!CanContain(addr, size, state)) { 281 // Validate the mapping request.
284 return ResultInvalidCurrentMemory; 282 R_UNLESS(this->CanContain(addr, size, state), ResultInvalidCurrentMemory);
285 }
286 283
287 if (IsRegionMapped(addr, size)) { 284 // Lock the table.
288 return ResultInvalidCurrentMemory; 285 std::lock_guard lock{page_table_lock};
289 } 286
287 // Verify that the destination memory is unmapped.
288 R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free,
289 KMemoryPermission::None, KMemoryPermission::None,
290 KMemoryAttribute::None, KMemoryAttribute::None));
290 291
291 KPageLinkedList page_linked_list; 292 KPageLinkedList page_linked_list;
292 CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool, 293 R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool,
293 allocation_option)); 294 allocation_option));
294 CASCADE_CODE(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup)); 295 R_TRY(Operate(addr, num_pages, page_linked_list, OperationType::MapGroup));
295 296
296 block_manager->Update(addr, num_pages, state, perm); 297 block_manager->Update(addr, num_pages, state, perm);
297 298
@@ -395,39 +396,12 @@ ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size,
395 396
396 return ResultSuccess; 397 return ResultSuccess;
397} 398}
398void KPageTable::MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end) {
399 auto node{page_linked_list.Nodes().begin()};
400 PAddr map_addr{node->GetAddress()};
401 std::size_t src_num_pages{node->GetNumPages()};
402
403 block_manager->IterateForRange(start, end, [&](const KMemoryInfo& info) {
404 if (info.state != KMemoryState::Free) {
405 return;
406 }
407
408 std::size_t dst_num_pages{GetSizeInRange(info, start, end) / PageSize};
409 VAddr dst_addr{GetAddressInRange(info, start)};
410
411 while (dst_num_pages) {
412 if (!src_num_pages) {
413 node = std::next(node);
414 map_addr = node->GetAddress();
415 src_num_pages = node->GetNumPages();
416 }
417
418 const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)};
419 Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map,
420 map_addr);
421
422 dst_addr += num_pages * PageSize;
423 map_addr += num_pages * PageSize;
424 src_num_pages -= num_pages;
425 dst_num_pages -= num_pages;
426 }
427 });
428}
429 399
430ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { 400ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
401 // Lock the physical memory lock.
402 std::lock_guard phys_lk(map_physical_memory_lock);
403
404 // Lock the table.
431 std::lock_guard lock{page_table_lock}; 405 std::lock_guard lock{page_table_lock};
432 406
433 std::size_t mapped_size{}; 407 std::size_t mapped_size{};
@@ -463,7 +437,35 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
463 // We succeeded, so commit the memory reservation. 437 // We succeeded, so commit the memory reservation.
464 memory_reservation.Commit(); 438 memory_reservation.Commit();
465 439
466 MapPhysicalMemory(page_linked_list, addr, end_addr); 440 // Map the memory.
441 auto node{page_linked_list.Nodes().begin()};
442 PAddr map_addr{node->GetAddress()};
443 std::size_t src_num_pages{node->GetNumPages()};
444 block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
445 if (info.state != KMemoryState::Free) {
446 return;
447 }
448
449 std::size_t dst_num_pages{GetSizeInRange(info, addr, end_addr) / PageSize};
450 VAddr dst_addr{GetAddressInRange(info, addr)};
451
452 while (dst_num_pages) {
453 if (!src_num_pages) {
454 node = std::next(node);
455 map_addr = node->GetAddress();
456 src_num_pages = node->GetNumPages();
457 }
458
459 const std::size_t num_pages{std::min(src_num_pages, dst_num_pages)};
460 Operate(dst_addr, num_pages, KMemoryPermission::UserReadWrite, OperationType::Map,
461 map_addr);
462
463 dst_addr += num_pages * PageSize;
464 map_addr += num_pages * PageSize;
465 src_num_pages -= num_pages;
466 dst_num_pages -= num_pages;
467 }
468 });
467 469
468 mapped_physical_memory_size += remaining_size; 470 mapped_physical_memory_size += remaining_size;
469 471
@@ -503,23 +505,8 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
503 return ResultSuccess; 505 return ResultSuccess;
504 } 506 }
505 507
506 CASCADE_CODE(UnmapMemory(addr, size));
507
508 auto process{system.Kernel().CurrentProcess()};
509 process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
510 mapped_physical_memory_size -= mapped_size;
511
512 return ResultSuccess;
513}
514
515ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
516 std::lock_guard lock{page_table_lock};
517
518 const VAddr end_addr{addr + size};
519 ResultCode result{ResultSuccess};
520 KPageLinkedList page_linked_list;
521
522 // Unmap each region within the range 508 // Unmap each region within the range
509 KPageLinkedList page_linked_list;
523 block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) { 510 block_manager->IterateForRange(addr, end_addr, [&](const KMemoryInfo& info) {
524 if (info.state == KMemoryState::Normal) { 511 if (info.state == KMemoryState::Normal) {
525 const std::size_t block_size{GetSizeInRange(info, addr, end_addr)}; 512 const std::size_t block_size{GetSizeInRange(info, addr, end_addr)};
@@ -535,7 +522,6 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
535 } 522 }
536 } 523 }
537 }); 524 });
538
539 if (result.IsError()) { 525 if (result.IsError()) {
540 return result; 526 return result;
541 } 527 }
@@ -546,10 +532,14 @@ ResultCode KPageTable::UnmapMemory(VAddr addr, std::size_t size) {
546 532
547 block_manager->Update(addr, num_pages, KMemoryState::Free); 533 block_manager->Update(addr, num_pages, KMemoryState::Free);
548 534
535 auto process{system.Kernel().CurrentProcess()};
536 process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
537 mapped_physical_memory_size -= mapped_size;
538
549 return ResultSuccess; 539 return ResultSuccess;
550} 540}
551 541
552ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { 542ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
553 std::lock_guard lock{page_table_lock}; 543 std::lock_guard lock{page_table_lock};
554 544
555 KMemoryState src_state{}; 545 KMemoryState src_state{};
@@ -588,7 +578,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
588 return ResultSuccess; 578 return ResultSuccess;
589} 579}
590 580
591ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { 581ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
592 std::lock_guard lock{page_table_lock}; 582 std::lock_guard lock{page_table_lock};
593 583
594 KMemoryState src_state{}; 584 KMemoryState src_state{};
@@ -652,24 +642,26 @@ ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_l
652 return ResultSuccess; 642 return ResultSuccess;
653} 643}
654 644
655ResultCode KPageTable::MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, 645ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list,
656 KMemoryPermission perm) { 646 KMemoryState state, KMemoryPermission perm) {
657 std::lock_guard lock{page_table_lock}; 647 // Check that the map is in range.
658
659 const std::size_t num_pages{page_linked_list.GetNumPages()}; 648 const std::size_t num_pages{page_linked_list.GetNumPages()};
660 const std::size_t size{num_pages * PageSize}; 649 const std::size_t size{num_pages * PageSize};
650 R_UNLESS(this->CanContain(address, size, state), ResultInvalidCurrentMemory);
661 651
662 if (!CanContain(addr, size, state)) { 652 // Lock the table.
663 return ResultInvalidCurrentMemory; 653 std::lock_guard lock{page_table_lock};
664 }
665 654
666 if (IsRegionMapped(addr, num_pages * PageSize)) { 655 // Check the memory state.
667 return ResultInvalidCurrentMemory; 656 R_TRY(this->CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
668 } 657 KMemoryPermission::None, KMemoryPermission::None,
658 KMemoryAttribute::None, KMemoryAttribute::None));
669 659
670 CASCADE_CODE(MapPages(addr, page_linked_list, perm)); 660 // Map the pages.
661 R_TRY(MapPages(address, page_linked_list, perm));
671 662
672 block_manager->Update(addr, num_pages, state, perm); 663 // Update the blocks.
664 block_manager->Update(address, num_pages, state, perm);
673 665
674 return ResultSuccess; 666 return ResultSuccess;
675} 667}
@@ -693,21 +685,23 @@ ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked
693 685
694ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, 686ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list,
695 KMemoryState state) { 687 KMemoryState state) {
696 std::lock_guard lock{page_table_lock}; 688 // Check that the unmap is in range.
697
698 const std::size_t num_pages{page_linked_list.GetNumPages()}; 689 const std::size_t num_pages{page_linked_list.GetNumPages()};
699 const std::size_t size{num_pages * PageSize}; 690 const std::size_t size{num_pages * PageSize};
691 R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory);
700 692
701 if (!CanContain(addr, size, state)) { 693 // Lock the table.
702 return ResultInvalidCurrentMemory; 694 std::lock_guard lock{page_table_lock};
703 }
704 695
705 if (IsRegionMapped(addr, num_pages * PageSize)) { 696 // Check the memory state.
706 return ResultInvalidCurrentMemory; 697 R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, state, KMemoryPermission::None,
707 } 698 KMemoryPermission::None, KMemoryAttribute::All,
699 KMemoryAttribute::None));
708 700
709 CASCADE_CODE(UnmapPages(addr, page_linked_list)); 701 // Perform the unmap.
702 R_TRY(UnmapPages(addr, page_linked_list));
710 703
704 // Update the blocks.
711 block_manager->Update(addr, num_pages, state, KMemoryPermission::None); 705 block_manager->Update(addr, num_pages, state, KMemoryPermission::None);
712 706
713 return ResultSuccess; 707 return ResultSuccess;
@@ -765,7 +759,6 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size,
765 759
766 // Ensure cache coherency, if we're setting pages as executable. 760 // Ensure cache coherency, if we're setting pages as executable.
767 if (is_x) { 761 if (is_x) {
768 // Memory execution state is changing, invalidate CPU cache range
769 system.InvalidateCpuInstructionCacheRange(addr, size); 762 system.InvalidateCpuInstructionCacheRange(addr, size);
770 } 763 }
771 764
@@ -793,12 +786,12 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo
793 KMemoryState state{}; 786 KMemoryState state{};
794 KMemoryAttribute attribute{}; 787 KMemoryAttribute attribute{};
795 788
796 CASCADE_CODE(CheckMemoryState( 789 R_TRY(CheckMemoryState(&state, nullptr, &attribute, nullptr, addr, size,
797 &state, nullptr, &attribute, nullptr, addr, size, 790 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
798 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, 791 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
799 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All, 792 KMemoryPermission::All, KMemoryPermission::UserReadWrite,
800 KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask, KMemoryAttribute::None, 793 KMemoryAttribute::Mask, KMemoryAttribute::None,
801 KMemoryAttribute::IpcAndDeviceMapped)); 794 KMemoryAttribute::IpcAndDeviceMapped));
802 795
803 block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked); 796 block_manager->Update(addr, size / PageSize, state, perm, attribute | KMemoryAttribute::Locked);
804 797
@@ -810,12 +803,11 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
810 803
811 KMemoryState state{}; 804 KMemoryState state{};
812 805
813 CASCADE_CODE( 806 R_TRY(CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size,
814 CheckMemoryState(&state, nullptr, nullptr, nullptr, addr, size, 807 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
815 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, 808 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
816 KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, 809 KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask,
817 KMemoryPermission::None, KMemoryPermission::None, KMemoryAttribute::Mask, 810 KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
818 KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
819 811
820 block_manager->Update(addr, size / PageSize, state, KMemoryPermission::UserReadWrite); 812 block_manager->Update(addr, size / PageSize, state, KMemoryPermission::UserReadWrite);
821 return ResultSuccess; 813 return ResultSuccess;
@@ -871,8 +863,9 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask
871 AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask)); 863 AttributeTestMask, KMemoryAttribute::None, ~AttributeTestMask));
872 864
873 // Determine the new attribute. 865 // Determine the new attribute.
874 const auto new_attr = ((old_attr & static_cast<KMemoryAttribute>(~mask)) | 866 const KMemoryAttribute new_attr =
875 static_cast<KMemoryAttribute>(attr & mask)); 867 static_cast<KMemoryAttribute>(((old_attr & static_cast<KMemoryAttribute>(~mask)) |
868 static_cast<KMemoryAttribute>(attr & mask)));
876 869
877 // Perform operation. 870 // Perform operation.
878 this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh); 871 this->Operate(addr, num_pages, old_perm, OperationType::ChangePermissionsAndRefresh);
@@ -896,6 +889,9 @@ ResultCode KPageTable::SetMaxHeapSize(std::size_t size) {
896} 889}
897 890
898ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { 891ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
892 // Lock the physical memory lock.
893 std::lock_guard phys_lk(map_physical_memory_lock);
894
899 // Try to perform a reduction in heap, instead of an extension. 895 // Try to perform a reduction in heap, instead of an extension.
900 VAddr cur_address{}; 896 VAddr cur_address{};
901 std::size_t allocation_size{}; 897 std::size_t allocation_size{};
@@ -1025,12 +1021,12 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages,
1025 } 1021 }
1026 1022
1027 if (is_map_only) { 1023 if (is_map_only) {
1028 CASCADE_CODE(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr)); 1024 R_TRY(Operate(addr, needed_num_pages, perm, OperationType::Map, map_addr));
1029 } else { 1025 } else {
1030 KPageLinkedList page_group; 1026 KPageLinkedList page_group;
1031 CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, 1027 R_TRY(system.Kernel().MemoryManager().Allocate(page_group, needed_num_pages, memory_pool,
1032 memory_pool, allocation_option)); 1028 allocation_option));
1033 CASCADE_CODE(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup)); 1029 R_TRY(Operate(addr, needed_num_pages, page_group, OperationType::MapGroup));
1034 } 1030 }
1035 1031
1036 block_manager->Update(addr, needed_num_pages, state, perm); 1032 block_manager->Update(addr, needed_num_pages, state, perm);
@@ -1186,7 +1182,7 @@ VAddr KPageTable::AllocateVirtualMemory(VAddr start, std::size_t region_num_page
1186 1182
1187ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group, 1183ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLinkedList& page_group,
1188 OperationType operation) { 1184 OperationType operation) {
1189 std::lock_guard lock{page_table_lock}; 1185 ASSERT(this->IsLockedByCurrentThread());
1190 1186
1191 ASSERT(Common::IsAligned(addr, PageSize)); 1187 ASSERT(Common::IsAligned(addr, PageSize));
1192 ASSERT(num_pages > 0); 1188 ASSERT(num_pages > 0);
@@ -1211,7 +1207,7 @@ ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, const KPageLin
1211 1207
1212ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm, 1208ResultCode KPageTable::Operate(VAddr addr, std::size_t num_pages, KMemoryPermission perm,
1213 OperationType operation, PAddr map_addr) { 1209 OperationType operation, PAddr map_addr) {
1214 std::lock_guard lock{page_table_lock}; 1210 ASSERT(this->IsLockedByCurrentThread());
1215 1211
1216 ASSERT(num_pages > 0); 1212 ASSERT(num_pages > 0);
1217 ASSERT(Common::IsAligned(addr, PageSize)); 1213 ASSERT(Common::IsAligned(addr, PageSize));
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index f67986e91..60ae9b9e8 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -37,9 +37,8 @@ public:
37 VAddr src_addr); 37 VAddr src_addr);
38 ResultCode MapPhysicalMemory(VAddr addr, std::size_t size); 38 ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
39 ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size); 39 ResultCode UnmapPhysicalMemory(VAddr addr, std::size_t size);
40 ResultCode UnmapMemory(VAddr addr, std::size_t size); 40 ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
41 ResultCode Map(VAddr dst_addr, VAddr src_addr, std::size_t size); 41 ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
42 ResultCode Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size);
43 ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state, 42 ResultCode MapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state,
44 KMemoryPermission perm); 43 KMemoryPermission perm);
45 ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state); 44 ResultCode UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, KMemoryState state);
@@ -88,7 +87,6 @@ private:
88 ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list, 87 ResultCode MapPages(VAddr addr, const KPageLinkedList& page_linked_list,
89 KMemoryPermission perm); 88 KMemoryPermission perm);
90 ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list); 89 ResultCode UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list);
91 void MapPhysicalMemory(KPageLinkedList& page_linked_list, VAddr start, VAddr end);
92 bool IsRegionMapped(VAddr address, u64 size); 90 bool IsRegionMapped(VAddr address, u64 size);
93 bool IsRegionContiguous(VAddr addr, u64 size) const; 91 bool IsRegionContiguous(VAddr addr, u64 size) const;
94 void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list); 92 void AddRegionToPages(VAddr start, std::size_t num_pages, KPageLinkedList& page_linked_list);
@@ -148,6 +146,7 @@ private:
148 } 146 }
149 147
150 std::recursive_mutex page_table_lock; 148 std::recursive_mutex page_table_lock;
149 std::mutex map_physical_memory_lock;
151 std::unique_ptr<KMemoryBlockManager> block_manager; 150 std::unique_ptr<KMemoryBlockManager> block_manager;
152 151
153public: 152public:
@@ -249,7 +248,9 @@ public:
249 return !IsOutsideASLRRegion(address, size); 248 return !IsOutsideASLRRegion(address, size);
250 } 249 }
251 constexpr PAddr GetPhysicalAddr(VAddr addr) { 250 constexpr PAddr GetPhysicalAddr(VAddr addr) {
252 return page_table_impl.backing_addr[addr >> PageBits] + addr; 251 const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits];
252 ASSERT(backing_addr);
253 return backing_addr + addr;
253 } 254 }
254 constexpr bool Contains(VAddr addr) const { 255 constexpr bool Contains(VAddr addr) const {
255 return address_space_start <= addr && addr <= address_space_end - 1; 256 return address_space_start <= addr && addr <= address_space_end - 1;
diff --git a/src/core/hle/kernel/k_priority_queue.h b/src/core/hle/kernel/k_priority_queue.h
index 0b894c8cf..bd779739d 100644
--- a/src/core/hle/kernel/k_priority_queue.h
+++ b/src/core/hle/kernel/k_priority_queue.h
@@ -258,7 +258,7 @@ private:
258 258
259private: 259private:
260 constexpr void ClearAffinityBit(u64& affinity, s32 core) { 260 constexpr void ClearAffinityBit(u64& affinity, s32 core) {
261 affinity &= ~(u64(1) << core); 261 affinity &= ~(UINT64_C(1) << core);
262 } 262 }
263 263
264 constexpr s32 GetNextCore(u64& affinity) { 264 constexpr s32 GetNextCore(u64& affinity) {
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp
index b32d4f285..c96520828 100644
--- a/src/core/hle/kernel/k_scheduler.cpp
+++ b/src/core/hle/kernel/k_scheduler.cpp
@@ -710,23 +710,19 @@ void KScheduler::Unload(KThread* thread) {
710} 710}
711 711
712void KScheduler::Reload(KThread* thread) { 712void KScheduler::Reload(KThread* thread) {
713 LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread ? thread->GetName() : "nullptr"); 713 LOG_TRACE(Kernel, "core {}, reload thread {}", core_id, thread->GetName());
714 714
715 if (thread) { 715 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
716 ASSERT_MSG(thread->GetState() == ThreadState::Runnable, "Thread must be runnable."); 716 cpu_core.LoadContext(thread->GetContext32());
717 717 cpu_core.LoadContext(thread->GetContext64());
718 Core::ARM_Interface& cpu_core = system.ArmInterface(core_id); 718 cpu_core.SetTlsAddress(thread->GetTLSAddress());
719 cpu_core.LoadContext(thread->GetContext32()); 719 cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
720 cpu_core.LoadContext(thread->GetContext64()); 720 cpu_core.ClearExclusiveState();
721 cpu_core.SetTlsAddress(thread->GetTLSAddress());
722 cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
723 cpu_core.ClearExclusiveState();
724 }
725} 721}
726 722
727void KScheduler::SwitchContextStep2() { 723void KScheduler::SwitchContextStep2() {
728 // Load context of new thread 724 // Load context of new thread
729 Reload(current_thread.load()); 725 Reload(GetCurrentThread());
730 726
731 RescheduleCurrentCore(); 727 RescheduleCurrentCore();
732} 728}
@@ -735,13 +731,17 @@ void KScheduler::ScheduleImpl() {
735 KThread* previous_thread = GetCurrentThread(); 731 KThread* previous_thread = GetCurrentThread();
736 KThread* next_thread = state.highest_priority_thread; 732 KThread* next_thread = state.highest_priority_thread;
737 733
738 state.needs_scheduling = false; 734 state.needs_scheduling.store(false);
739 735
740 // We never want to schedule a null thread, so use the idle thread if we don't have a next. 736 // We never want to schedule a null thread, so use the idle thread if we don't have a next.
741 if (next_thread == nullptr) { 737 if (next_thread == nullptr) {
742 next_thread = idle_thread; 738 next_thread = idle_thread;
743 } 739 }
744 740
741 if (next_thread->GetCurrentCore() != core_id) {
742 next_thread->SetCurrentCore(core_id);
743 }
744
745 // We never want to schedule a dummy thread, as these are only used by host threads for locking. 745 // We never want to schedule a dummy thread, as these are only used by host threads for locking.
746 if (next_thread->GetThreadType() == ThreadType::Dummy) { 746 if (next_thread->GetThreadType() == ThreadType::Dummy) {
747 ASSERT_MSG(false, "Dummy threads should never be scheduled!"); 747 ASSERT_MSG(false, "Dummy threads should never be scheduled!");
@@ -755,14 +755,8 @@ void KScheduler::ScheduleImpl() {
755 return; 755 return;
756 } 756 }
757 757
758 if (next_thread->GetCurrentCore() != core_id) { 758 // Update the CPU time tracking variables.
759 next_thread->SetCurrentCore(core_id);
760 }
761
762 current_thread.store(next_thread);
763
764 KProcess* const previous_process = system.Kernel().CurrentProcess(); 759 KProcess* const previous_process = system.Kernel().CurrentProcess();
765
766 UpdateLastContextSwitchTime(previous_thread, previous_process); 760 UpdateLastContextSwitchTime(previous_thread, previous_process);
767 761
768 // Save context for previous thread 762 // Save context for previous thread
@@ -770,6 +764,10 @@ void KScheduler::ScheduleImpl() {
770 764
771 std::shared_ptr<Common::Fiber>* old_context; 765 std::shared_ptr<Common::Fiber>* old_context;
772 old_context = &previous_thread->GetHostContext(); 766 old_context = &previous_thread->GetHostContext();
767
768 // Set the new thread.
769 current_thread.store(next_thread);
770
773 guard.Unlock(); 771 guard.Unlock();
774 772
775 Common::Fiber::YieldTo(*old_context, *switch_fiber); 773 Common::Fiber::YieldTo(*old_context, *switch_fiber);
@@ -797,8 +795,8 @@ void KScheduler::SwitchToCurrent() {
797 do { 795 do {
798 auto next_thread = current_thread.load(); 796 auto next_thread = current_thread.load();
799 if (next_thread != nullptr) { 797 if (next_thread != nullptr) {
800 next_thread->context_guard.Lock(); 798 const auto locked = next_thread->context_guard.TryLock();
801 if (next_thread->GetRawState() != ThreadState::Runnable) { 799 if (state.needs_scheduling.load()) {
802 next_thread->context_guard.Unlock(); 800 next_thread->context_guard.Unlock();
803 break; 801 break;
804 } 802 }
@@ -806,6 +804,9 @@ void KScheduler::SwitchToCurrent() {
806 next_thread->context_guard.Unlock(); 804 next_thread->context_guard.Unlock();
807 break; 805 break;
808 } 806 }
807 if (!locked) {
808 continue;
809 }
809 } 810 }
810 auto thread = next_thread ? next_thread : idle_thread; 811 auto thread = next_thread ? next_thread : idle_thread;
811 Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext()); 812 Common::Fiber::YieldTo(switch_fiber, *thread->GetHostContext());
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index c7f5140f4..40bb893ac 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -230,7 +230,7 @@ static ResultCode MapMemory(Core::System& system, VAddr dst_addr, VAddr src_addr
230 return result; 230 return result;
231 } 231 }
232 232
233 return page_table.Map(dst_addr, src_addr, size); 233 return page_table.MapMemory(dst_addr, src_addr, size);
234} 234}
235 235
236static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { 236static ResultCode MapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
@@ -249,7 +249,7 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
249 return result; 249 return result;
250 } 250 }
251 251
252 return page_table.Unmap(dst_addr, src_addr, size); 252 return page_table.UnmapMemory(dst_addr, src_addr, size);
253} 253}
254 254
255static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) { 255static ResultCode UnmapMemory32(Core::System& system, u32 dst_addr, u32 src_addr, u32 size) {
diff --git a/src/input_common/input_poller.cpp b/src/input_common/input_poller.cpp
index 7b370335f..2f3c0735a 100644
--- a/src/input_common/input_poller.cpp
+++ b/src/input_common/input_poller.cpp
@@ -504,9 +504,10 @@ private:
504 504
505class InputFromMotion final : public Common::Input::InputDevice { 505class InputFromMotion final : public Common::Input::InputDevice {
506public: 506public:
507 explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, 507 explicit InputFromMotion(PadIdentifier identifier_, int motion_sensor_, float gyro_threshold_,
508 InputEngine* input_engine_) 508 InputEngine* input_engine_)
509 : identifier(identifier_), motion_sensor(motion_sensor_), input_engine(input_engine_) { 509 : identifier(identifier_), motion_sensor(motion_sensor_), gyro_threshold(gyro_threshold_),
510 input_engine(input_engine_) {
510 UpdateCallback engine_callback{[this]() { OnChange(); }}; 511 UpdateCallback engine_callback{[this]() { OnChange(); }};
511 const InputIdentifier input_identifier{ 512 const InputIdentifier input_identifier{
512 .identifier = identifier, 513 .identifier = identifier,
@@ -525,8 +526,9 @@ public:
525 const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor); 526 const auto basic_motion = input_engine->GetMotion(identifier, motion_sensor);
526 Common::Input::MotionStatus status{}; 527 Common::Input::MotionStatus status{};
527 const Common::Input::AnalogProperties properties = { 528 const Common::Input::AnalogProperties properties = {
528 .deadzone = 0.001f, 529 .deadzone = 0.0f,
529 .range = 1.0f, 530 .range = 1.0f,
531 .threshold = gyro_threshold,
530 .offset = 0.0f, 532 .offset = 0.0f,
531 }; 533 };
532 status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties}; 534 status.accel.x = {.raw_value = basic_motion.accel_x, .properties = properties};
@@ -551,6 +553,7 @@ public:
551private: 553private:
552 const PadIdentifier identifier; 554 const PadIdentifier identifier;
553 const int motion_sensor; 555 const int motion_sensor;
556 const float gyro_threshold;
554 int callback_key; 557 int callback_key;
555 InputEngine* input_engine; 558 InputEngine* input_engine;
556}; 559};
@@ -873,9 +876,11 @@ std::unique_ptr<Common::Input::InputDevice> InputFactory::CreateMotionDevice(
873 876
874 if (params.Has("motion")) { 877 if (params.Has("motion")) {
875 const auto motion_sensor = params.Get("motion", 0); 878 const auto motion_sensor = params.Get("motion", 0);
879 const auto gyro_threshold = params.Get("threshold", 0.007f);
876 input_engine->PreSetController(identifier); 880 input_engine->PreSetController(identifier);
877 input_engine->PreSetMotion(identifier, motion_sensor); 881 input_engine->PreSetMotion(identifier, motion_sensor);
878 return std::make_unique<InputFromMotion>(identifier, motion_sensor, input_engine.get()); 882 return std::make_unique<InputFromMotion>(identifier, motion_sensor, gyro_threshold,
883 input_engine.get());
879 } 884 }
880 885
881 const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f); 886 const auto deadzone = std::clamp(params.Get("deadzone", 0.15f), 0.0f, 1.0f);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 50918317f..08b3a81ce 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,6 +387,14 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
387 } 387 }
388} 388}
389 389
390void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
391 if (ctx.runtime_info.xfb_varyings.empty()) {
392 return;
393 }
394 ctx.AddCapability(spv::Capability::TransformFeedback);
395 ctx.AddExecutionMode(main_func, spv::ExecutionMode::Xfb);
396}
397
390void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { 398void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) {
391 if (info.uses_sampled_1d) { 399 if (info.uses_sampled_1d) {
392 ctx.AddCapability(spv::Capability::Sampled1D); 400 ctx.AddCapability(spv::Capability::Sampled1D);
@@ -442,9 +450,6 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
442 if (info.uses_sample_id) { 450 if (info.uses_sample_id) {
443 ctx.AddCapability(spv::Capability::SampleRateShading); 451 ctx.AddCapability(spv::Capability::SampleRateShading);
444 } 452 }
445 if (!ctx.runtime_info.xfb_varyings.empty()) {
446 ctx.AddCapability(spv::Capability::TransformFeedback);
447 }
448 if (info.uses_derivatives) { 453 if (info.uses_derivatives) {
449 ctx.AddCapability(spv::Capability::DerivativeControl); 454 ctx.AddCapability(spv::Capability::DerivativeControl);
450 } 455 }
@@ -484,6 +489,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_in
484 SetupSignedNanCapabilities(profile, program, ctx, main); 489 SetupSignedNanCapabilities(profile, program, ctx, main);
485 } 490 }
486 SetupCapabilities(profile, program.info, ctx); 491 SetupCapabilities(profile, program.info, ctx);
492 SetupTransformFeedbackCapabilities(ctx, main);
487 PatchPhiNodes(program, ctx); 493 PatchPhiNodes(program, ctx);
488 return ctx.Assemble(); 494 return ctx.Assemble();
489} 495}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
index 78869601f..4851b0b8d 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -57,16 +57,6 @@ void TranslatorVisitor::VMNMX(u64 insn) {
57 if (vmnmx.sat != 0) { 57 if (vmnmx.sat != 0) {
58 throw NotImplementedException("VMNMX SAT"); 58 throw NotImplementedException("VMNMX SAT");
59 } 59 }
60 // Selectors were shown to default to 2 in unit tests
61 if (vmnmx.src_a_selector != 2) {
62 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
63 }
64 if (vmnmx.src_b_selector != 2) {
65 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
66 }
67 if (vmnmx.src_a_width != VideoWidth::Word) {
68 throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
69 }
70 60
71 const bool is_b_imm{vmnmx.is_src_b_reg == 0}; 61 const bool is_b_imm{vmnmx.is_src_b_reg == 0};
72 const IR::U32 src_a{GetReg8(insn)}; 62 const IR::U32 src_a{GetReg8(insn)};
@@ -76,10 +66,14 @@ void TranslatorVisitor::VMNMX(u64 insn) {
76 const VideoWidth a_width{vmnmx.src_a_width}; 66 const VideoWidth a_width{vmnmx.src_a_width};
77 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; 67 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
78 68
69 const u32 a_selector{static_cast<u32>(vmnmx.src_a_selector)};
70 // Immediate values can't have a selector
71 const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmnmx.src_b_selector)};
72
79 const bool src_a_signed{vmnmx.src_a_sign != 0}; 73 const bool src_a_signed{vmnmx.src_a_sign != 0};
80 const bool src_b_signed{vmnmx.src_b_sign != 0}; 74 const bool src_b_signed{vmnmx.src_b_sign != 0};
81 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; 75 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
82 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; 76 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
83 77
84 // First operation's sign is only dependent on operand b's sign 78 // First operation's sign is only dependent on operand b's sign
85 const bool op_1_signed{src_b_signed}; 79 const bool op_1_signed{src_b_signed};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 43bed63ac..048dba4f3 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1474,6 +1474,8 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1474 // When this memory region has been joined a bunch of times, we assume it's being used 1474 // When this memory region has been joined a bunch of times, we assume it's being used
1475 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1475 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1476 has_stream_leap = true; 1476 has_stream_leap = true;
1477 begin -= PAGE_SIZE * 256;
1478 cpu_addr = begin;
1477 end += PAGE_SIZE * 256; 1479 end += PAGE_SIZE * 256;
1478 } 1480 }
1479 } 1481 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b18b8a02a..c38ebd670 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
240 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); 240 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
241 241
242 // Execute the current macro. 242 // Execute the current macro.
243 macro_engine->Execute(*this, macro_positions[entry], parameters); 243 macro_engine->Execute(macro_positions[entry], parameters);
244 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 244 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
245 FlushMMEInlineDraw(); 245 FlushMMEInlineDraw();
246 } 246 }
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index d7fabe605..0aeda4ce8 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,12 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
5#include <optional> 6#include <optional>
7
6#include <boost/container_hash/hash.hpp> 8#include <boost/container_hash/hash.hpp>
9
7#include "common/assert.h" 10#include "common/assert.h"
8#include "common/logging/log.h"
9#include "common/settings.h" 11#include "common/settings.h"
10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/macro/macro.h" 12#include "video_core/macro/macro.h"
12#include "video_core/macro/macro_hle.h" 13#include "video_core/macro/macro_hle.h"
13#include "video_core/macro/macro_interpreter.h" 14#include "video_core/macro/macro_interpreter.h"
@@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) {
24 uploaded_macro_code[method].push_back(data); 25 uploaded_macro_code[method].push_back(data);
25} 26}
26 27
27void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, 28void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
28 const std::vector<u32>& parameters) {
29 auto compiled_macro = macro_cache.find(method); 29 auto compiled_macro = macro_cache.find(method);
30 if (compiled_macro != macro_cache.end()) { 30 if (compiled_macro != macro_cache.end()) {
31 const auto& cache_info = compiled_macro->second; 31 const auto& cache_info = compiled_macro->second;
@@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
66 cache_info.lle_program = Compile(code); 66 cache_info.lle_program = Compile(code);
67 } 67 }
68 68
69 auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); 69 if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
70 if (hle_program.has_value()) {
71 cache_info.has_hle_program = true; 70 cache_info.has_hle_program = true;
72 cache_info.hle_program = std::move(hle_program.value()); 71 cache_info.hle_program = std::move(hle_program);
73 cache_info.hle_program->Execute(parameters, method); 72 cache_info.hle_program->Execute(parameters, method);
74 } else { 73 } else {
75 cache_info.lle_program->Execute(parameters, method); 74 cache_info.lle_program->Execute(parameters, method);
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index 31ee3440a..7aaa49286 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -119,7 +119,7 @@ public:
119 void AddCode(u32 method, u32 data); 119 void AddCode(u32 method, u32 data);
120 120
121 // Compiles the macro if its not in the cache, and executes the compiled macro 121 // Compiles the macro if its not in the cache, and executes the compiled macro
122 void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); 122 void Execute(u32 method, const std::vector<u32>& parameters);
123 123
124protected: 124protected:
125 virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; 125 virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 70ac7c620..900ad23c9 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -5,12 +5,15 @@
5#include <array> 5#include <array>
6#include <vector> 6#include <vector>
7#include "video_core/engines/maxwell_3d.h" 7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro.h"
8#include "video_core/macro/macro_hle.h" 9#include "video_core/macro/macro_hle.h"
9#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
10 11
11namespace Tegra { 12namespace Tegra {
12
13namespace { 13namespace {
14
15using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
16
14// HLE'd functions 17// HLE'd functions
15void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { 18void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
16 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); 19 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
@@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
77 maxwell3d.CallMethodFromMME(0x8e5, 0x0); 80 maxwell3d.CallMethodFromMME(0x8e5, 0x0);
78 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; 81 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
79} 82}
80} // Anonymous namespace
81 83
82constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ 84constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
83 {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, 85 {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
@@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
85 {0x0217920100488FF7, &HLE_0217920100488FF7}, 87 {0x0217920100488FF7, &HLE_0217920100488FF7},
86}}; 88}};
87 89
90class HLEMacroImpl final : public CachedMacro {
91public:
92 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
93 : maxwell3d{maxwell3d_}, func{func_} {}
94
95 void Execute(const std::vector<u32>& parameters, u32 method) override {
96 func(maxwell3d, parameters);
97 }
98
99private:
100 Engines::Maxwell3D& maxwell3d;
101 HLEFunction func;
102};
103} // Anonymous namespace
104
88HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} 105HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
89HLEMacro::~HLEMacro() = default; 106HLEMacro::~HLEMacro() = default;
90 107
91std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { 108std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
92 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), 109 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
93 [hash](const auto& pair) { return pair.first == hash; }); 110 [hash](const auto& pair) { return pair.first == hash; });
94 if (it == hle_funcs.end()) { 111 if (it == hle_funcs.end()) {
95 return std::nullopt; 112 return nullptr;
96 } 113 }
97 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); 114 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
98} 115}
99 116
100HLEMacroImpl::~HLEMacroImpl() = default;
101
102HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
103 : maxwell3d{maxwell3d_}, func{func_} {}
104
105void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
106 func(maxwell3d, parameters);
107}
108
109} // namespace Tegra 117} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
index cb3bd1600..b86ba84a1 100644
--- a/src/video_core/macro/macro_hle.h
+++ b/src/video_core/macro/macro_hle.h
@@ -5,10 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <memory> 7#include <memory>
8#include <optional>
9#include <vector>
10#include "common/common_types.h" 8#include "common/common_types.h"
11#include "video_core/macro/macro.h"
12 9
13namespace Tegra { 10namespace Tegra {
14 11
@@ -16,29 +13,17 @@ namespace Engines {
16class Maxwell3D; 13class Maxwell3D;
17} 14}
18 15
19using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
20
21class HLEMacro { 16class HLEMacro {
22public: 17public:
23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); 18 explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
24 ~HLEMacro(); 19 ~HLEMacro();
25 20
26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; 21 // Allocates and returns a cached macro if the hash matches a known function.
27 22 // Returns nullptr otherwise.
28private: 23 [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
29 Engines::Maxwell3D& maxwell3d;
30};
31
32class HLEMacroImpl : public CachedMacro {
33public:
34 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
35 ~HLEMacroImpl();
36
37 void Execute(const std::vector<u32>& parameters, u32 method) override;
38 24
39private: 25private:
40 Engines::Maxwell3D& maxwell3d; 26 Engines::Maxwell3D& maxwell3d;
41 HLEFunction func;
42}; 27};
43 28
44} // namespace Tegra 29} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 8da26fd59..fba755448 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -2,6 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <optional>
7
5#include "common/assert.h" 8#include "common/assert.h"
6#include "common/logging/log.h" 9#include "common/logging/log.h"
7#include "common/microprofile.h" 10#include "common/microprofile.h"
@@ -11,16 +14,81 @@
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); 14MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12 15
13namespace Tegra { 16namespace Tegra {
14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) 17namespace {
15 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} 18class MacroInterpreterImpl final : public CachedMacro {
19public:
20 explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
21 : maxwell3d{maxwell3d_}, code{code_} {}
16 22
17std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { 23 void Execute(const std::vector<u32>& params, u32 method) override;
18 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); 24
19} 25private:
26 /// Resets the execution engine state, zeroing registers, etc.
27 void Reset();
28
29 /**
30 * Executes a single macro instruction located at the current program counter. Returns whether
31 * the interpreter should keep running.
32 *
33 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
34 * previous instruction.
35 */
36 bool Step(bool is_delay_slot);
37
38 /// Calculates the result of an ALU operation. src_a OP src_b;
39 u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
40
41 /// Performs the result operation on the input result and stores it in the specified register
42 /// (if necessary).
43 void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
44
45 /// Evaluates the branch condition and returns whether the branch should be taken or not.
46 bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
47
48 /// Reads an opcode at the current program counter location.
49 Macro::Opcode GetOpcode() const;
50
51 /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
52 u32 GetRegister(u32 register_id) const;
53
54 /// Sets the register to the input value.
55 void SetRegister(u32 register_id, u32 value);
56
57 /// Sets the method address to use for the next Send instruction.
58 void SetMethodAddress(u32 address);
20 59
21MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, 60 /// Calls a GPU Engine method with the input parameter.
22 const std::vector<u32>& code_) 61 void Send(u32 value);
23 : maxwell3d{maxwell3d_}, code{code_} {} 62
63 /// Reads a GPU register located at the method address.
64 u32 Read(u32 method) const;
65
66 /// Returns the next parameter in the parameter queue.
67 u32 FetchParameter();
68
69 Engines::Maxwell3D& maxwell3d;
70
71 /// Current program counter
72 u32 pc{};
73 /// Program counter to execute at after the delay slot is executed.
74 std::optional<u32> delayed_pc;
75
76 /// General purpose macro registers.
77 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
78
79 /// Method address to use for the next Send instruction.
80 Macro::MethodAddress method_address = {};
81
82 /// Input parameters of the current macro.
83 std::unique_ptr<u32[]> parameters;
84 std::size_t num_parameters = 0;
85 std::size_t parameters_capacity = 0;
86 /// Index of the next parameter that will be fetched by the 'parm' instruction.
87 u32 next_parameter_index = 0;
88
89 bool carry_flag = false;
90 const std::vector<u32>& code;
91};
24 92
25void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { 93void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
26 MICROPROFILE_SCOPE(MacroInterp); 94 MICROPROFILE_SCOPE(MacroInterp);
@@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() {
283 ASSERT(next_parameter_index < num_parameters); 351 ASSERT(next_parameter_index < num_parameters);
284 return parameters[next_parameter_index++]; 352 return parameters[next_parameter_index++];
285} 353}
354} // Anonymous namespace
355
356MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
357 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
358
359std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
360 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
361}
286 362
287} // namespace Tegra 363} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index d50c619ce..8a9648e46 100644
--- a/src/video_core/macro/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -3,10 +3,9 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6#include <array> 6
7#include <optional>
8#include <vector> 7#include <vector>
9#include "common/bit_field.h" 8
10#include "common/common_types.h" 9#include "common/common_types.h"
11#include "video_core/macro/macro.h" 10#include "video_core/macro/macro.h"
12 11
@@ -26,77 +25,4 @@ private:
26 Engines::Maxwell3D& maxwell3d; 25 Engines::Maxwell3D& maxwell3d;
27}; 26};
28 27
29class MacroInterpreterImpl : public CachedMacro {
30public:
31 explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
32 void Execute(const std::vector<u32>& params, u32 method) override;
33
34private:
35 /// Resets the execution engine state, zeroing registers, etc.
36 void Reset();
37
38 /**
39 * Executes a single macro instruction located at the current program counter. Returns whether
40 * the interpreter should keep running.
41 *
42 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
43 * previous instruction.
44 */
45 bool Step(bool is_delay_slot);
46
47 /// Calculates the result of an ALU operation. src_a OP src_b;
48 u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
49
50 /// Performs the result operation on the input result and stores it in the specified register
51 /// (if necessary).
52 void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
53
54 /// Evaluates the branch condition and returns whether the branch should be taken or not.
55 bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
56
57 /// Reads an opcode at the current program counter location.
58 Macro::Opcode GetOpcode() const;
59
60 /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
61 u32 GetRegister(u32 register_id) const;
62
63 /// Sets the register to the input value.
64 void SetRegister(u32 register_id, u32 value);
65
66 /// Sets the method address to use for the next Send instruction.
67 void SetMethodAddress(u32 address);
68
69 /// Calls a GPU Engine method with the input parameter.
70 void Send(u32 value);
71
72 /// Reads a GPU register located at the method address.
73 u32 Read(u32 method) const;
74
75 /// Returns the next parameter in the parameter queue.
76 u32 FetchParameter();
77
78 Engines::Maxwell3D& maxwell3d;
79
80 /// Current program counter
81 u32 pc;
82 /// Program counter to execute at after the delay slot is executed.
83 std::optional<u32> delayed_pc;
84
85 /// General purpose macro registers.
86 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
87
88 /// Method address to use for the next Send instruction.
89 Macro::MethodAddress method_address = {};
90
91 /// Input parameters of the current macro.
92 std::unique_ptr<u32[]> parameters;
93 std::size_t num_parameters = 0;
94 std::size_t parameters_capacity = 0;
95 /// Index of the next parameter that will be fetched by the 'parm' instruction.
96 u32 next_parameter_index = 0;
97
98 bool carry_flag = false;
99 const std::vector<u32>& code;
100};
101
102} // namespace Tegra 28} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index c6b2b2109..47b28ad16 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -2,9 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
6#include <bitset>
7#include <optional>
8
9#include <xbyak/xbyak.h>
10
5#include "common/assert.h" 11#include "common/assert.h"
12#include "common/bit_field.h"
6#include "common/logging/log.h" 13#include "common/logging/log.h"
7#include "common/microprofile.h" 14#include "common/microprofile.h"
15#include "common/x64/xbyak_abi.h"
8#include "common/x64/xbyak_util.h" 16#include "common/x64/xbyak_util.h"
9#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
10#include "video_core/macro/macro_interpreter.h" 18#include "video_core/macro/macro_interpreter.h"
@@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
14MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); 22MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
15 23
16namespace Tegra { 24namespace Tegra {
25namespace {
17constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; 26constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
18constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; 27constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
19constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; 28constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
20constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; 29constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
21constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; 30constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
22 31
23static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ 32constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
24 STATE, 33 STATE,
25 RESULT, 34 RESULT,
26 PARAMETERS, 35 PARAMETERS,
@@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
28 BRANCH_HOLDER, 37 BRANCH_HOLDER,
29}); 38});
30 39
31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) 40// Arbitrarily chosen based on current booting games.
32 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} 41constexpr size_t MAX_CODE_SIZE = 0x10000;
33 42
34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { 43std::bitset<32> PersistentCallerSavedRegs() {
35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code); 44 return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
36} 45}
37 46
38MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) 47class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
39 : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { 48public:
40 Compile(); 49 explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
41} 50 : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
51 Compile();
52 }
53
54 void Execute(const std::vector<u32>& parameters, u32 method) override;
55
56 void Compile_ALU(Macro::Opcode opcode);
57 void Compile_AddImmediate(Macro::Opcode opcode);
58 void Compile_ExtractInsert(Macro::Opcode opcode);
59 void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
60 void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
61 void Compile_Read(Macro::Opcode opcode);
62 void Compile_Branch(Macro::Opcode opcode);
63
64private:
65 void Optimizer_ScanFlags();
66
67 void Compile();
68 bool Compile_NextInstruction();
69
70 Xbyak::Reg32 Compile_FetchParameter();
71 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
72
73 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
74 void Compile_Send(Xbyak::Reg32 value);
42 75
43MacroJITx64Impl::~MacroJITx64Impl() = default; 76 Macro::Opcode GetOpCode() const;
77
78 struct JITState {
79 Engines::Maxwell3D* maxwell3d{};
80 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
81 u32 carry_flag{};
82 };
83 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
84 using ProgramType = void (*)(JITState*, const u32*);
85
86 struct OptimizerState {
87 bool can_skip_carry{};
88 bool has_delayed_pc{};
89 bool zero_reg_skip{};
90 bool skip_dummy_addimmediate{};
91 bool optimize_for_method_move{};
92 bool enable_asserts{};
93 };
94 OptimizerState optimizer{};
95
96 std::optional<Macro::Opcode> next_opcode{};
97 ProgramType program{nullptr};
98
99 std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
100 std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
101 Xbyak::Label end_of_code{};
102
103 bool is_delay_slot{};
104 u32 pc{};
105
106 const std::vector<u32>& code;
107 Engines::Maxwell3D& maxwell3d;
108};
44 109
45void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { 110void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
46 MICROPROFILE_SCOPE(MacroJitExecute); 111 MICROPROFILE_SCOPE(MacroJitExecute);
@@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
307 Compile_ProcessResult(opcode.result_operation, opcode.dst); 372 Compile_ProcessResult(opcode.result_operation, opcode.dst);
308} 373}
309 374
310static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { 375void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
311 maxwell3d->CallMethodFromMME(method_address.address, value); 376 maxwell3d->CallMethodFromMME(method_address.address, value);
312} 377}
313 378
314void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { 379void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
315 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); 380 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
316 mov(Common::X64::ABI_PARAM1, qword[STATE]); 381 mov(Common::X64::ABI_PARAM1, qword[STATE]);
317 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); 382 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
@@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
338 L(dont_process); 403 L(dont_process);
339} 404}
340 405
341void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { 406void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
342 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); 407 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
343 const s32 jump_address = 408 const s32 jump_address =
344 static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); 409 static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
@@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
392 L(end); 457 L(end);
393} 458}
394 459
395void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { 460void MacroJITx64Impl::Optimizer_ScanFlags() {
396 optimizer.can_skip_carry = true; 461 optimizer.can_skip_carry = true;
397 optimizer.has_delayed_pc = false; 462 optimizer.has_delayed_pc = false;
398 for (auto raw_op : code) { 463 for (auto raw_op : code) {
@@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
534 return true; 599 return true;
535} 600}
536 601
537Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { 602Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
538 mov(eax, dword[PARAMETERS]); 603 mov(eax, dword[PARAMETERS]);
539 add(PARAMETERS, sizeof(u32)); 604 add(PARAMETERS, sizeof(u32));
540 return eax; 605 return eax;
@@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const {
611 ASSERT(pc < code.size()); 676 ASSERT(pc < code.size());
612 return {code[pc]}; 677 return {code[pc]};
613} 678}
679} // Anonymous namespace
614 680
615std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { 681MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
616 return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; 682 : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
617}
618 683
684std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
685 return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
686}
619} // namespace Tegra 687} // namespace Tegra
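
Two details of the JIT hunk deserve a note. PERSISTENT_REGISTERS is now a namespace-scope constexpr bitset, and PersistentCallerSavedRegs() has become a plain free function that intersects it with the ABI's caller-saved mask, so only the persistent registers a callee may clobber are pushed and popped around calls out of the generated code, such as the one emitted by Compile_Send. A simplified sketch of that arithmetic follows; the bit indices are placeholders, not real x86-64 register encodings.

    // Placeholder masks: bit i set means "register with index i is in the set".
    #include <bitset>

    // Registers the generated macro code keeps live for its whole run
    // (hypothetical indices 3, 5, 12, 14, 15).
    constexpr std::bitset<32> PERSISTENT{0b1101'0000'0010'1000};
    // Registers the host calling convention lets a callee clobber
    // (hypothetical indices 1, 2, 5, 8, 9, 12).
    constexpr std::bitset<32> CALLER_SAVED{0b0001'0011'0010'0110};

    // Only the overlap (here bits 5 and 12) must be spilled and reloaded around
    // a call out of JIT-generated code; the rest survives by ABI contract.
    std::bitset<32> PersistentCallerSavedRegs() {
        return PERSISTENT & CALLER_SAVED;
    }
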
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index d03d480b4..773b037ae 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -4,12 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <bitset>
9#include <xbyak/xbyak.h>
10#include "common/bit_field.h"
11#include "common/common_types.h" 7#include "common/common_types.h"
12#include "common/x64/xbyak_abi.h"
13#include "video_core/macro/macro.h" 8#include "video_core/macro/macro.h"
14 9
15namespace Tegra { 10namespace Tegra {
@@ -18,9 +13,6 @@ namespace Engines {
18class Maxwell3D; 13class Maxwell3D;
19} 14}
20 15
21/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
22constexpr size_t MAX_CODE_SIZE = 0x10000;
23
24class MacroJITx64 final : public MacroEngine { 16class MacroJITx64 final : public MacroEngine {
25public: 17public:
26 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); 18 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
@@ -32,67 +24,4 @@ private:
32 Engines::Maxwell3D& maxwell3d; 24 Engines::Maxwell3D& maxwell3d;
33}; 25};
34 26
35class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
36public:
37 explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
38 ~MacroJITx64Impl();
39
40 void Execute(const std::vector<u32>& parameters, u32 method) override;
41
42 void Compile_ALU(Macro::Opcode opcode);
43 void Compile_AddImmediate(Macro::Opcode opcode);
44 void Compile_ExtractInsert(Macro::Opcode opcode);
45 void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
46 void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
47 void Compile_Read(Macro::Opcode opcode);
48 void Compile_Branch(Macro::Opcode opcode);
49
50private:
51 void Optimizer_ScanFlags();
52
53 void Compile();
54 bool Compile_NextInstruction();
55
56 Xbyak::Reg32 Compile_FetchParameter();
57 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
58
59 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
60 void Compile_Send(Xbyak::Reg32 value);
61
62 Macro::Opcode GetOpCode() const;
63 std::bitset<32> PersistentCallerSavedRegs() const;
64
65 struct JITState {
66 Engines::Maxwell3D* maxwell3d{};
67 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
68 u32 carry_flag{};
69 };
70 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
71 using ProgramType = void (*)(JITState*, const u32*);
72
73 struct OptimizerState {
74 bool can_skip_carry{};
75 bool has_delayed_pc{};
76 bool zero_reg_skip{};
77 bool skip_dummy_addimmediate{};
78 bool optimize_for_method_move{};
79 bool enable_asserts{};
80 };
81 OptimizerState optimizer{};
82
83 std::optional<Macro::Opcode> next_opcode{};
84 ProgramType program{nullptr};
85
86 std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
87 std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
88 Xbyak::Label end_of_code{};
89
90 bool is_delay_slot{};
91 u32 pc{};
92 std::optional<u32> delayed_pc;
93
94 const std::vector<u32>& code;
95 Engines::Maxwell3D& maxwell3d;
96};
97
98} // namespace Tegra 27} // namespace Tegra
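
The JITState struct this header used to declare (removed above) now lives in macro_jit_x64.cpp, together with its static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"). That assert is load-bearing: Compile_Send fetches the engine pointer with a plain qword[STATE] read, i.e. at displacement zero, so reordering the struct would silently break the generated code. A small plain-C++ sketch of the same contract, using hypothetical names (State, Engine, LoadEngine):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    struct Engine {}; // stand-in for Engines::Maxwell3D

    struct State {
        Engine* engine{};                          // must remain the first member
        std::array<std::uint32_t, 8> registers{};  // macro register file
        std::uint32_t carry_flag{};
    };

    // Fires at compile time if someone reorders State, instead of the JIT
    // silently loading garbage through the wrong offset at runtime.
    static_assert(offsetof(State, engine) == 0, "engine pointer must sit at offset 0");

    // Roughly what the emitted mov(param, qword[STATE]) does, expressed in C++:
    // reinterpret the state pointer and read its first pointer-sized field.
    Engine* LoadEngine(const State* state) {
        return *reinterpret_cast<Engine* const*>(state);
    }
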
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 7029287a9..752504236 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -403,10 +403,22 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
403 connect(button, &QPushButton::customContextMenuRequested, 403 connect(button, &QPushButton::customContextMenuRequested,
404 [=, this](const QPoint& menu_location) { 404 [=, this](const QPoint& menu_location) {
405 QMenu context_menu; 405 QMenu context_menu;
406 Common::ParamPackage param = emulated_controller->GetMotionParam(motion_id);
406 context_menu.addAction(tr("Clear"), [&] { 407 context_menu.addAction(tr("Clear"), [&] {
407 emulated_controller->SetMotionParam(motion_id, {}); 408 emulated_controller->SetMotionParam(motion_id, {});
408 motion_map[motion_id]->setText(tr("[not set]")); 409 motion_map[motion_id]->setText(tr("[not set]"));
409 }); 410 });
411 if (param.Has("motion")) {
412 context_menu.addAction(tr("Set gyro threshold"), [&] {
413 const int gyro_threshold =
414 static_cast<int>(param.Get("threshold", 0.007f) * 1000.0f);
415 const int new_threshold = QInputDialog::getInt(
416 this, tr("Set threshold"), tr("Choose a value between 0% and 100%"),
417 gyro_threshold, 0, 100);
418 param.Set("threshold", new_threshold / 1000.0f);
419 emulated_controller->SetMotionParam(motion_id, param);
420 });
421 }
410 context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location)); 422 context_menu.exec(motion_map[motion_id]->mapToGlobal(menu_location));
411 }); 423 });
412 } 424 }
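
The new "Set gyro threshold" action stores the threshold as a float inside the motion ParamPackage but edits it through an integer dialog, scaling by 1000 in each direction. A minimal sketch of that round trip without the Qt plumbing; the helper names and kScale are assumptions mirroring the 1000.0f factor in the hunk:

    constexpr float kScale = 1000.0f;

    // Stored float -> integer shown in the dialog. static_cast truncates, so the
    // 0.007f default can display as 6 rather than 7.
    int ThresholdToDialogValue(float threshold) {
        return static_cast<int>(threshold * kScale);
    }

    // Integer chosen in the dialog -> float written back into the ParamPackage.
    float DialogValueToThreshold(int value) {
        return static_cast<float>(value) / kScale;
    }

Rounding with std::lround instead of truncating would pin the default display at 7; the sketch keeps the static_cast semantics of the diff.
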