author     liamwhite   2023-11-30 09:20:55 -0500
committer  GitHub      2023-11-30 09:20:55 -0500
commit     57a391e71db13ade7a3d96f59d53781eff18d2ac (patch)
tree       0b4223de40a2d77598ac9095b1374353c2e9da7c /src
parent     Merge pull request #12223 from liamwhite/fruit-company (diff)
parent     core: Rename patcher file (diff)
Merge pull request #12074 from GPUCode/yuwu-on-the-metal
Implement Native Code Execution (NCE)
Diffstat (limited to 'src')
-rw-r--r--  src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt |   5
-rw-r--r--  src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt |   1
-rw-r--r--  src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt |   9
-rw-r--r--  src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt |   1
-rw-r--r--  src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt |   4
-rw-r--r--  src/android/app/src/main/jni/native.cpp |   8
-rw-r--r--  src/android/app/src/main/res/values/arrays.xml |  18
-rw-r--r--  src/android/app/src/main/res/values/strings.xml |   5
-rw-r--r--  src/common/CMakeLists.txt |  10
-rw-r--r--  src/common/free_region_manager.h |  55
-rw-r--r--  src/common/host_memory.cpp | 191
-rw-r--r--  src/common/host_memory.h |  15
-rw-r--r--  src/common/settings.cpp |  17
-rw-r--r--  src/common/settings.h |  11
-rw-r--r--  src/common/settings_enums.h |   2
-rw-r--r--  src/common/signal_chain.cpp |  42
-rw-r--r--  src/common/signal_chain.h |  19
-rw-r--r--  src/common/wall_clock.cpp |   4
-rw-r--r--  src/core/CMakeLists.txt |  16
-rw-r--r--  src/core/arm/arm_interface.cpp |   2
-rw-r--r--  src/core/arm/arm_interface.h |   3
-rw-r--r--  src/core/arm/nce/arm_nce.cpp | 400
-rw-r--r--  src/core/arm/nce/arm_nce.h | 108
-rw-r--r--  src/core/arm/nce/arm_nce.s | 222
-rw-r--r--  src/core/arm/nce/arm_nce_asm_definitions.h |  29
-rw-r--r--  src/core/arm/nce/guest_context.h |  50
-rw-r--r--  src/core/arm/nce/instructions.h | 147
-rw-r--r--  src/core/arm/nce/patcher.cpp | 474
-rw-r--r--  src/core/arm/nce/patcher.h |  98
-rw-r--r--  src/core/cpu_manager.cpp |   2
-rw-r--r--  src/core/device_memory.cpp |   3
-rw-r--r--  src/core/hle/kernel/code_set.h |  14
-rw-r--r--  src/core/hle/kernel/k_address_space_info.cpp |   4
-rw-r--r--  src/core/hle/kernel/k_page_table_base.cpp |  33
-rw-r--r--  src/core/hle/kernel/k_page_table_base.h |   3
-rw-r--r--  src/core/hle/kernel/k_process.cpp |  23
-rw-r--r--  src/core/hle/kernel/k_process.h |  14
-rw-r--r--  src/core/hle/kernel/k_process_page_table.h |   9
-rw-r--r--  src/core/hle/kernel/k_thread.h |  16
-rw-r--r--  src/core/hle/kernel/physical_core.cpp |  14
-rw-r--r--  src/core/loader/deconstructed_rom_directory.cpp |  63
-rw-r--r--  src/core/loader/kip.cpp |   3
-rw-r--r--  src/core/loader/nro.cpp |  63
-rw-r--r--  src/core/loader/nro.h |   2
-rw-r--r--  src/core/loader/nso.cpp |  67
-rw-r--r--  src/core/loader/nso.h |   7
-rw-r--r--  src/core/memory.cpp |  71
-rw-r--r--  src/core/memory.h |  18
-rw-r--r--  src/tests/common/host_memory.cpp |  71
-rw-r--r--  src/yuzu/configuration/configure_cpu.cpp |  12
-rw-r--r--  src/yuzu/configuration/configure_cpu.h |   1
-rw-r--r--  src/yuzu/configuration/configure_cpu.ui |  30
-rw-r--r--  src/yuzu/configuration/shared_translation.cpp |   6
53 files changed, 2374 insertions(+), 141 deletions(-)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt
index f2ba2504c..e0f01127c 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/NativeLibrary.kt
@@ -300,6 +300,11 @@ object NativeLibrary {
     external fun getPerfStats(): DoubleArray
 
     /**
+     * Returns the current CPU backend.
+     */
+    external fun getCpuBackend(): String
+
+    /**
      * Notifies the core emulation that the orientation has changed.
      */
     external fun notifyOrientationChange(layout_option: Int, rotation: Int)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt
index 151362124..ef10b209f 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt
@@ -10,6 +10,7 @@ enum class IntSetting(
     override val category: Settings.Category,
     override val androidDefault: Int? = null
 ) : AbstractIntSetting {
+    CPU_BACKEND("cpu_backend", Settings.Category.Cpu),
     CPU_ACCURACY("cpu_accuracy", Settings.Category.Cpu),
     REGION_INDEX("region_index", Settings.Category.System),
     LANGUAGE_INDEX("language_index", Settings.Category.System),
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
index 6aba69dbe..e198b18a0 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
@@ -79,6 +79,15 @@ abstract class SettingsItem(
         )
         put(
             SingleChoiceSetting(
+                IntSetting.CPU_BACKEND,
+                R.string.cpu_backend,
+                0,
+                R.array.cpuBackendArm64Names,
+                R.array.cpuBackendArm64Values
+            )
+        )
+        put(
+            SingleChoiceSetting(
                 IntSetting.CPU_ACCURACY,
                 R.string.cpu_accuracy,
                 0,
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
index 8b71e32f3..7425728c6 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
@@ -269,6 +269,7 @@ class SettingsFragmentPresenter(
             add(BooleanSetting.RENDERER_DEBUG.key)
 
             add(HeaderSetting(R.string.cpu))
+            add(IntSetting.CPU_BACKEND.key)
             add(IntSetting.CPU_ACCURACY.key)
             add(BooleanSetting.CPU_DEBUG_MODE.key)
             add(SettingsItem.FASTMEM_COMBINED)
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt
index c32fa0d7e..734c1d5ca 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt
@@ -414,8 +414,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback {
         perfStatsUpdater = {
             if (emulationViewModel.emulationStarted.value) {
                 val perfStats = NativeLibrary.getPerfStats()
+                val cpuBackend = NativeLibrary.getCpuBackend()
                 if (_binding != null) {
-                    binding.showFpsText.text = String.format("FPS: %.1f", perfStats[FPS])
+                    binding.showFpsText.text =
+                        String.format("FPS: %.1f\n%s", perfStats[FPS], cpuBackend)
                 }
                 perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800)
             }
diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp
index 617288ae4..ed5ce6f8a 100644
--- a/src/android/app/src/main/jni/native.cpp
+++ b/src/android/app/src/main/jni/native.cpp
@@ -694,6 +694,14 @@ jdoubleArray Java_org_yuzu_yuzu_1emu_NativeLibrary_getPerfStats(JNIEnv* env, jcl
     return j_stats;
 }
 
+jstring Java_org_yuzu_yuzu_1emu_NativeLibrary_getCpuBackend(JNIEnv* env, jclass clazz) {
+    if (Settings::IsNceEnabled()) {
+        return ToJString(env, "NCE");
+    }
+
+    return ToJString(env, "JIT");
+}
+
 void Java_org_yuzu_yuzu_1emu_utils_DirectoryInitialization_setSysDirectory(JNIEnv* env,
                                                                            jclass clazz,
                                                                            jstring j_path) {}
diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml
index 51bcc49a3..ab435dce9 100644
--- a/src/android/app/src/main/res/values/arrays.xml
+++ b/src/android/app/src/main/res/values/arrays.xml
@@ -175,6 +175,24 @@
         <item>2</item>
     </integer-array>
 
+    <string-array name="cpuBackendArm64Names">
+        <item>@string/cpu_backend_dynarmic</item>
+        <item>@string/cpu_backend_nce</item>
+    </string-array>
+
+    <integer-array name="cpuBackendArm64Values">
+        <item>0</item>
+        <item>1</item>
+    </integer-array>
+
+    <string-array name="cpuBackendX86Names">
+        <item>@string/cpu_backend_dynarmic</item>
+    </string-array>
+
+    <integer-array name="cpuBackendX86Values">
+        <item>0</item>
+    </integer-array>
+
     <string-array name="cpuAccuracyNames">
         <item>@string/auto</item>
         <item>@string/cpu_accuracy_accurate</item>
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index fa9b153b6..a6ccef8a1 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -191,6 +191,7 @@
     <string name="frame_limit_enable_description">Limits emulation speed to a specified percentage of normal speed.</string>
     <string name="frame_limit_slider">Limit speed percent</string>
     <string name="frame_limit_slider_description">Specifies the percentage to limit emulation speed. 100% is the normal speed. Values higher or lower will increase or decrease the speed limit.</string>
+    <string name="cpu_backend">CPU backend</string>
     <string name="cpu_accuracy">CPU accuracy</string>
     <string name="value_with_units">%1$s%2$s</string>
 
@@ -423,6 +424,10 @@
     <string name="ratio_force_sixteen_ten">Force 16:10</string>
     <string name="ratio_stretch">Stretch to window</string>
 
+    <!-- CPU Backend -->
+    <string name="cpu_backend_dynarmic">Dynarmic (Slow)</string>
+    <string name="cpu_backend_nce">Native code execution (NCE)</string>
+
     <!-- CPU Accuracy -->
     <string name="cpu_accuracy_accurate">Accurate</string>
     <string name="cpu_accuracy_unsafe">Unsafe</string>
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2651daadb..bbc55eb56 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -52,6 +52,7 @@ add_library(common STATIC
     fiber.cpp
     fiber.h
     fixed_point.h
+    free_region_manager.h
     fs/file.cpp
     fs/file.h
     fs/fs.cpp
@@ -166,6 +167,13 @@ if (WIN32)
     target_link_libraries(common PRIVATE ntdll)
 endif()
 
+if (NOT WIN32)
+    target_sources(common PRIVATE
+        signal_chain.cpp
+        signal_chain.h
+    )
+endif()
+
 if(ANDROID)
     target_sources(common
         PRIVATE
@@ -200,7 +208,7 @@ if(ARCHITECTURE_x86_64)
     target_link_libraries(common PRIVATE xbyak::xbyak)
 endif()
 
-if (ARCHITECTURE_arm64 AND (ANDROID OR LINUX))
+if (HAS_NCE)
     target_sources(common
         PRIVATE
             arm64/native_clock.cpp
diff --git a/src/common/free_region_manager.h b/src/common/free_region_manager.h
new file mode 100644
index 000000000..2e590d609
--- /dev/null
+++ b/src/common/free_region_manager.h
@@ -0,0 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <mutex>
+#include <boost/icl/interval_set.hpp>
+
+namespace Common {
+
+class FreeRegionManager {
+public:
+    explicit FreeRegionManager() = default;
+    ~FreeRegionManager() = default;
+
+    void SetAddressSpace(void* start, size_t size) {
+        this->FreeBlock(start, size);
+    }
+
+    std::pair<void*, size_t> FreeBlock(void* block_ptr, size_t size) {
+        std::scoped_lock lk(m_mutex);
+
+        // Check to see if we are adjacent to any regions.
+        auto start_address = reinterpret_cast<uintptr_t>(block_ptr);
+        auto end_address = start_address + size;
+        auto it = m_free_regions.find({start_address - 1, end_address + 1});
+
+        // If we are, join with them, ensuring we stay in bounds.
+        if (it != m_free_regions.end()) {
+            start_address = std::min(start_address, it->lower());
+            end_address = std::max(end_address, it->upper());
+        }
+
+        // Free the relevant region.
+        m_free_regions.insert({start_address, end_address});
+
+        // Return the adjusted pointers.
+        block_ptr = reinterpret_cast<void*>(start_address);
+        size = end_address - start_address;
+        return {block_ptr, size};
+    }
+
+    void AllocateBlock(void* block_ptr, size_t size) {
+        std::scoped_lock lk(m_mutex);
+
+        auto address = reinterpret_cast<uintptr_t>(block_ptr);
+        m_free_regions.subtract({address, address + size});
+    }
+
+private:
+    std::mutex m_mutex;
+    boost::icl::interval_set<uintptr_t> m_free_regions;
+};
+
+} // namespace Common
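
The probe find({start_address - 1, end_address + 1}) widens the freed range by one byte on each side, so a free region that merely touches the block counts as overlapping, and the joining interval_set coalesces everything into the single range that FreeBlock hands back to its caller. A standalone sketch of that coalescing behavior, using Boost.ICL directly (illustrative test code, not part of this commit):

#include <cstdint>
#include <iostream>
#include <boost/icl/interval_set.hpp>

int main() {
    boost::icl::interval_set<std::uintptr_t> free_regions;

    // Free two directly adjacent blocks: [0x1000, 0x2000) and [0x2000, 0x3000).
    free_regions.insert({0x1000, 0x2000});
    free_regions.insert({0x2000, 0x3000});

    // A joining interval_set merges touching intervals, leaving one region [0x1000, 0x3000).
    std::cout << "regions: " << free_regions.iterative_size() << '\n'; // prints 1

    // Allocating from the middle (AllocateBlock uses subtract) splits it back into two.
    free_regions.subtract({0x1800, 0x2800});
    std::cout << "regions: " << free_regions.iterative_size() << '\n'; // prints 2
}

Because Unmap receives the merged pointer and size back, it can re-reserve the whole free range with a single PROT_NONE mapping instead of one mmap call per fragment.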
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index ba22595e0..3a9ea6eb4 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -21,15 +21,18 @@
 #include <boost/icl/interval_set.hpp>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/random.h>
 #include <unistd.h>
 #include "common/scope_exit.h"
 
 #endif // ^^^ Linux ^^^
 
 #include <mutex>
+#include <random>
 
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/free_region_manager.h"
 #include "common/host_memory.h"
 #include "common/logging/log.h"
 
@@ -141,7 +144,7 @@ public:
         Release();
     }
 
-    void Map(size_t virtual_offset, size_t host_offset, size_t length) {
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
         std::unique_lock lock{placeholder_mutex};
         if (!IsNiechePlaceholder(virtual_offset, length)) {
             Split(virtual_offset, length);
@@ -160,7 +163,7 @@ public:
         }
     }
 
-    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
         DWORD new_flags{};
         if (read && write) {
             new_flags = PAGE_READWRITE;
@@ -186,6 +189,11 @@ public:
         }
     }
 
+    void EnableDirectMappedAddress() {
+        // TODO
+        UNREACHABLE();
+    }
+
     const size_t backing_size; ///< Size of the backing memory in bytes
     const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
 
@@ -353,6 +361,55 @@ private:
 
 #elif defined(__linux__) || defined(__FreeBSD__) // ^^^ Windows ^^^ vvv Linux vvv
 
+#ifdef ARCHITECTURE_arm64
+
+static void* ChooseVirtualBase(size_t virtual_size) {
+    constexpr uintptr_t Map39BitSize = (1ULL << 39);
+    constexpr uintptr_t Map36BitSize = (1ULL << 36);
+
+    // This is not a cryptographic application, we just want something random.
+    std::mt19937_64 rng;
+
+    // We want to ensure we are allocating at an address aligned to the L2 block size.
+    // For Qualcomm devices, we must also allocate memory above 36 bits.
+    const size_t lower = Map36BitSize / HugePageSize;
+    const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
+    const size_t range = upper - lower;
+
+    // Try up to 64 times to allocate memory at random addresses in the range.
+    for (int i = 0; i < 64; i++) {
+        // Calculate a possible location.
+        uintptr_t hint_address = ((rng() % range) + lower) * HugePageSize;
+
+        // Try to map.
+        // Note: we may be able to take advantage of MAP_FIXED_NOREPLACE here.
+        void* map_pointer =
+            mmap(reinterpret_cast<void*>(hint_address), virtual_size, PROT_READ | PROT_WRITE,
+                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+
+        // If we successfully mapped, we're done.
+        if (reinterpret_cast<uintptr_t>(map_pointer) == hint_address) {
+            return map_pointer;
+        }
+
+        // Unmap if necessary, and try again.
+        if (map_pointer != MAP_FAILED) {
+            munmap(map_pointer, virtual_size);
+        }
+    }
+
+    return MAP_FAILED;
+}
+
+#else
+
+static void* ChooseVirtualBase(size_t virtual_size) {
+    return mmap(nullptr, virtual_size, PROT_READ | PROT_WRITE,
+                MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+}
+
+#endif
+
 class HostMemory::Impl {
 public:
     explicit Impl(size_t backing_size_, size_t virtual_size_)
@@ -415,8 +472,7 @@ public:
         }
     }
 #else
-        virtual_base = static_cast<u8*>(mmap(nullptr, virtual_size, PROT_NONE,
-                                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0));
+        virtual_base = virtual_map_base = static_cast<u8*>(ChooseVirtualBase(virtual_size));
         if (virtual_base == MAP_FAILED) {
             LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
             throw std::bad_alloc{};
@@ -424,7 +480,7 @@ public:
         madvise(virtual_base, virtual_size, MADV_HUGEPAGE);
 #endif
 
-        placeholders.add({0, virtual_size});
+        free_manager.SetAddressSpace(virtual_base, virtual_size);
         good = true;
     }
 
@@ -432,14 +488,29 @@ public:
         Release();
     }
 
-    void Map(size_t virtual_offset, size_t host_offset, size_t length) {
-        {
-            std::scoped_lock lock{placeholder_mutex};
-            placeholders.subtract({virtual_offset, virtual_offset + length});
-        }
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms) {
+        // Intersect the range with our address space.
+        AdjustMap(&virtual_offset, &length);
+
+        // We are removing a placeholder.
+        free_manager.AllocateBlock(virtual_base + virtual_offset, length);
+
+        // Deduce mapping protection flags.
+        int flags = PROT_NONE;
+        if (True(perms & MemoryPermission::Read)) {
+            flags |= PROT_READ;
+        }
+        if (True(perms & MemoryPermission::Write)) {
+            flags |= PROT_WRITE;
+        }
+#ifdef ARCHITECTURE_arm64
+        if (True(perms & MemoryPermission::Execute)) {
+            flags |= PROT_EXEC;
+        }
+#endif
 
-        void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE,
-                         MAP_SHARED | MAP_FIXED, fd, host_offset);
+        void* ret = mmap(virtual_base + virtual_offset, length, flags, MAP_SHARED | MAP_FIXED, fd,
+                         host_offset);
         ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
     }
 
@@ -447,47 +518,54 @@ public:
         // The method name is wrong. We're still talking about the virtual range.
         // We don't want to unmap, we want to reserve this memory.
 
-        {
-            std::scoped_lock lock{placeholder_mutex};
-            auto it = placeholders.find({virtual_offset - 1, virtual_offset + length + 1});
-
-            if (it != placeholders.end()) {
-                size_t prev_upper = virtual_offset + length;
-                virtual_offset = std::min(virtual_offset, it->lower());
-                length = std::max(it->upper(), prev_upper) - virtual_offset;
-            }
-
-            placeholders.add({virtual_offset, virtual_offset + length});
-        }
+        // Intersect the range with our address space.
+        AdjustMap(&virtual_offset, &length);
+
+        // Merge with any adjacent placeholder mappings.
+        auto [merged_pointer, merged_size] =
+            free_manager.FreeBlock(virtual_base + virtual_offset, length);
 
-        void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE,
+        void* ret = mmap(merged_pointer, merged_size, PROT_NONE,
                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
         ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
     }
 
-    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
-        int flags = 0;
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {
+        // Intersect the range with our address space.
+        AdjustMap(&virtual_offset, &length);
+
+        int flags = PROT_NONE;
         if (read) {
             flags |= PROT_READ;
         }
         if (write) {
             flags |= PROT_WRITE;
         }
+#ifdef HAS_NCE
+        if (execute) {
+            flags |= PROT_EXEC;
+        }
+#endif
         int ret = mprotect(virtual_base + virtual_offset, length, flags);
         ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
     }
 
+    void EnableDirectMappedAddress() {
+        virtual_base = nullptr;
+    }
+
     const size_t backing_size; ///< Size of the backing memory in bytes
     const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
 
     u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
     u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
+    u8* virtual_map_base{reinterpret_cast<u8*>(MAP_FAILED)};
 
 private:
     /// Release all resources in the object
     void Release() {
-        if (virtual_base != MAP_FAILED) {
-            int ret = munmap(virtual_base, virtual_size);
+        if (virtual_map_base != MAP_FAILED) {
+            int ret = munmap(virtual_map_base, virtual_size);
             ASSERT_MSG(ret == 0, "munmap failed: {}", strerror(errno));
         }
 
@@ -502,10 +580,29 @@ private:
         }
     }
 
-    int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
+    void AdjustMap(size_t* virtual_offset, size_t* length) {
+        if (virtual_base != nullptr) {
+            return;
+        }
+
+        // If we are direct mapped, we want to make sure we are operating on a region
+        // that is in range of our virtual mapping.
+        size_t intended_start = *virtual_offset;
+        size_t intended_end = intended_start + *length;
+        size_t address_space_start = reinterpret_cast<size_t>(virtual_map_base);
+        size_t address_space_end = address_space_start + virtual_size;
+
+        if (address_space_start > intended_end || intended_start > address_space_end) {
+            *virtual_offset = 0;
+            *length = 0;
+        } else {
+            *virtual_offset = std::max(intended_start, address_space_start);
+            *length = std::min(intended_end, address_space_end) - *virtual_offset;
+        }
+    }
 
-    boost::icl::interval_set<size_t> placeholders; ///< Mapped placeholders
-    std::mutex placeholder_mutex;                  ///< Mutex for placeholders
+    int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
+    FreeRegionManager free_manager{};
 };
 
 #else // ^^^ Linux ^^^ vvv Generic vvv
@@ -518,11 +615,13 @@ public:
         throw std::bad_alloc{};
     }
 
-    void Map(size_t virtual_offset, size_t host_offset, size_t length) {}
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm) {}
 
     void Unmap(size_t virtual_offset, size_t length) {}
 
-    void Protect(size_t virtual_offset, size_t length, bool read, bool write) {}
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute) {}
+
+    void EnableDirectMappedAddress() {}
 
     u8* backing_base{nullptr};
     u8* virtual_base{nullptr};
@@ -535,15 +634,16 @@ HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
     try {
         // Try to allocate a fastmem arena.
         // The implementation will fail with std::bad_alloc on errors.
-        impl = std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
-                                                  AlignUp(virtual_size, PageAlignment) +
-                                                      3 * HugePageSize);
+        impl =
+            std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
+                                               AlignUp(virtual_size, PageAlignment) + HugePageSize);
         backing_base = impl->backing_base;
         virtual_base = impl->virtual_base;
 
         if (virtual_base) {
-            virtual_base += 2 * HugePageSize - 1;
-            virtual_base -= reinterpret_cast<size_t>(virtual_base) & (HugePageSize - 1);
+            // Ensure the virtual base is aligned to the L2 block size.
+            virtual_base = reinterpret_cast<u8*>(
+                Common::AlignUp(reinterpret_cast<uintptr_t>(virtual_base), HugePageSize));
             virtual_base_offset = virtual_base - impl->virtual_base;
         }
 
@@ -562,7 +662,8 @@ HostMemory::HostMemory(HostMemory&&) noexcept = default;
 
 HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
 
-void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
+void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length,
+                     MemoryPermission perms) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(host_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
@@ -571,7 +672,7 @@ void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
     if (length == 0 || !virtual_base || !impl) {
         return;
     }
-    impl->Map(virtual_offset + virtual_base_offset, host_offset, length);
+    impl->Map(virtual_offset + virtual_base_offset, host_offset, length, perms);
 }
 
 void HostMemory::Unmap(size_t virtual_offset, size_t length) {
@@ -584,14 +685,22 @@ void HostMemory::Unmap(size_t virtual_offset, size_t length) {
     impl->Unmap(virtual_offset + virtual_base_offset, length);
 }
 
-void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) {
+void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write,
+                         bool execute) {
     ASSERT(virtual_offset % PageAlignment == 0);
     ASSERT(length % PageAlignment == 0);
     ASSERT(virtual_offset + length <= virtual_size);
     if (length == 0 || !virtual_base || !impl) {
         return;
     }
-    impl->Protect(virtual_offset + virtual_base_offset, length, read, write);
+    impl->Protect(virtual_offset + virtual_base_offset, length, read, write, execute);
+}
+
+void HostMemory::EnableDirectMappedAddress() {
+    if (impl) {
+        impl->EnableDirectMappedAddress();
+        virtual_size += reinterpret_cast<uintptr_t>(virtual_base);
+    }
 }
 
 } // namespace Common
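
All of the Linux paths above build the guest address space out of MAP_SHARED views of one memfd, which is what lets Map() alias the same host_offset at several virtual_offsets. A minimal standalone sketch of that backing/view split (Linux-only, glibc 2.27+ for memfd_create; error handling reduced to asserts, and the names are illustrative rather than yuzu's):

#include <cassert>
#include <cstddef>
#include <cstring>
#include <sys/mman.h>
#include <unistd.h>

int main() {
    const std::size_t size = 0x1000;

    // Anonymous in-memory file that acts as the backing store.
    const int fd = memfd_create("backing", 0);
    assert(fd != -1);
    assert(ftruncate(fd, size) == 0);

    // Two views of the same backing page: writes through one are visible in the other.
    void* view_a = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    void* view_b = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    assert(view_a != MAP_FAILED && view_b != MAP_FAILED);

    std::memcpy(view_a, "hello", 6);
    assert(std::memcmp(view_b, "hello", 6) == 0);

    munmap(view_a, size);
    munmap(view_b, size);
    close(fd);
}

Note that Unmap() above never returns address space to the kernel: a retired view is replaced with a fresh PROT_NONE anonymous mapping, so the reservation itself survives for later reuse.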
diff --git a/src/common/host_memory.h b/src/common/host_memory.h
index 447975ded..cebfacab2 100644
--- a/src/common/host_memory.h
+++ b/src/common/host_memory.h
@@ -4,11 +4,20 @@
 #pragma once
 
 #include <memory>
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/virtual_buffer.h"
 
 namespace Common {
 
+enum class MemoryPermission : u32 {
+    Read = 1 << 0,
+    Write = 1 << 1,
+    ReadWrite = Read | Write,
+    Execute = 1 << 2,
+};
+DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission)
+
 /**
  * A low level linear memory buffer, which supports multiple mappings
  * Its purpose is to rebuild a given sparse memory layout, including mirrors.
@@ -31,11 +40,13 @@ public:
     HostMemory(HostMemory&& other) noexcept;
     HostMemory& operator=(HostMemory&& other) noexcept;
 
-    void Map(size_t virtual_offset, size_t host_offset, size_t length);
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perms);
 
     void Unmap(size_t virtual_offset, size_t length);
 
-    void Protect(size_t virtual_offset, size_t length, bool read, bool write);
+    void Protect(size_t virtual_offset, size_t length, bool read, bool write, bool execute = false);
+
+    void EnableDirectMappedAddress();
 
     [[nodiscard]] u8* BackingBasePointer() noexcept {
         return backing_base;
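
DECLARE_ENUM_FLAG_OPERATORS(MemoryPermission) is what makes expressions like perms & MemoryPermission::Read legal on a scoped enum, and yuzu's True() helper collapses the result to bool. A self-contained sketch of the same pattern with the operators written out by hand (the macro itself lives in common/common_funcs.h and is not shown in this diff):

#include <cstdint>

enum class MemoryPermission : std::uint32_t {
    Read = 1 << 0,
    Write = 1 << 1,
    ReadWrite = Read | Write, // legal: enumerators still have the underlying type here
    Execute = 1 << 2,
};

constexpr MemoryPermission operator|(MemoryPermission a, MemoryPermission b) {
    return static_cast<MemoryPermission>(static_cast<std::uint32_t>(a) |
                                         static_cast<std::uint32_t>(b));
}
constexpr MemoryPermission operator&(MemoryPermission a, MemoryPermission b) {
    return static_cast<MemoryPermission>(static_cast<std::uint32_t>(a) &
                                         static_cast<std::uint32_t>(b));
}
// Equivalent of yuzu's True() helper: nonzero flag test.
constexpr bool True(MemoryPermission p) {
    return static_cast<std::uint32_t>(p) != 0;
}

static_assert(True(MemoryPermission::ReadWrite & MemoryPermission::Read));
static_assert(!True(MemoryPermission::ReadWrite & MemoryPermission::Execute));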
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 3e829253f..4666bd0a0 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -41,6 +41,7 @@ SWITCHABLE(AspectRatio, true);
 SWITCHABLE(AstcDecodeMode, true);
 SWITCHABLE(AstcRecompression, true);
 SWITCHABLE(AudioMode, true);
+SWITCHABLE(CpuBackend, true);
 SWITCHABLE(CpuAccuracy, true);
 SWITCHABLE(FullscreenMode, true);
 SWITCHABLE(GpuAccuracy, true);
@@ -155,6 +156,22 @@ bool IsFastmemEnabled() {
     return true;
 }
 
+static bool is_nce_enabled = false;
+
+void SetNceEnabled(bool is_39bit) {
+    const bool is_nce_selected = values.cpu_backend.GetValue() == CpuBackend::Nce;
+    is_nce_enabled = IsFastmemEnabled() && is_nce_selected && is_39bit;
+    if (is_nce_selected && !is_nce_enabled) {
+        LOG_WARNING(
+            Common,
+            "Program does not utilize 39-bit address space, unable to natively execute code");
+    }
+}
+
+bool IsNceEnabled() {
+    return is_nce_enabled;
+}
+
 bool IsDockedMode() {
     return values.use_docked_mode.GetValue() == Settings::ConsoleMode::Docked;
 }
diff --git a/src/common/settings.h b/src/common/settings.h
index 6425cd98f..98341ad96 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -63,6 +63,7 @@ SWITCHABLE(AspectRatio, true);
 SWITCHABLE(AstcDecodeMode, true);
 SWITCHABLE(AstcRecompression, true);
 SWITCHABLE(AudioMode, true);
+SWITCHABLE(CpuBackend, true);
 SWITCHABLE(CpuAccuracy, true);
 SWITCHABLE(FullscreenMode, true);
 SWITCHABLE(GpuAccuracy, true);
@@ -179,6 +180,14 @@ struct Values {
                                                 &use_speed_limit};
 
     // Cpu
+    SwitchableSetting<CpuBackend, true> cpu_backend{
+        linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic,
+#ifdef HAS_NCE
+        CpuBackend::Nce,
+#else
+        CpuBackend::Dynarmic,
+#endif
+        "cpu_backend", Category::Cpu};
     SwitchableSetting<CpuAccuracy, true> cpu_accuracy{linkage, CpuAccuracy::Auto,
                                                       CpuAccuracy::Auto, CpuAccuracy::Paranoid,
                                                       "cpu_accuracy", Category::Cpu};
@@ -569,6 +578,8 @@ bool IsGPULevelExtreme();
 bool IsGPULevelHigh();
 
 bool IsFastmemEnabled();
+void SetNceEnabled(bool is_64bit);
+bool IsNceEnabled();
 
 bool IsDockedMode();
 
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index 11429d7a8..d6351e57e 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -129,6 +129,8 @@ ENUM(ShaderBackend, Glsl, Glasm, SpirV);
 
 ENUM(GpuAccuracy, Normal, High, Extreme);
 
+ENUM(CpuBackend, Dynarmic, Nce);
+
 ENUM(CpuAccuracy, Auto, Accurate, Unsafe, Paranoid);
 
 ENUM(MemoryLayout, Memory_4Gb, Memory_6Gb, Memory_8Gb);
diff --git a/src/common/signal_chain.cpp b/src/common/signal_chain.cpp
new file mode 100644
index 000000000..2e4fecc48
--- /dev/null
+++ b/src/common/signal_chain.cpp
@@ -0,0 +1,42 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <dlfcn.h>
+
+#include "common/assert.h"
+#include "common/dynamic_library.h"
+#include "common/scope_exit.h"
+#include "common/signal_chain.h"
+
+namespace Common {
+
+template <typename T>
+T* LookupLibcSymbol(const char* name) {
+#if defined(__BIONIC__)
+    Common::DynamicLibrary provider("libc.so");
+    if (!provider.IsOpen()) {
+        UNREACHABLE_MSG("Failed to open libc!");
+    }
+#else
+    // For other operating environments, we assume the symbol is not overridden.
+    const char* base = nullptr;
+    Common::DynamicLibrary provider(base);
+#endif
+
+    void* sym = provider.GetSymbolAddress(name);
+    if (sym == nullptr) {
+        sym = dlsym(RTLD_DEFAULT, name);
+    }
+    if (sym == nullptr) {
+        UNREACHABLE_MSG("Unable to find symbol {}!", name);
+    }
+
+    return reinterpret_cast<T*>(sym);
+}
+
+int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
+    static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
+    return libc_sigaction(signum, act, oldact);
+}
+
+} // namespace Common
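
On Bionic, opening libc.so and resolving sigaction from it sidesteps ART's interposed wrapper; on glibc, a conventional equivalent is a dlsym lookup that skips earlier definitions. A minimal illustration of that idiom (assumes RTLD_NEXT, which requires _GNU_SOURCE — defined by default by g++ on Linux; this is a generic sketch, not yuzu's DynamicLibrary path):

#include <cassert>
#include <dlfcn.h>
#include <signal.h>

using SigActionFn = int (*)(int, const struct sigaction*, struct sigaction*);

int RawSigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
    // RTLD_NEXT resolves the next definition after the current module, skipping an
    // interposer defined earlier in the lookup order; RTLD_DEFAULT would find it instead.
    static const auto real = reinterpret_cast<SigActionFn>(dlsym(RTLD_NEXT, "sigaction"));
    assert(real != nullptr);
    return real(signum, act, oldact);
}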
diff --git a/src/common/signal_chain.h b/src/common/signal_chain.h
new file mode 100644
index 000000000..8d06a1bd1
--- /dev/null
+++ b/src/common/signal_chain.h
@@ -0,0 +1,19 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#ifndef _WIN32
+
+#include <signal.h>
+
+namespace Common {
+
+// Android's ART overrides sigaction with its own wrapper. This is problematic for SIGSEGV
+// in particular, because ART's handler accesses tpidr_el0, which conflicts with NCE.
+// This extracts the libc symbol and calls it directly.
+int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact);
+
+} // namespace Common
+
+#endif
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index caca9a123..012fdc1e0 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -10,7 +10,7 @@
 #include "common/x64/rdtsc.h"
 #endif
 
-#if defined(ARCHITECTURE_arm64) && defined(__linux__)
+#ifdef HAS_NCE
 #include "common/arm64/native_clock.h"
 #endif
 
@@ -68,7 +68,7 @@ std::unique_ptr<WallClock> CreateOptimalClock() {
         //  - Is not more precise than 1 GHz (1ns resolution)
         return std::make_unique<StandardWallClock>();
     }
-#elif defined(ARCHITECTURE_arm64) && defined(__linux__)
+#elif defined(HAS_NCE)
     return std::make_unique<Arm64::NativeClock>();
 #else
     return std::make_unique<StandardWallClock>();
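
Arm64::NativeClock itself is not part of this diff (it already lived under common/arm64/), but the mechanism it builds on is the AArch64 generic timer, whose counter and frequency are readable from EL0. A hedged sketch of such a counter read (GCC/Clang inline assembly, AArch64 only; the function names here are illustrative):

#include <cstdint>

// Current value of the EL0-visible virtual counter.
inline std::uint64_t ReadCntvct() {
    std::uint64_t value;
    asm volatile("mrs %0, cntvct_el0" : "=r"(value));
    return value;
}

// Counter frequency in Hz, as reported by the hardware.
inline std::uint64_t ReadCntfrq() {
    std::uint64_t value;
    asm volatile("mrs %0, cntfrq_el0" : "=r"(value));
    return value;
}

// Convert a tick delta to nanoseconds (beware overflow for very large deltas).
inline std::uint64_t TicksToNs(std::uint64_t ticks) {
    return ticks * 1'000'000'000ULL / ReadCntfrq();
}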
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 66c10fc3f..85583941c 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -926,6 +926,22 @@ if (ENABLE_WEB_SERVICE)
     target_link_libraries(core PRIVATE web_service)
 endif()
 
+if (HAS_NCE)
+    enable_language(C ASM)
+    set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp")
+
+    target_sources(core PRIVATE
+        arm/nce/arm_nce.cpp
+        arm/nce/arm_nce.h
+        arm/nce/arm_nce.s
+        arm/nce/guest_context.h
+        arm/nce/patcher.cpp
+        arm/nce/patcher.h
+        arm/nce/instructions.h
+    )
+    target_link_libraries(core PRIVATE merry::oaknut)
+endif()
+
 if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
     target_sources(core PRIVATE
         arm/dynarmic/arm_dynarmic.h
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index 558fba5bd..d231bf89c 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -201,6 +201,8 @@ void ARM_Interface::Run() {
         if (True(hr & HaltReason::DataAbort)) {
             if (system.DebuggerEnabled()) {
                 system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
+            } else {
+                LogBacktrace();
             }
             current_thread->RequestSuspend(SuspendType::Debug);
             break;
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 3d866ff6f..a9d9ac09d 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -81,6 +81,9 @@ public:
     // thread context to be 800 bytes in size.
     static_assert(sizeof(ThreadContext64) == 0x320);
 
+    /// Perform any backend-specific initialization.
+    virtual void Initialize() {}
+
     /// Runs the CPU until an event happens
     void Run();
 
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
new file mode 100644
index 000000000..f7bdafd39
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -0,0 +1,400 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <cinttypes>
+#include <memory>
+
+#include "common/signal_chain.h"
+#include "core/arm/nce/arm_nce.h"
+#include "core/arm/nce/patcher.h"
+#include "core/core.h"
+#include "core/memory.h"
+
+#include "core/hle/kernel/k_process.h"
+
+#include <signal.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+namespace Core {
+
+namespace {
+
+struct sigaction g_orig_action;
+
+// Verify assembly offsets.
+using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
+static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
+static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
+static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
+
+fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
+    _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
+    while (header->magic != FPSIMD_MAGIC) {
+        header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
+    }
+    return reinterpret_cast<fpsimd_context*>(header);
+}
+
+} // namespace
+
+void* ARM_NCE::RestoreGuestContext(void* raw_context) {
+    // Retrieve the host context.
+    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
+
+    // Thread-local parameters will be located in x9.
+    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
+    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
+
+    // Retrieve the host floating point state.
+    auto* fpctx = GetFloatingPointState(host_ctx);
+
+    // Save host callee-saved registers.
+    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
+                sizeof(guest_ctx->host_ctx.host_saved_vregs));
+    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
+                sizeof(guest_ctx->host_ctx.host_saved_regs));
+
+    // Save stack pointer.
+    guest_ctx->host_ctx.host_sp = host_ctx.sp;
+
+    // Restore all guest state except tpidr_el0.
+    host_ctx.sp = guest_ctx->sp;
+    host_ctx.pc = guest_ctx->pc;
+    host_ctx.pstate = guest_ctx->pstate;
+    fpctx->fpcr = guest_ctx->fpcr;
+    fpctx->fpsr = guest_ctx->fpsr;
+    std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
+    std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
+
+    // Return the new thread-local storage pointer.
+    return tpidr;
+}
+
+void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
+    // Retrieve the host context.
+    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
+
+    // Retrieve the host floating point state.
+    auto* fpctx = GetFloatingPointState(host_ctx);
+
+    // Save all guest registers except tpidr_el0.
+    std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
+    std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
+    guest_ctx->fpsr = fpctx->fpsr;
+    guest_ctx->fpcr = fpctx->fpcr;
+    guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
+    guest_ctx->pc = host_ctx.pc;
+    guest_ctx->sp = host_ctx.sp;
+
+    // Restore stack pointer.
+    host_ctx.sp = guest_ctx->host_ctx.host_sp;
+
+    // Restore host callee-saved registers.
+    std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
+                sizeof(guest_ctx->host_ctx.host_saved_regs));
+    std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
+                sizeof(guest_ctx->host_ctx.host_saved_vregs));
+
+    // Return from the call on exit by setting pc to x30.
+    host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
+
+    // Clear esr_el1 and return it.
+    host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
+}
+
+bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
+    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
+    auto* info = static_cast<siginfo_t*>(raw_info);
+
+    // Try to handle an invalid access.
+    // TODO: handle accesses which split a page?
+    const Common::ProcessAddress addr =
+        (reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
+    if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
+        // We handled the access successfully and are returning to guest code.
+        return true;
+    }
+
+    // We can't handle the access, so determine why we crashed.
+    const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
+
+    // For data aborts, skip the instruction and return to guest code.
+    // This will allow games to continue in many scenarios where they would otherwise crash.
+    if (!is_prefetch_abort) {
+        host_ctx.pc += 4;
+        return true;
+    }
+
+    // This is a prefetch abort.
+    guest_ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::PrefetchAbort));
+
+    // Forcibly mark the context as locked. We are still running.
+    // We may race with SignalInterrupt here:
+    // - If we lose the race, then SignalInterrupt will send us a signal we are masking,
+    //   and it will do nothing when it is unmasked, as we have already left guest code.
+    // - If we win the race, then SignalInterrupt will wait for us to unlock first.
+    auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
+    thread_params.lock.store(SpinLockLocked);
+
+    // Return to host.
+    SaveGuestContext(guest_ctx, raw_context);
+    return false;
+}
+
+void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
+    return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
+}
+
+HaltReason ARM_NCE::RunJit() {
+    // Get the thread parameters.
+    // TODO: pass the current thread down from ::Run
+    auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
+    auto* thread_params = &thread->GetNativeExecutionParameters();
+
+    {
+        // Lock our core context.
+        std::scoped_lock lk{lock};
+
+        // We should not be running.
+        ASSERT(running_thread == nullptr);
+
+        // Check if we need to run. If we have already been halted, we are done.
+        u64 halt = guest_ctx.esr_el1.exchange(0);
+        if (halt != 0) {
+            return static_cast<HaltReason>(halt);
+        }
+
+        // Mark that we are running.
+        running_thread = thread;
+
+        // Acquire the lock on the thread parameters.
+        // This allows us to force synchronization with SignalInterrupt.
+        LockThreadParameters(thread_params);
+    }
+
+    // Assign current members.
+    guest_ctx.parent = this;
+    thread_params->native_context = &guest_ctx;
+    thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
+    thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
+    thread_params->is_running = true;
+
+    HaltReason halt{};
+
+    // TODO: finding and creating the post handler needs to be locked
+    // to deal with dynamic loading of NROs.
+    const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
+    if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
+        halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
+    } else {
+        halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
+    }
+
+    // Unload members.
+    // The thread does not change, so we can persist the old reference.
+    guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
+    thread_params->native_context = nullptr;
+    thread_params->is_running = false;
+
+    // Unlock the thread parameters.
+    UnlockThreadParameters(thread_params);
+
+    {
+        // Lock the core context.
+        std::scoped_lock lk{lock};
+
+        // On exit, we no longer have an active thread.
+        running_thread = nullptr;
+    }
+
+    // Return the halt reason.
+    return halt;
+}
+
+HaltReason ARM_NCE::StepJit() {
+    return HaltReason::StepThread;
+}
+
+u32 ARM_NCE::GetSvcNumber() const {
+    return guest_ctx.svc_swi;
+}
+
+ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
+    : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
+    guest_ctx.system = &system_;
+}
+
+ARM_NCE::~ARM_NCE() = default;
+
+void ARM_NCE::Initialize() {
+    thread_id = gettid();
+
+    // Setup our signals
+    static std::once_flag flag;
+    std::call_once(flag, [] {
+        using HandlerType = decltype(sigaction::sa_sigaction);
+
+        sigset_t signal_mask;
+        sigemptyset(&signal_mask);
+        sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
+        sigaddset(&signal_mask, BreakFromRunCodeSignal);
+        sigaddset(&signal_mask, GuestFaultSignal);
+
+        struct sigaction return_to_run_code_action {};
+        return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
+        return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
+            &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
+        return_to_run_code_action.sa_mask = signal_mask;
+        Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
+                          nullptr);
+
+        struct sigaction break_from_run_code_action {};
+        break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
+        break_from_run_code_action.sa_sigaction =
+            reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
+        break_from_run_code_action.sa_mask = signal_mask;
+        Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);
+
+        struct sigaction fault_action {};
+        fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
+        fault_action.sa_sigaction =
+            reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
+        fault_action.sa_mask = signal_mask;
+        Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);
+
+        // Simplify call for g_orig_action.
+        // These fields occupy the same space in memory, so this should be a no-op in practice.
+        if (!(g_orig_action.sa_flags & SA_SIGINFO)) {
+            g_orig_action.sa_sigaction =
+                reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler);
+        }
+    });
+}
+
+void ARM_NCE::SetPC(u64 pc) {
+    guest_ctx.pc = pc;
+}
+
+u64 ARM_NCE::GetPC() const {
+    return guest_ctx.pc;
+}
+
+u64 ARM_NCE::GetSP() const {
+    return guest_ctx.sp;
+}
+
+u64 ARM_NCE::GetReg(int index) const {
+    return guest_ctx.cpu_registers[index];
+}
+
+void ARM_NCE::SetReg(int index, u64 value) {
+    guest_ctx.cpu_registers[index] = value;
+}
+
+u128 ARM_NCE::GetVectorReg(int index) const {
+    return guest_ctx.vector_registers[index];
+}
+
+void ARM_NCE::SetVectorReg(int index, u128 value) {
+    guest_ctx.vector_registers[index] = value;
+}
+
+u32 ARM_NCE::GetPSTATE() const {
+    return guest_ctx.pstate;
+}
+
+void ARM_NCE::SetPSTATE(u32 pstate) {
+    guest_ctx.pstate = pstate;
+}
+
+u64 ARM_NCE::GetTlsAddress() const {
+    return guest_ctx.tpidrro_el0;
+}
+
+void ARM_NCE::SetTlsAddress(u64 address) {
+    guest_ctx.tpidrro_el0 = address;
+}
+
+u64 ARM_NCE::GetTPIDR_EL0() const {
+    return guest_ctx.tpidr_el0;
+}
+
+void ARM_NCE::SetTPIDR_EL0(u64 value) {
+    guest_ctx.tpidr_el0 = value;
+}
+
+void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
+    ctx.cpu_registers = guest_ctx.cpu_registers;
+    ctx.sp = guest_ctx.sp;
+    ctx.pc = guest_ctx.pc;
+    ctx.pstate = guest_ctx.pstate;
+    ctx.vector_registers = guest_ctx.vector_registers;
+    ctx.fpcr = guest_ctx.fpcr;
+    ctx.fpsr = guest_ctx.fpsr;
+    ctx.tpidr = guest_ctx.tpidr_el0;
+}
+
+void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
+    guest_ctx.cpu_registers = ctx.cpu_registers;
+    guest_ctx.sp = ctx.sp;
+    guest_ctx.pc = ctx.pc;
+    guest_ctx.pstate = ctx.pstate;
+    guest_ctx.vector_registers = ctx.vector_registers;
+    guest_ctx.fpcr = ctx.fpcr;
+    guest_ctx.fpsr = ctx.fpsr;
+    guest_ctx.tpidr_el0 = ctx.tpidr;
+}
+
+void ARM_NCE::SignalInterrupt() {
+    // Lock core context.
+    std::scoped_lock lk{lock};
+
+    // Add break loop condition.
+    guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));
+
+    // If there is no thread running, we are done.
+    if (running_thread == nullptr) {
+        return;
+    }
+
+    // Lock the thread context.
+    auto* params = &running_thread->GetNativeExecutionParameters();
+    LockThreadParameters(params);
+
+    if (params->is_running) {
+        // We should signal to the running thread.
+        // The running thread will unlock the thread context.
+        syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
+    } else {
+        // If the thread is no longer running, we have nothing to do.
+        UnlockThreadParameters(params);
+    }
+}
+
+void ARM_NCE::ClearInterrupt() {
+    guest_ctx.esr_el1 = {};
+}
+
+void ARM_NCE::ClearInstructionCache() {
+    // TODO: This is not possible to implement correctly on Linux because
+    // we do not have any access to ic iallu.
+
+    // Require accesses to complete.
+    std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+
+void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
+    this->ClearInstructionCache();
+}
+
+void ARM_NCE::ClearExclusiveState() {
+    // No-op.
+}
+
+void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
+                               std::size_t new_address_space_size_in_bits) {
+    // No-op. Page table is never used.
+}
+
+} // namespace Core
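
The guest/host handshake above hinges on the per-thread lock word in NativeExecutionParameters: RunJit locks it before entering guest code, HandleGuestFault force-locks it when bailing out, and SignalInterrupt only issues tkill while holding it and seeing is_running. The real LockThreadParameters/UnlockThreadParameters are implemented in arm_nce.s; the following reduced sketch restates the protocol with std::atomic (the SpinLock* encodings and names are assumptions for illustration):

#include <atomic>
#include <cstdint>

constexpr std::uint32_t SpinLockUnlocked = 0; // assumed encoding
constexpr std::uint32_t SpinLockLocked = 1;   // assumed encoding

struct ThreadParams {
    std::atomic<std::uint32_t> lock{SpinLockUnlocked};
    bool is_running{false};
};

void Lock(ThreadParams& p) {
    std::uint32_t expected = SpinLockUnlocked;
    while (!p.lock.compare_exchange_weak(expected, SpinLockLocked, std::memory_order_acquire)) {
        expected = SpinLockUnlocked; // reset and spin until the holder releases
    }
}

void Unlock(ThreadParams& p) {
    p.lock.store(SpinLockUnlocked, std::memory_order_release);
}

// Interrupt path: only signal a thread that is provably still inside guest code.
template <typename SendSignal>
void SignalInterruptSketch(ThreadParams& p, SendSignal&& send_signal) {
    Lock(p);
    if (p.is_running) {
        send_signal(); // e.g. syscall(SYS_tkill, tid, BreakFromRunCodeSignal)
        // The signaled thread unlocks after it has saved the guest context.
    } else {
        Unlock(p); // nothing to interrupt; release immediately
    }
}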
diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h
new file mode 100644
index 000000000..5fbd6dbf3
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.h
@@ -0,0 +1,108 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <memory>
8#include <span>
9#include <unordered_map>
10#include <vector>
11
12#include "core/arm/arm_interface.h"
13#include "core/arm/nce/guest_context.h"
14
15namespace Core::Memory {
16class Memory;
17}
18
19namespace Core {
20
21class System;
22
23class ARM_NCE final : public ARM_Interface {
24public:
25 ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);
26
27 ~ARM_NCE() override;
28
29 void Initialize() override;
30 void SetPC(u64 pc) override;
31 u64 GetPC() const override;
32 u64 GetSP() const override;
33 u64 GetReg(int index) const override;
34 void SetReg(int index, u64 value) override;
35 u128 GetVectorReg(int index) const override;
36 void SetVectorReg(int index, u128 value) override;
37
38 u32 GetPSTATE() const override;
39 void SetPSTATE(u32 pstate) override;
40 u64 GetTlsAddress() const override;
41 void SetTlsAddress(u64 address) override;
42 void SetTPIDR_EL0(u64 value) override;
43 u64 GetTPIDR_EL0() const override;
44
45 Architecture GetArchitecture() const override {
46 return Architecture::Aarch64;
47 }
48
49 void SaveContext(ThreadContext32& ctx) const override {}
50 void SaveContext(ThreadContext64& ctx) const override;
51 void LoadContext(const ThreadContext32& ctx) override {}
52 void LoadContext(const ThreadContext64& ctx) override;
53
54 void SignalInterrupt() override;
55 void ClearInterrupt() override;
56 void ClearExclusiveState() override;
57 void ClearInstructionCache() override;
58 void InvalidateCacheRange(u64 addr, std::size_t size) override;
59 void PageTableChanged(Common::PageTable& new_page_table,
60 std::size_t new_address_space_size_in_bits) override;
61
62protected:
63 HaltReason RunJit() override;
64 HaltReason StepJit() override;
65
66 u32 GetSvcNumber() const override;
67
68 const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
69 return nullptr;
70 }
71
72 void RewindBreakpointInstruction() override {}
73
74private:
75 // Assembly definitions.
76 static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
77 u64 trampoline_addr);
78 static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr);
79
80 static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info,
81 void* raw_context);
82 static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context);
83 static void GuestFaultSignalHandler(int sig, void* info, void* raw_context);
84
85 static void LockThreadParameters(void* tpidr);
86 static void UnlockThreadParameters(void* tpidr);
87
88private:
89 // C++ implementation functions for assembly definitions.
90 static void* RestoreGuestContext(void* raw_context);
91 static void SaveGuestContext(GuestContext* ctx, void* raw_context);
92 static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context);
93 static void HandleHostFault(int sig, void* info, void* raw_context);
94
95public:
96 // Members set on initialization.
97 std::size_t core_index{};
98 pid_t thread_id{-1};
99
100 // Core context.
101 GuestContext guest_ctx;
102
103 // Thread and invalidation info.
104 std::mutex lock;
105 Kernel::KThread* running_thread{};
106};
107
108} // namespace Core
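Note that thread_id must name the OS thread that executes guest code, because SignalInterrupt targets it with SYS_tkill; the cpu_manager change later in this diff calls Initialize() on the emulation thread itself for exactly this reason. A hedged sketch of the capture (Linux-specific; the function name is invented for illustration):

    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>

    // Must run on the thread that will execute guest code, so that a later
    // syscall(SYS_tkill, thread_id, ...) interrupts the right thread.
    void CaptureEmulationThreadId(pid_t& out_thread_id) {
        out_thread_id = static_cast<pid_t>(syscall(SYS_gettid));
    }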
diff --git a/src/core/arm/nce/arm_nce.s b/src/core/arm/nce/arm_nce.s
new file mode 100644
index 000000000..b98e09f31
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.s
@@ -0,0 +1,222 @@
1/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
2/* SPDX-License-Identifier: GPL-2.0-or-later */
3
4#include "core/arm/nce/arm_nce_asm_definitions.h"
5
6#define LOAD_IMMEDIATE_32(reg, val) \
7 mov reg, #(((val) >> 0x00) & 0xFFFF); \
8 movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16
9
10
11/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
12.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
13.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
14.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
15_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
16 /* Back up host sp to x3. */
17 /* Back up host tpidr_el0 to x4. */
18 mov x3, sp
19 mrs x4, tpidr_el0
20
21 /* Load guest sp. x5 is used as a scratch register. */
22 ldr x5, [x1, #(GuestContextSp)]
23 mov sp, x5
24
25 /* Offset GuestContext pointer to the host member. */
26 add x5, x1, #(GuestContextHostContext)
27
28 /* Save original host sp and tpidr_el0 (x3, x4) to host context. */
29 stp x3, x4, [x5, #(HostContextSpTpidrEl0)]
30
31 /* Save all callee-saved host GPRs. */
32 stp x19, x20, [x5, #(HostContextRegs+0x0)]
33 stp x21, x22, [x5, #(HostContextRegs+0x10)]
34 stp x23, x24, [x5, #(HostContextRegs+0x20)]
35 stp x25, x26, [x5, #(HostContextRegs+0x30)]
36 stp x27, x28, [x5, #(HostContextRegs+0x40)]
37 stp x29, x30, [x5, #(HostContextRegs+0x50)]
38
39 /* Save all callee-saved host FPRs. */
40 stp q8, q9, [x5, #(HostContextVregs+0x0)]
41 stp q10, q11, [x5, #(HostContextVregs+0x20)]
42 stp q12, q13, [x5, #(HostContextVregs+0x40)]
43 stp q14, q15, [x5, #(HostContextVregs+0x60)]
44
45 /* Load guest tpidr_el0 from argument. */
46 msr tpidr_el0, x0
47
48 /* Tail call the trampoline to restore guest state. */
49 br x2
50
51
52/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
53.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
54.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
55.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
56_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
57 /* This jumps to the signal handler, which will restore the entire context. */
58 /* On entry, x0 = thread id, which is already in the right place. */
59
60 /* Move tpidr to x9 so it is not trampled. */
61 mov x9, x1
62
63 /* Set up arguments. */
64 mov x8, #(__NR_tkill)
65 mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
66
67 /* Issue the tkill syscall; the kernel delivers the signal and its handler takes over. */
68 svc #0
69
70 /* Block execution from flowing here. */
71 brk #1000
72
73
74/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
75.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
76.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
77.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
78_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
79 stp x29, x30, [sp, #-0x10]!
80 mov x29, sp
81
82 /* Call the context restorer with the raw context. */
83 mov x0, x2
84 bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv
85
86 /* Save the old value of tpidr_el0. */
87 mrs x8, tpidr_el0
88 ldr x9, [x0, #(TpidrEl0NativeContext)]
89 str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)]
90
91 /* Set our new tpidr_el0. */
92 msr tpidr_el0, x0
93
94 /* Unlock the context. */
95 bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
96
97 /* Returning from here will enter the guest. */
98 ldp x29, x30, [sp], #0x10
99 ret
100
101
102/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
103.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
104.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
105.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
106_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
107 /* Check to see if we have the correct TLS magic. */
108 mrs x8, tpidr_el0
109 ldr w9, [x8, #(TpidrEl0TlsMagic)]
110
111 LOAD_IMMEDIATE_32(w10, TlsMagic)
112
113 cmp w9, w10
114 b.ne 1f
115
116 /* Correct TLS magic, so this is a guest interrupt. */
117 /* Restore host tpidr_el0. */
118 ldr x0, [x8, #(TpidrEl0NativeContext)]
119 ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
120 msr tpidr_el0, x3
121
122 /* Tail call the restorer. */
123 mov x1, x2
124 b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv
125
126 /* Returning from here will enter host code. */
127
1281:
129 /* Incorrect TLS magic, so this is a spurious signal. */
130 ret
131
132
133/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
134.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
135.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
136.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
137_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
138 /* Check to see if we have the correct TLS magic. */
139 mrs x8, tpidr_el0
140 ldr w9, [x8, #(TpidrEl0TlsMagic)]
141
142 LOAD_IMMEDIATE_32(w10, TlsMagic)
143
144 cmp w9, w10
145 b.eq 1f
146
147 /* Incorrect TLS magic, so this is a host fault. */
148 /* Tail call the handler. */
149 b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_
150
1511:
152 /* Correct TLS magic, so this is a guest fault. */
153 stp x29, x30, [sp, #-0x20]!
154 str x19, [sp, #0x10]
155 mov x29, sp
156
157 /* Save the old tpidr_el0. */
158 mov x19, x8
159
160 /* Restore host tpidr_el0. */
161 ldr x0, [x8, #(TpidrEl0NativeContext)]
162 ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
163 msr tpidr_el0, x3
164
165 /* Call the handler. */
166 bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_
167
168 /* If the handler returned false, we want to preserve the host tpidr_el0. */
169 cbz x0, 2f
170
171 /* Otherwise, restore guest tpidr_el0. */
172 msr tpidr_el0, x19
173
1742:
175 ldr x19, [sp, #0x10]
176 ldp x29, x30, [sp], #0x20
177 ret
178
179
180/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
181.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
182.global _ZN4Core7ARM_NCE20LockThreadParametersEPv
183.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
184_ZN4Core7ARM_NCE20LockThreadParametersEPv:
185 /* Offset to lock member. */
186 add x0, x0, #(TpidrEl0Lock)
187
1881:
189 /* Clear the monitor. */
190 clrex
191
1922:
193 /* Load-linked with acquire ordering. */
194 ldaxr w1, [x0]
195
196 /* If the value was SpinLockLocked, clear monitor and retry. */
197 cbz w1, 1b
198
199 /* Store-conditional SpinLockLocked with relaxed ordering. */
200 stxr w1, wzr, [x0]
201
202 /* If we failed to store, retry. */
203 cbnz w1, 2b
204
205 ret
206
207
208/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
209.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
210.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
211.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
212_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
213 /* Offset to lock member. */
214 add x0, x0, #(TpidrEl0Lock)
215
216 /* Load SpinLockUnlocked. */
217 mov w1, #(SpinLockUnlocked)
218
219 /* Store value with release ordering. */
220 stlr w1, [x0]
221
222 ret
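LockThreadParameters/UnlockThreadParameters implement an LL/SC spinlock with an inverted convention: 0 means locked and 1 means unlocked, per the definitions in the next file. The same protocol in portable C++, as a sketch rather than the shipped implementation:

    #include <atomic>
    #include <cstdint>

    constexpr std::uint32_t SpinLockLocked = 0;
    constexpr std::uint32_t SpinLockUnlocked = 1;

    void Lock(std::atomic<std::uint32_t>& lock) {
        std::uint32_t expected = SpinLockUnlocked;
        // The ldaxr/stxr pair behaves like a weak CAS with acquire ordering.
        while (!lock.compare_exchange_weak(expected, SpinLockLocked,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed)) {
            expected = SpinLockUnlocked; // retry until we observe it unlocked
        }
    }

    void Unlock(std::atomic<std::uint32_t>& lock) {
        lock.store(SpinLockUnlocked, std::memory_order_release); // stlr
    }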
diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h
new file mode 100644
index 000000000..8a9b285b5
--- /dev/null
+++ b/src/core/arm/nce/arm_nce_asm_definitions.h
@@ -0,0 +1,29 @@
1/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
2/* SPDX-License-Identifier: GPL-2.0-or-later */
3
4#pragma once
5
6#define __ASSEMBLY__
7
8#include <asm-generic/signal.h>
9#include <asm-generic/unistd.h>
10
11#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
12#define BreakFromRunCodeSignal SIGURG
13#define GuestFaultSignal SIGSEGV
14
15#define GuestContextSp 0xF8
16#define GuestContextHostContext 0x320
17
18#define HostContextSpTpidrEl0 0xE0
19#define HostContextTpidrEl0 0xE8
20#define HostContextRegs 0x0
21#define HostContextVregs 0x60
22
23#define TpidrEl0NativeContext 0x10
24#define TpidrEl0Lock 0x18
25#define TpidrEl0TlsMagic 0x20
26#define TlsMagic 0x555a5559
27
28#define SpinLockLocked 0
29#define SpinLockUnlocked 1
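TlsMagic is simply the bytes 'Y' 'U' 'Z' 'U' packed little-endian, matching the magic field that k_thread.h (later in this diff) initializes with Common::MakeMagic. A quick check, assuming the packer puts its first argument in the low byte:

    #include <cstdint>

    constexpr std::uint32_t PackMagic(char a, char b, char c, char d) {
        return std::uint32_t(a) | (std::uint32_t(b) << 8) |
               (std::uint32_t(c) << 16) | (std::uint32_t(d) << 24);
    }
    static_assert(PackMagic('Y', 'U', 'Z', 'U') == 0x555a5559); // == TlsMagic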
diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h
new file mode 100644
index 000000000..0767a0337
--- /dev/null
+++ b/src/core/arm/nce/guest_context.h
@@ -0,0 +1,50 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8#include "core/arm/arm_interface.h"
9#include "core/arm/nce/arm_nce_asm_definitions.h"
10
11namespace Core {
12
13class ARM_NCE;
14class System;
15
16struct HostContext {
17 alignas(16) std::array<u64, 12> host_saved_regs{};
18 alignas(16) std::array<u128, 8> host_saved_vregs{};
19 u64 host_sp{};
20 void* host_tpidr_el0{};
21};
22
23struct GuestContext {
24 std::array<u64, 31> cpu_registers{};
25 u64 sp{};
26 u64 pc{};
27 u32 fpcr{};
28 u32 fpsr{};
29 std::array<u128, 32> vector_registers{};
30 u32 pstate{};
31 alignas(16) HostContext host_ctx{};
32 u64 tpidrro_el0{};
33 u64 tpidr_el0{};
34 std::atomic<u64> esr_el1{};
35 u32 nzcv{};
36 u32 svc_swi{};
37 System* system{};
38 ARM_NCE* parent{};
39};
40
41// Verify assembly offsets.
42static_assert(offsetof(GuestContext, sp) == GuestContextSp);
43static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext);
44static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0);
45static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0);
46static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0);
47static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs);
48static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs);
49
50} // namespace Core
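The asserted constants fall out of the field sizes: 31 u64 GPRs put sp at 0xF8, the scalars and 32 u128 vector registers end at 0x314, and alignas(16) rounds host_ctx up to 0x320. A sketch of the arithmetic, assuming u128 is 16 bytes and no padding intervenes beyond the alignment shown above:

    static_assert(31 * sizeof(u64) == 0xF8);            // cpu_registers -> sp
    static_assert(0xF8 + 2 * sizeof(u64) == 0x108);     // sp, pc -> fpcr
    static_assert(0x108 + 2 * sizeof(u32) == 0x110);    // fpcr, fpsr -> vregs
    static_assert(0x110 + 32 * sizeof(u128) == 0x310);  // vregs -> pstate
    // pstate ends at 0x314; alignas(16) pads host_ctx up to 0x320.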
diff --git a/src/core/arm/nce/instructions.h b/src/core/arm/nce/instructions.h
new file mode 100644
index 000000000..5b56ff857
--- /dev/null
+++ b/src/core/arm/nce/instructions.h
@@ -0,0 +1,147 @@
1// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors
2// SPDX-License-Identifier: MPL-2.0
3#pragma once
4#include "common/bit_field.h"
5#include "common/common_types.h"
6
7namespace Core::NCE {
8
9enum SystemRegister : u32 {
10 TpidrEl0 = 0x5E82,
11 TpidrroEl0 = 0x5E83,
12 CntfrqEl0 = 0x5F00,
13 CntpctEl0 = 0x5F01,
14};
15
16// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call-
17union SVC {
18 constexpr explicit SVC(u32 raw_) : raw{raw_} {}
19
20 constexpr bool Verify() {
21 return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0);
22 }
23
24 constexpr u32 GetSig0() {
25 return decltype(sig0)::ExtractValue(raw);
26 }
27
28 constexpr u32 GetValue() {
29 return decltype(value)::ExtractValue(raw);
30 }
31
32 constexpr u32 GetSig1() {
33 return decltype(sig1)::ExtractValue(raw);
34 }
35
36 u32 raw;
37
38private:
39 BitField<0, 5, u32> sig0; // 0x1
40 BitField<5, 16, u32> value; // 16-bit immediate
41 BitField<21, 11, u32> sig1; // 0x6A0
42};
43static_assert(sizeof(SVC) == sizeof(u32));
44static_assert(SVC(0xD40000C1).Verify());
45static_assert(SVC(0xD40000C1).GetValue() == 0x6);
46
47// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register-
48union MRS {
49 constexpr explicit MRS(u32 raw_) : raw{raw_} {}
50
51 constexpr bool Verify() {
52 return (this->GetSig() == 0xD53);
53 }
54
55 constexpr u32 GetRt() {
56 return decltype(rt)::ExtractValue(raw);
57 }
58
59 constexpr u32 GetSystemReg() {
60 return decltype(system_reg)::ExtractValue(raw);
61 }
62
63 constexpr u32 GetSig() {
64 return decltype(sig)::ExtractValue(raw);
65 }
66
67 u32 raw;
68
69private:
70 BitField<0, 5, u32> rt; // destination register
71 BitField<5, 15, u32> system_reg; // source system register
72 BitField<20, 12, u32> sig; // 0xD53
73};
74static_assert(sizeof(MRS) == sizeof(u32));
75static_assert(MRS(0xD53BE020).Verify());
76static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0);
77static_assert(MRS(0xD53BE020).GetRt() == 0x0);
78
79// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-
80union MSR {
81 constexpr explicit MSR(u32 raw_) : raw{raw_} {}
82
83 constexpr bool Verify() {
84 return this->GetSig() == 0xD51;
85 }
86
87 constexpr u32 GetRt() {
88 return decltype(rt)::ExtractValue(raw);
89 }
90
91 constexpr u32 GetSystemReg() {
92 return decltype(system_reg)::ExtractValue(raw);
93 }
94
95 constexpr u32 GetSig() {
96 return decltype(sig)::ExtractValue(raw);
97 }
98
99 u32 raw;
100
101private:
102 BitField<0, 5, u32> rt; // source register
103 BitField<5, 15, u32> system_reg; // destination system register
104 BitField<20, 12, u32> sig; // 0xD51
105};
106static_assert(sizeof(MSR) == sizeof(u32));
107static_assert(MSR(0xD51BD040).Verify());
108static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0);
109static_assert(MSR(0xD51BD040).GetRt() == 0x0);
110
111// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register-
112// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers-
113// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register-
114// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers-
115union Exclusive {
116 constexpr explicit Exclusive(u32 raw_) : raw{raw_} {}
117
118 constexpr bool Verify() {
119 return this->GetSig() == 0x10;
120 }
121
122 constexpr u32 GetSig() {
123 return decltype(sig)::ExtractValue(raw);
124 }
125
126 constexpr u32 AsOrdered() {
127 return raw | decltype(o0)::FormatValue(1);
128 }
129
130 u32 raw;
131
132private:
133 BitField<0, 5, u32> rt; // data register
134 BitField<5, 5, u32> rn; // base address register
135 BitField<10, 5, u32> rt2; // second data register (pair forms)
136 BitField<15, 1, u32> o0; // ordered
137 BitField<16, 5, u32> rs; // status register
138 BitField<21, 2, u32> l; // operation type
139 BitField<23, 7, u32> sig; // 0x10
140 BitField<30, 2, u32> size; // size
141};
142static_assert(Exclusive(0xC85FFC00).Verify());
143static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00);
144static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00);
145static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440);
146
147} // namespace Core::NCE
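A sketch of how the patcher (next file) drives these decoders: view the text segment as little-endian u32 words and test each against the union signatures. Assumes this header is included:

    #include <cstdint>
    #include <span>

    void ScanText(std::span<const std::uint32_t> text_words) {
        for (const std::uint32_t word : text_words) {
            if (Core::NCE::SVC svc{word}; svc.Verify()) {
                // svc.GetValue() is the 16-bit immediate, e.g. 0x6 for 0xD40000C1.
            } else if (Core::NCE::Exclusive ex{word}; ex.Verify()) {
                // ex.AsOrdered() returns the word with the o0 (ordered) bit set.
            }
        }
    }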
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp
new file mode 100644
index 000000000..ec8527224
--- /dev/null
+++ b/src/core/arm/nce/patcher.cpp
@@ -0,0 +1,474 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/arm64/native_clock.h"
5#include "common/bit_cast.h"
6#include "common/literals.h"
7#include "core/arm/nce/arm_nce.h"
8#include "core/arm/nce/guest_context.h"
9#include "core/arm/nce/instructions.h"
10#include "core/arm/nce/patcher.h"
11#include "core/core.h"
12#include "core/core_timing.h"
13#include "core/hle/kernel/svc.h"
14
15namespace Core::NCE {
16
17using namespace Common::Literals;
18using namespace oaknut::util;
19
20using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
21
22constexpr size_t MaxRelativeBranch = 128_MiB;
23constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
24
25Patcher::Patcher() : c(m_patch_instructions) {}
26
27Patcher::~Patcher() = default;
28
29void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
30 const Kernel::CodeSet::Segment& code) {
31
32 // Write save context helper function.
33 c.l(m_save_context);
34 WriteSaveContext();
35
36 // Write load context helper function.
37 c.l(m_load_context);
38 WriteLoadContext();
39
40 // Retrieve text segment data.
41 const auto text = std::span{program_image}.subspan(code.offset, code.size);
42 const auto text_words =
43 std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)};
44
45 // Loop through instructions, patching as needed.
46 for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
47 const u32 inst = text_words[i];
48
49 const auto AddRelocations = [&] {
50 const uintptr_t this_offset = i * sizeof(u32);
51 const uintptr_t next_offset = this_offset + sizeof(u32);
52
53 // Relocate from here to patch.
54 this->BranchToPatch(this_offset);
55
56 // Relocate from patch to next instruction.
57 return next_offset;
58 };
59
60 // SVC
61 if (auto svc = SVC{inst}; svc.Verify()) {
62 WriteSvcTrampoline(AddRelocations(), svc.GetValue());
63 continue;
64 }
65
66 // MRS Xn, TPIDR_EL0
67 // MRS Xn, TPIDRRO_EL0
68 if (auto mrs = MRS{inst};
69 mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
70 const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
71 : oaknut::SystemReg::TPIDR_EL0;
72 const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
73 WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
74 continue;
75 }
76
77 // MRS Xn, CNTPCT_EL0
78 if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
79 WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
80 continue;
81 }
82
83 // MRS Xn, CNTFRQ_EL0
84 if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
85 UNREACHABLE();
86 }
87
88 // MSR TPIDR_EL0, Xn
89 if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
90 WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
91 continue;
92 }
93
94 if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
95 m_exclusives.push_back(i);
96 }
97 }
98
99 // Determine the patching mode for the final relocation step.
100 const size_t image_size = program_image.size();
101 this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
102}
103
104void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
105 const Kernel::CodeSet::Segment& code,
106 Kernel::PhysicalMemory& program_image,
107 EntryTrampolines* out_trampolines) {
108 const size_t patch_size = GetSectionSize();
109 const size_t image_size = program_image.size();
110
111 // Retrieve text segment data.
112 const auto text = std::span{program_image}.subspan(code.offset, code.size);
113 const auto text_words =
114 std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)};
115
116 const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) {
117 oaknut::CodeGenerator rc{target};
118 if (mode == PatchMode::PreText) {
119 rc.B(rel.patch_offset - patch_size - rel.module_offset);
120 } else {
121 rc.B(image_size - rel.module_offset + rel.patch_offset);
122 }
123 };
124
125 const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
126 oaknut::CodeGenerator rc{target};
127 if (mode == PatchMode::PreText) {
128 rc.B(patch_size - rel.patch_offset + rel.module_offset);
129 } else {
130 rc.B(rel.module_offset - image_size - rel.patch_offset);
131 }
132 };
133
134 const auto RebasePatch = [&](ptrdiff_t patch_offset) {
135 if (mode == PatchMode::PreText) {
136 return GetInteger(load_base) + patch_offset;
137 } else {
138 return GetInteger(load_base) + image_size + patch_offset;
139 }
140 };
141
142 const auto RebasePc = [&](uintptr_t module_offset) {
143 if (mode == PatchMode::PreText) {
144 return GetInteger(load_base) + patch_size + module_offset;
145 } else {
146 return GetInteger(load_base) + module_offset;
147 }
148 };
149
150 // We are now ready to relocate!
151 for (const Relocation& rel : m_branch_to_patch_relocations) {
152 ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
153 }
154 for (const Relocation& rel : m_branch_to_module_relocations) {
155 ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
156 rel);
157 }
158
159 // Rewrite PC constants and record the post-SVC entry trampolines.
160 for (const Relocation& rel : m_write_module_pc_relocations) {
161 oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
162 rc.dx(RebasePc(rel.module_offset));
163 }
164 for (const Trampoline& rel : m_trampolines) {
165 out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
166 }
167
168 // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
169 // Convert to ordered to preserve this assumption.
170 for (const ModuleTextAddress i : m_exclusives) {
171 auto exclusive = Exclusive{text_words[i]};
172 text_words[i] = exclusive.AsOrdered();
173 }
174
175 // Copy to program image
176 if (this->mode == PatchMode::PreText) {
177 std::memcpy(program_image.data(), m_patch_instructions.data(),
178 m_patch_instructions.size() * sizeof(u32));
179 } else {
180 program_image.resize(image_size + patch_size);
181 std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
182 m_patch_instructions.size() * sizeof(u32));
183 }
184}
185
186size_t Patcher::GetSectionSize() const noexcept {
187 return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
188}
189
190void Patcher::WriteLoadContext() {
191 // We were reached by a call, so X30 is already clobbered and free to use as scratch.
192 // The guest X30 sits at [SP], so save our return X30 to [SP + 8]; the caller allocated
193 // 16 bytes of stack for us.
194 c.STR(X30, SP, 8);
195 c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
196 c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
197
198 // Load system registers.
199 c.LDR(W0, X30, offsetof(GuestContext, fpsr));
200 c.MSR(oaknut::SystemReg::FPSR, X0);
201 c.LDR(W0, X30, offsetof(GuestContext, fpcr));
202 c.MSR(oaknut::SystemReg::FPCR, X0);
203 c.LDR(W0, X30, offsetof(GuestContext, nzcv));
204 c.MSR(oaknut::SystemReg::NZCV, X0);
205
206 // Load all vector registers.
207 static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
208 for (int i = 0; i <= 30; i += 2) {
209 c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
210 }
211
212 // Load all general-purpose registers except X30.
213 for (int i = 0; i <= 28; i += 2) {
214 c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
215 }
216
217 // Reload our return X30 from the stack and return.
218 // The patch code will reload the guest X30 for us.
219 c.LDR(X30, SP, 8);
220 c.RET();
221}
222
223void Patcher::WriteSaveContext() {
224 // We were reached by a call, so X30 is already clobbered and free to use as scratch.
225 // The guest X30 sits at [SP], so save our return X30 to [SP + 8]; the caller allocated
226 // 16 bytes of stack for us.
227 c.STR(X30, SP, 8);
228 c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
229 c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));
230
231 // Store all general-purpose registers except X30.
232 for (int i = 0; i <= 28; i += 2) {
233 c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
234 }
235
236 // Store all vector registers.
237 static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
238 for (int i = 0; i <= 30; i += 2) {
239 c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
240 }
241
242 // Store guest system registers, X30 and SP, using X0 as a scratch register.
243 c.STR(X0, SP, PRE_INDEXED, -16);
244 c.LDR(X0, SP, 16);
245 c.STR(X0, X30, 8 * 30);
246 c.ADD(X0, SP, 32);
247 c.STR(X0, X30, offsetof(GuestContext, sp));
248 c.MRS(X0, oaknut::SystemReg::FPSR);
249 c.STR(W0, X30, offsetof(GuestContext, fpsr));
250 c.MRS(X0, oaknut::SystemReg::FPCR);
251 c.STR(W0, X30, offsetof(GuestContext, fpcr));
252 c.MRS(X0, oaknut::SystemReg::NZCV);
253 c.STR(W0, X30, offsetof(GuestContext, nzcv));
254 c.LDR(X0, SP, POST_INDEXED, 16);
255
256 // Reload our return X30 from the stack, and return.
257 c.LDR(X30, SP, 8);
258 c.RET();
259}
260
261void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
262 // We are about to start saving state, so we need to lock the context.
263 this->LockContext();
264
265 // Store guest X30 to the stack. Then, save the context and restore the stack.
266 // This will save all registers except PC, but we know PC at patch time.
267 c.STR(X30, SP, PRE_INDEXED, -16);
268 c.BL(m_save_context);
269 c.LDR(X30, SP, POST_INDEXED, 16);
270
271 // Now that we've saved all registers, any register is free to use as scratch.
272 // Store PC + 4 to the guest context, since we know the instruction's offset from the entry point.
273 oaknut::Label pc_after_svc;
274 c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
275 c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
276 c.LDR(X2, pc_after_svc);
277 c.STR(X2, X1, offsetof(GuestContext, pc));
278
279 // Store SVC number to execute when we return
280 c.MOV(X2, svc_id);
281 c.STR(W2, X1, offsetof(GuestContext, svc_swi));
282
283 // We are calling an SVC. Atomically clear esr_el1, keeping its previous value.
284 static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
285 oaknut::Label retry;
286 c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
287 c.l(retry);
288 c.LDAXR(X0, X2);
289 c.STLXR(W3, XZR, X2);
290 c.CBNZ(W3, retry);
291
292 // Add "calling SVC" flag. Since this is X0, this is now our return value.
293 c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
294
295 // Offset the GuestContext pointer to the HostContext member.
296 // STP has a limited offset range of [-512, 504], which we can't reach otherwise.
297 // NB: Because of this, all offsets below are relative to the start of HostContext.
298 c.ADD(X1, X1, offsetof(GuestContext, host_ctx));
299
300 // Reload host TPIDR_EL0 and SP.
301 static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
302 c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
303 c.MOV(SP, X2);
304 c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
305
306 // Load callee-saved host registers and return to host.
307 static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
308 static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
309 c.LDP(X19, X20, X1, HOST_REGS_OFF);
310 c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
311 c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
312 c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
313 c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
314 c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
315 c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
316 c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
317 c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
318 c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
319 c.RET();
320
321 // Write the post-SVC trampoline address, which will jump back to the guest after restoring its
322 // state.
323 m_trampolines.push_back({c.offset(), module_dest});
324
325 // Host called this location. Save the return address so we can
326 // unwind the stack properly when jumping back.
327 c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
328 c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
329 c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
330 c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
331
332 // Reload all guest registers except X30 and PC.
333 // The function also expects 16 bytes of stack already allocated.
334 c.STR(X30, SP, PRE_INDEXED, -16);
335 c.BL(m_load_context);
336 c.LDR(X30, SP, POST_INDEXED, 16);
337
338 // Use X1 as a scratch register to restore X30.
339 c.STR(X1, SP, PRE_INDEXED, -16);
340 c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
341 c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
342 c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
343 c.LDR(X1, SP, POST_INDEXED, 16);
344
345 // Unlock the context.
346 this->UnlockContext();
347
348 // Jump back to the instruction after the emulated SVC.
349 this->BranchToModule(module_dest);
350
351 // Literal pool: the guest PC immediately after the SVC instruction.
352 c.l(pc_after_svc);
353 this->WriteModulePc(module_dest);
354}
355
356void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
357 oaknut::SystemReg src_reg) {
358 // Retrieve emulated TLS register from GuestContext.
359 c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
360 if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
361 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
362 } else {
363 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
364 }
365
366 // Jump back to the instruction after the emulated MRS.
367 this->BranchToModule(module_dest);
368}
369
370void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
371 const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
372 c.STR(scratch_reg, SP, PRE_INDEXED, -16);
373
374 // Save guest value to NativeExecutionParameters::tpidr_el0.
375 c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
376 c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
377
378 // Restore scratch register.
379 c.LDR(scratch_reg, SP, POST_INDEXED, 16);
380
381 // Jump back to the instruction after the emulated MSR.
382 this->BranchToModule(module_dest);
383}
384
385void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
386 static Common::Arm64::NativeClock clock{};
387 const auto factor = clock.GetGuestCNTFRQFactor();
388 const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor);
389
390 const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1;
391 oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0;
392 oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1;
393
394 oaknut::Label factorlo;
395 oaknut::Label factorhi;
396
397 // Save scratches.
398 c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);
399
400 // Load the host counter (CNTVCT_EL0 stands in for the guest's CNTPCT_EL0).
401 c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);
402
403 // Load scaling factor.
404 c.LDR(scratch0, factorlo);
405 c.LDR(scratch1, factorhi);
406
407 // Multiply by the low factor word, keeping the high half of the product.
408 c.UMULH(scratch0, dest_reg, scratch0);
409
410 // Multiply by the high factor word and add the carry from the low half.
411 c.MADD(dest_reg, dest_reg, scratch1, scratch0);
412
413 // Reload scratches.
414 c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
415
416 // Jump back to the instruction after the emulated MRS.
417 this->BranchToModule(module_dest);
418
419 // Scaling factor constant values.
420 c.l(factorlo);
421 c.dx(raw_factor[0]);
422 c.l(factorhi);
423 c.dx(raw_factor[1]);
424}
425
426void Patcher::LockContext() {
427 oaknut::Label retry;
428
429 // Save scratches.
430 c.STP(X0, X1, SP, PRE_INDEXED, -16);
431
432 // (Re)load the lock pointer; we branch back here on contention.
433 c.l(retry);
434 c.CLREX();
435 c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
436 c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
437
438 static_assert(SpinLockLocked == 0);
439
440 // Load-linked with acquire ordering.
441 c.LDAXR(W1, X0);
442
443 // If the value was SpinLockLocked, clear monitor and retry.
444 c.CBZ(W1, retry);
445
446 // Store-conditional SpinLockLocked with relaxed ordering.
447 c.STXR(W1, WZR, X0);
448
449 // If we failed to store, retry.
450 c.CBNZ(W1, retry);
451
452 // We succeeded! Reload scratches.
453 c.LDP(X0, X1, SP, POST_INDEXED, 16);
454}
455
456void Patcher::UnlockContext() {
457 // Save scratches.
458 c.STP(X0, X1, SP, PRE_INDEXED, -16);
459
460 // Load lock pointer.
461 c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
462 c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));
463
464 // Load SpinLockUnlocked.
465 c.MOV(W1, SpinLockUnlocked);
466
467 // Store value with release ordering.
468 c.STLR(W1, X0);
469
470 // Load scratches.
471 c.LDP(X0, X1, SP, POST_INDEXED, 16);
472}
473
474} // namespace Core::NCE
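The UMULH/MADD pair in WriteCntpctHandler computes the high 64 bits of a 64x128-bit product, i.e. the host counter scaled by a 64.64 fixed-point frequency ratio. The same arithmetic in plain C++ (a sketch; assumes GetGuestCNTFRQFactor returns that ratio as a u128 split into the two literal-pool words, and uses the GCC/Clang __int128 extension):

    #include <cstdint>

    std::uint64_t ScaleCounter(std::uint64_t cntvct, unsigned __int128 factor) {
        const std::uint64_t lo = static_cast<std::uint64_t>(factor);
        const std::uint64_t hi = static_cast<std::uint64_t>(factor >> 64);
        const std::uint64_t carry = static_cast<std::uint64_t>(
            (static_cast<unsigned __int128>(cntvct) * lo) >> 64); // UMULH
        return cntvct * hi + carry;                               // MADD
    }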
diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h
new file mode 100644
index 000000000..c6d1608c1
--- /dev/null
+++ b/src/core/arm/nce/patcher.h
@@ -0,0 +1,98 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <unordered_map>
8#include <vector>
9#include <oaknut/code_block.hpp>
10#include <oaknut/oaknut.hpp>
11
12#include "common/common_types.h"
13#include "core/hle/kernel/code_set.h"
14#include "core/hle/kernel/k_typed_address.h"
15#include "core/hle/kernel/physical_memory.h"
16
17namespace Core::NCE {
18
19enum class PatchMode : u32 {
20 None,
21 PreText, ///< Patch section is inserted before .text
22 PostData, ///< Patch section is inserted after .data
23};
24
25using ModuleTextAddress = u64;
26using PatchTextAddress = u64;
27using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
28
29class Patcher {
30public:
31 explicit Patcher();
32 ~Patcher();
33
34 void PatchText(const Kernel::PhysicalMemory& program_image,
35 const Kernel::CodeSet::Segment& code);
36 void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
37 Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
38 size_t GetSectionSize() const noexcept;
39
40 [[nodiscard]] PatchMode GetPatchMode() const noexcept {
41 return mode;
42 }
43
44private:
45 using ModuleDestLabel = uintptr_t;
46
47 struct Trampoline {
48 ptrdiff_t patch_offset;
49 uintptr_t module_offset;
50 };
51
52 void WriteLoadContext();
53 void WriteSaveContext();
54 void LockContext();
55 void UnlockContext();
56 void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
57 void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
58 oaknut::SystemReg src_reg);
59 void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
60 void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);
61
62private:
63 void BranchToPatch(uintptr_t module_dest) {
64 m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
65 }
66
67 void BranchToModule(uintptr_t module_dest) {
68 m_branch_to_module_relocations.push_back({c.offset(), module_dest});
69 c.dw(0);
70 }
71
72 void WriteModulePc(uintptr_t module_dest) {
73 m_write_module_pc_relocations.push_back({c.offset(), module_dest});
74 c.dx(0);
75 }
76
77private:
78 // List of patch instructions we have generated.
79 std::vector<u32> m_patch_instructions{};
80
81 // Relocation type for relative branch from module to patch.
82 struct Relocation {
83 ptrdiff_t patch_offset; ///< Offset in bytes from the start of the patch section.
84 uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
85 };
86
87 oaknut::VectorCodeGenerator c;
88 std::vector<Trampoline> m_trampolines;
89 std::vector<Relocation> m_branch_to_patch_relocations{};
90 std::vector<Relocation> m_branch_to_module_relocations{};
91 std::vector<Relocation> m_write_module_pc_relocations{};
92 std::vector<ModuleTextAddress> m_exclusives{};
93 oaknut::Label m_save_context{};
94 oaknut::Label m_load_context{};
95 PatchMode mode{PatchMode::None};
96};
97
98} // namespace Core::NCE
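The intended call sequence, inferred from the loader changes listed in the diffstat but not shown in this excerpt (treat the wrapper below as a sketch, not loader code): patch first, then relocate and copy once the final load base is known.

    #include "core/arm/nce/patcher.h"

    void ApplyNcePatches(Common::ProcessAddress load_base,
                         const Kernel::CodeSet::Segment& code,
                         Kernel::PhysicalMemory& program_image) {
        Core::NCE::Patcher patcher;
        patcher.PatchText(program_image, code); // scan text, emit patch section
        // The caller must reserve patcher.GetSectionSize() extra bytes, placed
        // before .text or after .data depending on GetPatchMode().
        Core::NCE::EntryTrampolines trampolines;
        patcher.RelocateAndCopy(load_base, code, program_image, &trampolines);
    }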
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 980bb97f9..151eb3870 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) {
211 system.GPU().ObtainContext(); 211 system.GPU().ObtainContext();
212 } 212 }
213 213
214 system.ArmInterface(core).Initialize();
215
214 auto& kernel = system.Kernel(); 216 auto& kernel = system.Kernel();
215 auto& scheduler = *kernel.CurrentScheduler(); 217 auto& scheduler = *kernel.CurrentScheduler();
216 auto* thread = scheduler.GetSchedulerCurrentThread(); 218 auto* thread = scheduler.GetSchedulerCurrentThread();
diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp
index de3f8ef8f..1aea56a99 100644
--- a/src/core/device_memory.cpp
+++ b/src/core/device_memory.cpp
@@ -6,7 +6,7 @@
6 6
7namespace Core { 7namespace Core {
8 8
9#ifdef ANDROID 9#ifdef HAS_NCE
10constexpr size_t VirtualReserveSize = 1ULL << 38; 10constexpr size_t VirtualReserveSize = 1ULL << 38;
11#else 11#else
12constexpr size_t VirtualReserveSize = 1ULL << 39; 12constexpr size_t VirtualReserveSize = 1ULL << 39;
@@ -15,6 +15,7 @@ constexpr size_t VirtualReserveSize = 1ULL << 39;
15DeviceMemory::DeviceMemory() 15DeviceMemory::DeviceMemory()
16 : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), 16 : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(),
17 VirtualReserveSize} {} 17 VirtualReserveSize} {}
18
18DeviceMemory::~DeviceMemory() = default; 19DeviceMemory::~DeviceMemory() = default;
19 20
20} // namespace Core 21} // namespace Core
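For reference, the two reserve sizes spelled out (a sketch of the arithmetic only):

    static_assert((1ULL << 38) == 256ULL << 30); // HAS_NCE: 256 GiB reserved
    static_assert((1ULL << 39) == 512ULL << 30); // otherwise: 512 GiB reserved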
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index af1af2b78..4d2d0098e 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -75,12 +75,26 @@ struct CodeSet final {
75 return segments[2]; 75 return segments[2];
76 } 76 }
77 77
78#ifdef HAS_NCE
79 Segment& PatchSegment() {
80 return patch_segment;
81 }
82
83 const Segment& PatchSegment() const {
84 return patch_segment;
85 }
86#endif
87
78 /// The overall data that backs this code set. 88 /// The overall data that backs this code set.
79 Kernel::PhysicalMemory memory; 89 Kernel::PhysicalMemory memory;
80 90
81 /// The segments that comprise this code set. 91 /// The segments that comprise this code set.
82 std::array<Segment, 3> segments; 92 std::array<Segment, 3> segments;
83 93
94#ifdef HAS_NCE
95 Segment patch_segment;
96#endif
97
84 /// The entry point address for this code set. 98 /// The entry point address for this code set.
85 KProcessAddress entrypoint = 0; 99 KProcessAddress entrypoint = 0;
86}; 100};
diff --git a/src/core/hle/kernel/k_address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp
index 32173e52b..23258071e 100644
--- a/src/core/hle/kernel/k_address_space_info.cpp
+++ b/src/core/hle/kernel/k_address_space_info.cpp
@@ -25,8 +25,8 @@ constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
25 { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, }, 25 { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
26 { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, }, 26 { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, },
27 { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, }, 27 { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, },
28#ifdef ANDROID 28#ifdef HAS_NCE
29 // With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. 29 // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region.
30 { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, 30 { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
31#else 31#else
32 { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, 32 { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
diff --git a/src/core/hle/kernel/k_page_table_base.cpp b/src/core/hle/kernel/k_page_table_base.cpp
index 47dc8fd35..6691586ed 100644
--- a/src/core/hle/kernel/k_page_table_base.cpp
+++ b/src/core/hle/kernel/k_page_table_base.cpp
@@ -88,6 +88,22 @@ Result FlushDataCache(AddressType addr, u64 size) {
88 R_SUCCEED(); 88 R_SUCCEED();
89} 89}
90 90
91constexpr Common::MemoryPermission ConvertToMemoryPermission(KMemoryPermission perm) {
92 Common::MemoryPermission perms{};
93 if (True(perm & KMemoryPermission::UserRead)) {
94 perms |= Common::MemoryPermission::Read;
95 }
96 if (True(perm & KMemoryPermission::UserWrite)) {
97 perms |= Common::MemoryPermission::Write;
98 }
99#ifdef HAS_NCE
100 if (True(perm & KMemoryPermission::UserExecute)) {
101 perms |= Common::MemoryPermission::Execute;
102 }
103#endif
104 return perms;
105}
106
91} // namespace 107} // namespace
92 108
93void KPageTableBase::MemoryRange::Open() { 109void KPageTableBase::MemoryRange::Open() {
@@ -170,7 +186,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
170 KMemoryManager::Pool pool, KProcessAddress code_address, 186 KMemoryManager::Pool pool, KProcessAddress code_address,
171 size_t code_size, KSystemResource* system_resource, 187 size_t code_size, KSystemResource* system_resource,
172 KResourceLimit* resource_limit, 188 KResourceLimit* resource_limit,
173 Core::Memory::Memory& memory) { 189 Core::Memory::Memory& memory,
190 KProcessAddress aslr_space_start) {
174 // Calculate region extents. 191 // Calculate region extents.
175 const size_t as_width = GetAddressSpaceWidth(as_type); 192 const size_t as_width = GetAddressSpaceWidth(as_type);
176 const KProcessAddress start = 0; 193 const KProcessAddress start = 0;
@@ -211,7 +228,8 @@ Result KPageTableBase::InitializeForProcess(Svc::CreateProcessFlag as_type, bool
211 heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap); 228 heap_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Heap);
212 stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack); 229 stack_region_size = GetSpaceSize(KAddressSpaceInfo::Type::Stack);
213 kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall); 230 kernel_map_region_size = GetSpaceSize(KAddressSpaceInfo::Type::MapSmall);
214 m_code_region_start = GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit); 231 m_code_region_start = m_address_space_start + aslr_space_start +
232 GetSpaceStart(KAddressSpaceInfo::Type::Map39Bit);
215 m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit); 233 m_code_region_end = m_code_region_start + GetSpaceSize(KAddressSpaceInfo::Type::Map39Bit);
216 m_alias_code_region_start = m_code_region_start; 234 m_alias_code_region_start = m_code_region_start;
217 m_alias_code_region_end = m_code_region_end; 235 m_alias_code_region_end = m_code_region_end;
@@ -5643,7 +5661,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
5643 case OperationType::Map: { 5661 case OperationType::Map: {
5644 ASSERT(virt_addr != 0); 5662 ASSERT(virt_addr != 0);
5645 ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize)); 5663 ASSERT(Common::IsAligned(GetInteger(virt_addr), PageSize));
5646 m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr); 5664 m_memory->MapMemoryRegion(*m_impl, virt_addr, num_pages * PageSize, phys_addr,
5665 ConvertToMemoryPermission(properties.perm));
5647 5666
5648 // Open references to pages, if we should. 5667 // Open references to pages, if we should.
5649 if (this->IsHeapPhysicalAddress(phys_addr)) { 5668 if (this->IsHeapPhysicalAddress(phys_addr)) {
@@ -5658,8 +5677,11 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
5658 } 5677 }
5659 case OperationType::ChangePermissions: 5678 case OperationType::ChangePermissions:
5660 case OperationType::ChangePermissionsAndRefresh: 5679 case OperationType::ChangePermissionsAndRefresh:
5661 case OperationType::ChangePermissionsAndRefreshAndFlush: 5680 case OperationType::ChangePermissionsAndRefreshAndFlush: {
5681 m_memory->ProtectRegion(*m_impl, virt_addr, num_pages * PageSize,
5682 ConvertToMemoryPermission(properties.perm));
5662 R_SUCCEED(); 5683 R_SUCCEED();
5684 }
5663 default: 5685 default:
5664 UNREACHABLE(); 5686 UNREACHABLE();
5665 } 5687 }
@@ -5687,7 +5709,8 @@ Result KPageTableBase::Operate(PageLinkedList* page_list, KProcessAddress virt_a
5687 const size_t size{node.GetNumPages() * PageSize}; 5709 const size_t size{node.GetNumPages() * PageSize};
5688 5710
5689 // Map the pages. 5711 // Map the pages.
5690 m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress()); 5712 m_memory->MapMemoryRegion(*m_impl, virt_addr, size, node.GetAddress(),
5713 ConvertToMemoryPermission(properties.perm));
5691 5714
5692 virt_addr += size; 5715 virt_addr += size;
5693 } 5716 }
diff --git a/src/core/hle/kernel/k_page_table_base.h b/src/core/hle/kernel/k_page_table_base.h
index ee2c41e67..556d230b3 100644
--- a/src/core/hle/kernel/k_page_table_base.h
+++ b/src/core/hle/kernel/k_page_table_base.h
@@ -235,7 +235,8 @@ public:
235 bool enable_device_address_space_merge, bool from_back, 235 bool enable_device_address_space_merge, bool from_back,
236 KMemoryManager::Pool pool, KProcessAddress code_address, 236 KMemoryManager::Pool pool, KProcessAddress code_address,
237 size_t code_size, KSystemResource* system_resource, 237 size_t code_size, KSystemResource* system_resource,
238 KResourceLimit* resource_limit, Core::Memory::Memory& memory); 238 KResourceLimit* resource_limit, Core::Memory::Memory& memory,
239 KProcessAddress aslr_space_start);
239 240
240 void Finalize(); 241 void Finalize();
241 242
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index 3cfb414e5..6c29eb72c 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -300,7 +300,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
300 False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); 300 False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
301 R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, 301 R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
302 params.code_address, params.code_num_pages * PageSize, 302 params.code_address, params.code_num_pages * PageSize,
303 m_system_resource, res_limit, this->GetMemory())); 303 m_system_resource, res_limit, this->GetMemory(), 0));
304 } 304 }
305 ON_RESULT_FAILURE_2 { 305 ON_RESULT_FAILURE_2 {
306 m_page_table.Finalize(); 306 m_page_table.Finalize();
@@ -332,7 +332,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params, const KPa
332 332
333Result KProcess::Initialize(const Svc::CreateProcessParameter& params, 333Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
334 std::span<const u32> user_caps, KResourceLimit* res_limit, 334 std::span<const u32> user_caps, KResourceLimit* res_limit,
335 KMemoryManager::Pool pool) { 335 KMemoryManager::Pool pool, KProcessAddress aslr_space_start) {
336 ASSERT(res_limit != nullptr); 336 ASSERT(res_limit != nullptr);
337 337
338 // Set members. 338 // Set members.
@@ -393,7 +393,7 @@ Result KProcess::Initialize(const Svc::CreateProcessParameter& params,
393 False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge); 393 False(params.flags & Svc::CreateProcessFlag::DisableDeviceAddressSpaceMerge);
394 R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool, 394 R_TRY(m_page_table.Initialize(as_type, enable_aslr, enable_das_merge, !enable_aslr, pool,
395 params.code_address, code_size, m_system_resource, res_limit, 395 params.code_address, code_size, m_system_resource, res_limit,
396 this->GetMemory())); 396 this->GetMemory(), aslr_space_start));
397 } 397 }
398 ON_RESULT_FAILURE_2 { 398 ON_RESULT_FAILURE_2 {
399 m_page_table.Finalize(); 399 m_page_table.Finalize();
@@ -1128,7 +1128,7 @@ KProcess::KProcess(KernelCore& kernel)
1128KProcess::~KProcess() = default; 1128KProcess::~KProcess() = default;
1129 1129
1130Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, 1130Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
1131 bool is_hbl) { 1131 KProcessAddress aslr_space_start, bool is_hbl) {
1132 // Create a resource limit for the process. 1132 // Create a resource limit for the process.
1133 const auto physical_memory_size = 1133 const auto physical_memory_size =
1134 m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application); 1134 m_kernel.MemoryManager().GetSize(Kernel::KMemoryManager::Pool::Application);
@@ -1179,7 +1179,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
1179 .name = {}, 1179 .name = {},
1180 .version = {}, 1180 .version = {},
1181 .program_id = metadata.GetTitleID(), 1181 .program_id = metadata.GetTitleID(),
1182 .code_address = code_address, 1182 .code_address = code_address + GetInteger(aslr_space_start),
1183 .code_num_pages = static_cast<s32>(code_size / PageSize), 1183 .code_num_pages = static_cast<s32>(code_size / PageSize),
1184 .flags = flag, 1184 .flags = flag,
1185 .reslimit = Svc::InvalidHandle, 1185 .reslimit = Svc::InvalidHandle,
@@ -1193,7 +1193,7 @@ Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std:
1193 1193
1194 // Initialize for application process. 1194 // Initialize for application process.
1195 R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit, 1195 R_TRY(this->Initialize(params, metadata.GetKernelCapabilities(), res_limit,
1196 KMemoryManager::Pool::Application)); 1196 KMemoryManager::Pool::Application, aslr_space_start));
1197 1197
1198 // Assign remaining properties. 1198 // Assign remaining properties.
1199 m_is_hbl = is_hbl; 1199 m_is_hbl = is_hbl;
@@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
1214 ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); 1214 ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute);
1215 ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); 1215 ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read);
1216 ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); 1216 ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
1217
1218#ifdef HAS_NCE
1219 if (Settings::IsNceEnabled()) {
1220 auto& buffer = m_kernel.System().DeviceMemory().buffer;
1221 const auto& code = code_set.CodeSegment();
1222 const auto& patch = code_set.PatchSegment();
1223 buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true);
1224 buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true);
1225 ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
1226 }
1227#endif
1217} 1228}
1218 1229
1219bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { 1230bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) {
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index 8339465fd..d8cd0fdde 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -120,6 +120,9 @@ private:
120 std::atomic<s64> m_num_ipc_messages{}; 120 std::atomic<s64> m_num_ipc_messages{};
121 std::atomic<s64> m_num_ipc_replies{}; 121 std::atomic<s64> m_num_ipc_replies{};
122 std::atomic<s64> m_num_ipc_receives{}; 122 std::atomic<s64> m_num_ipc_receives{};
123#ifdef HAS_NCE
124 std::unordered_map<u64, u64> m_post_handlers{};
125#endif
123 126
124private: 127private:
125 Result StartTermination(); 128 Result StartTermination();
@@ -150,7 +153,8 @@ public:
150 std::span<const u32> caps, KResourceLimit* res_limit, 153 std::span<const u32> caps, KResourceLimit* res_limit,
151 KMemoryManager::Pool pool, bool immortal); 154 KMemoryManager::Pool pool, bool immortal);
152 Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps, 155 Result Initialize(const Svc::CreateProcessParameter& params, std::span<const u32> user_caps,
153 KResourceLimit* res_limit, KMemoryManager::Pool pool); 156 KResourceLimit* res_limit, KMemoryManager::Pool pool,
157 KProcessAddress aslr_space_start);
154 void Exit(); 158 void Exit();
155 159
156 const char* GetName() const { 160 const char* GetName() const {
@@ -466,6 +470,12 @@ public:
466 470
467 static void Switch(KProcess* cur_process, KProcess* next_process); 471 static void Switch(KProcess* cur_process, KProcess* next_process);
468 472
473#ifdef HAS_NCE
474 std::unordered_map<u64, u64>& GetPostHandlers() noexcept {
475 return m_post_handlers;
476 }
477#endif
478
469public: 479public:
470 // Attempts to insert a watchpoint into a free slot. Returns false if none are available. 480 // Attempts to insert a watchpoint into a free slot. Returns false if none are available.
471 bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); 481 bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type);
@@ -479,7 +489,7 @@ public:
479 489
480public: 490public:
481 Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, 491 Result LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size,
482 bool is_hbl); 492 KProcessAddress aslr_space_start, bool is_hbl);
483 493
484 void LoadModule(CodeSet code_set, KProcessAddress base_addr); 494 void LoadModule(CodeSet code_set, KProcessAddress base_addr);
485 495
diff --git a/src/core/hle/kernel/k_process_page_table.h b/src/core/hle/kernel/k_process_page_table.h
index b7ae5abd0..9e40f68bc 100644
--- a/src/core/hle/kernel/k_process_page_table.h
+++ b/src/core/hle/kernel/k_process_page_table.h
@@ -23,10 +23,11 @@ public:
     Result Initialize(Svc::CreateProcessFlag as_type, bool enable_aslr, bool enable_das_merge,
                       bool from_back, KMemoryManager::Pool pool, KProcessAddress code_address,
                       size_t code_size, KSystemResource* system_resource,
-                      KResourceLimit* resource_limit, Core::Memory::Memory& memory) {
-        R_RETURN(m_page_table.InitializeForProcess(as_type, enable_aslr, enable_das_merge,
-                                                   from_back, pool, code_address, code_size,
-                                                   system_resource, resource_limit, memory));
+                      KResourceLimit* resource_limit, Core::Memory::Memory& memory,
+                      KProcessAddress aslr_space_start) {
+        R_RETURN(m_page_table.InitializeForProcess(
+            as_type, enable_aslr, enable_das_merge, from_back, pool, code_address, code_size,
+            system_resource, resource_limit, memory, aslr_space_start));
     }
 
     void Finalize() {
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index e1f80b04f..e9ca5dfca 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -655,6 +655,21 @@ public:
         return m_stack_top;
     }
 
+public:
+    // TODO: This shouldn't be defined in kernel namespace
+    struct NativeExecutionParameters {
+        u64 tpidr_el0{};
+        u64 tpidrro_el0{};
+        void* native_context{};
+        std::atomic<u32> lock{1};
+        bool is_running{};
+        u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')};
+    };
+
+    NativeExecutionParameters& GetNativeExecutionParameters() {
+        return m_native_execution_parameters;
+    }
+
 private:
     KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key,
                                bool is_kernel_address_key);
@@ -914,6 +929,7 @@ private:
     ThreadWaitReasonForDebugging m_wait_reason_for_debugging{};
     uintptr_t m_argument{};
     KProcessAddress m_stack_top{};
+    NativeExecutionParameters m_native_execution_parameters{};
 
 public:
     using ConditionVariableThreadTreeType = ConditionVariableThreadTree;
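The per-thread NativeExecutionParameters block carries the guest TLS registers, a lock word, and a magic value. A standalone sketch of validating such a block before trusting it; the validation step is an assumption for illustration, though the struct mirrors the one added above:

#include <atomic>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

// Mirrors the struct above; MakeMagic('Y','U','Z','U') is assumed to pack
// the bytes little-endian, i.e. 'Y' in the lowest byte.
constexpr u32 kMagic = 'Y' | ('U' << 8) | ('Z' << 16) | ('U' << 24);

struct NativeExecutionParameters {
    u64 tpidr_el0{};
    u64 tpidrro_el0{};
    void* native_context{};
    std::atomic<u32> lock{1};
    bool is_running{};
    u32 magic{kMagic};
};

// Hypothetical check a host-side handler could apply before dereferencing
// native_context; not taken from the yuzu sources.
bool IsThreadBlockValid(const NativeExecutionParameters* params) {
    return params != nullptr && params->magic == kMagic;
}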
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 5ee869fa2..073039825 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -1,8 +1,12 @@
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "common/settings.h"
 #include "core/arm/dynarmic/arm_dynarmic_32.h"
 #include "core/arm/dynarmic/arm_dynarmic_64.h"
+#ifdef HAS_NCE
+#include "core/arm/nce/arm_nce.h"
+#endif
 #include "core/core.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/kernel.h"
@@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
     : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} {
 #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
     // TODO(bunnei): Initialization relies on a core being available. We may later replace this with
-    // a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager.
+    // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU
+    // manager.
     auto& kernel = system.Kernel();
     m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
         system, kernel.IsMulticore(),
@@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
 PhysicalCore::~PhysicalCore() = default;
 
 void PhysicalCore::Initialize(bool is_64_bit) {
+#if defined(HAS_NCE)
+    if (Settings::IsNceEnabled()) {
+        m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(),
+                                                          m_core_index);
+        return;
+    }
+#endif
 #if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
     auto& kernel = m_system.Kernel();
     if (!is_64_bit) {
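Note the two-level gate in Initialize(): NCE must be compiled in (HAS_NCE) and enabled at runtime before the dynarmic path is skipped. A minimal sketch of the same pattern with placeholder types (none of these names exist in yuzu):

#include <memory>

struct CpuBackend { virtual ~CpuBackend() = default; };
struct JitBackend final : CpuBackend {};
struct NativeBackend final : CpuBackend {};

std::unique_ptr<CpuBackend> MakeBackend(bool nce_enabled) {
#ifdef HAS_NCE
    if (nce_enabled) {
        return std::make_unique<NativeBackend>(); // compiled in and selected at runtime
    }
#endif
    return std::make_unique<JitBackend>(); // default JIT fallback
}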
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 5c36b71e5..60ee78e89 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -3,6 +3,7 @@
 
 #include <cstring>
 #include "common/logging/log.h"
+#include "common/settings.h"
 #include "core/core.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
@@ -14,6 +15,10 @@
 #include "core/loader/deconstructed_rom_directory.h"
 #include "core/loader/nso.h"
 
+#ifdef HAS_NCE
+#include "core/arm/nce/patcher.h"
+#endif
+
 namespace Loader {
 
 AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_,
@@ -124,21 +129,43 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
     }
     metadata.Print();
 
-    const auto static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2",
-                                 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7",
-                                 "subsdk8", "subsdk9", "sdk"};
+    // Enable NCE only for programs with 39-bit address space.
+    const bool is_39bit =
+        metadata.GetAddressSpaceType() == FileSys::ProgramAddressSpaceType::Is39Bit;
+    Settings::SetNceEnabled(is_39bit);
+
+    const std::array static_modules = {"rtld",    "main",    "subsdk0", "subsdk1", "subsdk2",
+                                       "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7",
+                                       "subsdk8", "subsdk9", "sdk"};
 
-    // Use the NSO module loader to figure out the code layout
     std::size_t code_size{};
-    for (const auto& module : static_modules) {
+
+    // Define an nce patch context for each potential module.
+#ifdef HAS_NCE
+    std::array<Core::NCE::Patcher, 13> module_patchers;
+#endif
+
+    const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
+#ifdef HAS_NCE
+        if (Settings::IsNceEnabled()) {
+            return &module_patchers[i];
+        }
+#endif
+        return nullptr;
+    };
+
+    // Use the NSO module loader to figure out the code layout
+    for (size_t i = 0; i < static_modules.size(); i++) {
+        const auto& module = static_modules[i];
         const FileSys::VirtualFile module_file{dir->GetFile(module)};
         if (!module_file) {
             continue;
         }
 
         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
-        const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(
-            process, system, *module_file, code_size, should_pass_arguments, false);
+        const auto tentative_next_load_addr =
+            AppLoader_NSO::LoadModule(process, system, *module_file, code_size,
+                                      should_pass_arguments, false, {}, GetPatcher(i));
         if (!tentative_next_load_addr) {
             return {ResultStatus::ErrorLoadingNSO, {}};
         }
@@ -146,8 +173,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
         code_size = *tentative_next_load_addr;
     }
 
+    // Enable direct memory mapping in case of NCE.
+    const u64 fastmem_base = [&]() -> size_t {
+        if (Settings::IsNceEnabled()) {
+            auto& buffer = system.DeviceMemory().buffer;
+            buffer.EnableDirectMappedAddress();
+            return reinterpret_cast<u64>(buffer.VirtualBasePointer());
+        }
+        return 0;
+    }();
+
     // Setup the process code layout
-    if (process.LoadFromMetadata(metadata, code_size, is_hbl).IsError()) {
+    if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) {
         return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
     }
 
@@ -157,7 +194,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
     VAddr next_load_addr{base_address};
     const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(),
                                    system.GetContentProvider()};
-    for (const auto& module : static_modules) {
+    for (size_t i = 0; i < static_modules.size(); i++) {
+        const auto& module = static_modules[i];
         const FileSys::VirtualFile module_file{dir->GetFile(module)};
         if (!module_file) {
             continue;
@@ -165,15 +203,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
 
         const VAddr load_addr{next_load_addr};
         const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
-        const auto tentative_next_load_addr = AppLoader_NSO::LoadModule(
-            process, system, *module_file, load_addr, should_pass_arguments, true, pm);
+        const auto tentative_next_load_addr =
+            AppLoader_NSO::LoadModule(process, system, *module_file, load_addr,
+                                      should_pass_arguments, true, pm, GetPatcher(i));
         if (!tentative_next_load_addr) {
             return {ResultStatus::ErrorLoadingNSO, {}};
         }
 
         next_load_addr = *tentative_next_load_addr;
         modules.insert_or_assign(load_addr, module);
-        LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
+        LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr);
     }
 
     // Find the RomFS by searching for a ".romfs" file in this directory
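The loader now iterates the module list twice with one patcher per slot: the first pass measures the layout (each module's patch section included), the second loads at the final addresses with the same patcher state. A compilable sketch of that two-pass shape under made-up sizes; LoadOneModule and its numbers are placeholders, not yuzu APIs:

#include <array>
#include <cstddef>
#include <optional>

struct Patcher {
    std::size_t section_size{0x1000}; // pretend each module needs a page of patches
};

// Stand-in for AppLoader_NSO::LoadModule: returns the next free address.
std::optional<std::size_t> LoadOneModule(std::size_t base, Patcher* patcher) {
    constexpr std::size_t module_size = 0x10000; // placeholder module size
    return base + module_size + (patcher ? patcher->section_size : 0);
}

int main() {
    std::array<Patcher, 13> patchers{}; // one per potential static module
    std::size_t code_size = 0;
    for (std::size_t i = 0; i < patchers.size(); ++i) {
        // Pass 1: compute the total layout, patch sections included.
        code_size = LoadOneModule(code_size, &patchers[i]).value();
    }
    std::size_t next_addr = 0; // the real loader starts at the process base address
    for (std::size_t i = 0; i < patchers.size(); ++i) {
        // Pass 2: load each module at its final address with the same patcher.
        next_addr = LoadOneModule(next_addr, &patchers[i]).value();
    }
    return code_size == next_addr ? 0 : 1;
}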
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index bf56a08b4..cd6982921 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -91,7 +91,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::KProcess& process,
 
     // Setup the process code layout
     if (process
-            .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false)
+            .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0,
+                              false)
             .IsError()) {
         return {ResultStatus::ErrorNotInitialized, {}};
     }
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 69f1a54ed..e74697cda 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -22,6 +22,10 @@
 #include "core/loader/nso.h"
 #include "core/memory.h"
 
+#ifdef HAS_NCE
+#include "core/arm/nce/patcher.h"
+#endif
+
 namespace Loader {
 
 struct NroSegmentHeader {
@@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) {
     return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK);
 }
 
-static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) {
+static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process,
+                        const std::vector<u8>& data) {
     if (data.size() < sizeof(NroHeader)) {
         return {};
     }
@@ -194,14 +199,61 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
 
     codeset.DataSegment().size += bss_size;
     program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
+    size_t image_size = program_image.size();
+
+#ifdef HAS_NCE
+    const auto& code = codeset.CodeSegment();
+
+    // NROs always have a 39-bit address space.
+    Settings::SetNceEnabled(true);
+
+    // Create NCE patcher
+    Core::NCE::Patcher patch{};
+
+    if (Settings::IsNceEnabled()) {
+        // Patch SVCs and MRS calls in the guest code
+        patch.PatchText(program_image, code);
+
+        // We only support PostData patching for NROs.
+        ASSERT(patch.GetPatchMode() == Core::NCE::PatchMode::PostData);
+
+        // Update patch section.
+        auto& patch_segment = codeset.PatchSegment();
+        patch_segment.addr = image_size;
+        patch_segment.size = static_cast<u32>(patch.GetSectionSize());
+
+        // Add patch section size to the module size.
+        image_size += patch_segment.size;
+    }
+#endif
+
+    // Enable direct memory mapping in case of NCE.
+    const u64 fastmem_base = [&]() -> size_t {
+        if (Settings::IsNceEnabled()) {
+            auto& buffer = system.DeviceMemory().buffer;
+            buffer.EnableDirectMappedAddress();
+            return reinterpret_cast<u64>(buffer.VirtualBasePointer());
+        }
+        return 0;
+    }();
 
     // Setup the process code layout
     if (process
-            .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), false)
+            .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base,
+                              false)
             .IsError()) {
         return false;
     }
 
+    // Relocate code patch and copy to the program_image if running under NCE.
+    // This needs to be after LoadFromMetadata so we can use the process entry point.
+#ifdef HAS_NCE
+    if (Settings::IsNceEnabled()) {
+        patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image,
+                              &process.GetPostHandlers());
+    }
+#endif
+
     // Load codeset for current process
     codeset.memory = std::move(program_image);
     process.LoadModule(std::move(codeset), process.GetEntryPoint());
@@ -209,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
     return true;
 }
 
-bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) {
-    return LoadNroImpl(process, nro_file.ReadAllBytes());
+bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process,
+                            const FileSys::VfsFile& nro_file) {
+    return LoadNroImpl(system, process, nro_file.ReadAllBytes());
 }
 
 AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) {
@@ -218,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S
         return {ResultStatus::ErrorAlreadyLoaded, {}};
     }
 
-    if (!LoadNro(process, *file)) {
+    if (!LoadNro(system, process, *file)) {
         return {ResultStatus::ErrorLoadingNRO, {}};
     }
 
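For NROs only PostData patching is supported, so the patch section is simply appended: its address is the old image size and the image grows by the section size. A worked example of that arithmetic with made-up sizes:

#include <cstdint>
#include <iostream>

int main() {
    std::uint64_t image_size = 0x30000;      // text + ro + data + bss (made-up, page-aligned)
    const std::uint64_t patch_size = 0x2000; // stand-in for GetSectionSize()

    const std::uint64_t patch_addr = image_size; // PostData: append at the end
    image_size += patch_size;

    // Prints: patch @ 0x30000, image = 0x32000
    std::cout << std::hex << "patch @ 0x" << patch_addr << ", image = 0x" << image_size << '\n';
}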
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 8de6eebc6..d2928cba0 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -54,7 +54,7 @@ public:
     bool IsRomFSUpdatable() const override;
 
 private:
-    bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
+    bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
 
     std::vector<u8> icon_data;
     std::unique_ptr<FileSys::NACP> nacp;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 1350da8dc..b053a0d14 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -20,6 +20,10 @@
 #include "core/loader/nso.h"
 #include "core/memory.h"
 
+#ifdef HAS_NCE
+#include "core/arm/nce/patcher.h"
+#endif
+
 namespace Loader {
 namespace {
 struct MODHeader {
@@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) {
 std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system,
                                                const FileSys::VfsFile& nso_file, VAddr load_base,
                                                bool should_pass_arguments, bool load_into_process,
-                                               std::optional<FileSys::PatchManager> pm) {
+                                               std::optional<FileSys::PatchManager> pm,
+                                               Core::NCE::Patcher* patch) {
     if (nso_file.GetSize() < sizeof(NSOHeader)) {
         return std::nullopt;
     }
@@ -86,6 +91,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
         return std::nullopt;
     }
 
+    // Allocate some space at the beginning if we are patching in PreText mode.
+    const size_t module_start = [&]() -> size_t {
+#ifdef HAS_NCE
+        if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
+            return patch->GetSectionSize();
+        }
+#endif
+        return 0;
+    }();
+
     // Build program image
     Kernel::CodeSet codeset;
     Kernel::PhysicalMemory program_image;
@@ -95,11 +110,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
         if (nso_header.IsSegmentCompressed(i)) {
             data = DecompressSegment(data, nso_header.segments[i]);
         }
-        program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size()));
-        std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
-                    data.size());
-        codeset.segments[i].addr = nso_header.segments[i].location;
-        codeset.segments[i].offset = nso_header.segments[i].location;
+        program_image.resize(module_start + nso_header.segments[i].location +
+                             static_cast<u32>(data.size()));
+        std::memcpy(program_image.data() + module_start + nso_header.segments[i].location,
+                    data.data(), data.size());
+        codeset.segments[i].addr = module_start + nso_header.segments[i].location;
+        codeset.segments[i].offset = module_start + nso_header.segments[i].location;
         codeset.segments[i].size = nso_header.segments[i].size;
     }
 
105 121
@@ -118,7 +134,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
     }
 
     codeset.DataSegment().size += nso_header.segments[2].bss_size;
-    const u32 image_size{
+    u32 image_size{
         PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)};
     program_image.resize(image_size);
 
@@ -129,15 +145,44 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
     // Apply patches if necessary
     const auto name = nso_file.GetName();
     if (pm && (pm->HasNSOPatch(nso_header.build_id, name) || Settings::values.dump_nso)) {
-        std::vector<u8> pi_header(sizeof(NSOHeader) + program_image.size());
+        std::span<u8> patchable_section(program_image.data() + module_start,
+                                        program_image.size() - module_start);
+        std::vector<u8> pi_header(sizeof(NSOHeader) + patchable_section.size());
         std::memcpy(pi_header.data(), &nso_header, sizeof(NSOHeader));
-        std::memcpy(pi_header.data() + sizeof(NSOHeader), program_image.data(),
-                    program_image.size());
+        std::memcpy(pi_header.data() + sizeof(NSOHeader), patchable_section.data(),
+                    patchable_section.size());
 
         pi_header = pm->PatchNSO(pi_header, name);
 
-        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
+        std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), patchable_section.data());
+    }
+
+#ifdef HAS_NCE
+    // If we are computing the process code layout and using nce backend, patch.
+    const auto& code = codeset.CodeSegment();
+    if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) {
+        // Patch SVCs and MRS calls in the guest code
+        patch->PatchText(program_image, code);
+
+        // Add patch section size to the module size.
+        image_size += static_cast<u32>(patch->GetSectionSize());
+    } else if (patch) {
+        // Relocate code patch and copy to the program_image.
+        patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers());
+
+        // Update patch section.
+        auto& patch_segment = codeset.PatchSegment();
+        patch_segment.addr =
+            patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size;
+        patch_segment.size = static_cast<u32>(patch->GetSectionSize());
+
+        // Add patch section size to the module size. In PreText mode image_size
+        // already contains the patch segment as part of module_start.
+        if (patch->GetPatchMode() == Core::NCE::PatchMode::PostData) {
+            image_size += patch_segment.size;
+        }
     }
+#endif
 
     // If we aren't actually loading (i.e. just computing the process code layout), we are done
     if (!load_into_process) {
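For NSOs the patcher supports two placements: PreText reserves module_start bytes in front of the image, shifting every segment up and putting the patch section at offset 0, while PostData appends it after the data segment as with NROs. A small worked comparison with made-up sizes:

#include <cstdint>
#include <iostream>

int main() {
    const std::uint64_t patch_size = 0x2000;  // made-up patch section size
    const std::uint64_t image_size = 0x30000; // made-up page-aligned image size

    // PreText: module_start == patch_size, so text starts after the patch section.
    const std::uint64_t pretext_text_addr = patch_size;
    const std::uint64_t pretext_patch_addr = 0;

    // PostData: segments keep their offsets; the patch section lands at the end.
    const std::uint64_t postdata_text_addr = 0;
    const std::uint64_t postdata_patch_addr = image_size;

    std::cout << std::hex << "PreText: text@0x" << pretext_text_addr << " patch@0x"
              << pretext_patch_addr << "; PostData: text@0x" << postdata_text_addr
              << " patch@0x" << postdata_patch_addr << '\n';
}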
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 0b53b4ecd..29b86ed4c 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -15,6 +15,10 @@ namespace Core {
 class System;
 }
 
+namespace Core::NCE {
+class Patcher;
+}
+
 namespace Kernel {
 class KProcess;
 }
@@ -88,7 +92,8 @@ public:
     static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system,
                                            const FileSys::VfsFile& nso_file, VAddr load_base,
                                            bool should_pass_arguments, bool load_into_process,
-                                           std::optional<FileSys::PatchManager> pm = {});
+                                           std::optional<FileSys::PatchManager> pm = {},
+                                           Core::NCE::Patcher* patch = nullptr);
 
     LoadResult Load(Kernel::KProcess& process, Core::System& system) override;
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index a3431772a..5b376b202 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -53,7 +53,7 @@ struct Memory::Impl {
     }
 
     void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
-                         Common::PhysicalAddress target) {
+                         Common::PhysicalAddress target, Common::MemoryPermission perms) {
         ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
         ASSERT_MSG((base & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", GetInteger(base));
         ASSERT_MSG(target >= DramMemoryMap::Base, "Out of bounds target: {:016X}",
@@ -63,7 +63,7 @@ struct Memory::Impl {
 
         if (Settings::IsFastmemEnabled()) {
             system.DeviceMemory().buffer.Map(GetInteger(base),
-                                             GetInteger(target) - DramMemoryMap::Base, size);
+                                             GetInteger(target) - DramMemoryMap::Base, size, perms);
         }
     }
 
@@ -78,6 +78,51 @@ struct Memory::Impl {
         }
     }
 
+    void ProtectRegion(Common::PageTable& page_table, VAddr vaddr, u64 size,
+                       Common::MemoryPermission perms) {
+        ASSERT_MSG((size & YUZU_PAGEMASK) == 0, "non-page aligned size: {:016X}", size);
+        ASSERT_MSG((vaddr & YUZU_PAGEMASK) == 0, "non-page aligned base: {:016X}", vaddr);
+
+        if (!Settings::IsFastmemEnabled()) {
+            return;
+        }
+
+        const bool is_r = True(perms & Common::MemoryPermission::Read);
+        const bool is_w = True(perms & Common::MemoryPermission::Write);
+        const bool is_x =
+            True(perms & Common::MemoryPermission::Execute) && Settings::IsNceEnabled();
+
+        if (!current_page_table) {
+            system.DeviceMemory().buffer.Protect(vaddr, size, is_r, is_w, is_x);
+            return;
+        }
+
+        u64 protect_bytes{};
+        u64 protect_begin{};
+        for (u64 addr = vaddr; addr < vaddr + size; addr += YUZU_PAGESIZE) {
+            const Common::PageType page_type{
+                current_page_table->pointers[addr >> YUZU_PAGEBITS].Type()};
+            switch (page_type) {
+            case Common::PageType::RasterizerCachedMemory:
+                if (protect_bytes > 0) {
+                    system.DeviceMemory().buffer.Protect(protect_begin, protect_bytes, is_r, is_w,
+                                                         is_x);
+                    protect_bytes = 0;
+                }
+                break;
+            default:
+                if (protect_bytes == 0) {
+                    protect_begin = addr;
+                }
+                protect_bytes += YUZU_PAGESIZE;
+            }
+        }
+
+        if (protect_bytes > 0) {
+            system.DeviceMemory().buffer.Protect(protect_begin, protect_bytes, is_r, is_w, is_x);
+        }
+    }
+
     [[nodiscard]] u8* GetPointerFromRasterizerCachedMemory(u64 vaddr) const {
         const Common::PhysicalAddress paddr{
             current_page_table->backing_addr[vaddr >> YUZU_PAGEBITS]};
@@ -831,14 +876,19 @@ void Memory::SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
 }
 
 void Memory::MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
-                             Common::PhysicalAddress target) {
-    impl->MapMemoryRegion(page_table, base, size, target);
+                             Common::PhysicalAddress target, Common::MemoryPermission perms) {
+    impl->MapMemoryRegion(page_table, base, size, target, perms);
 }
 
 void Memory::UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size) {
     impl->UnmapRegion(page_table, base, size);
 }
 
+void Memory::ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress vaddr, u64 size,
+                           Common::MemoryPermission perms) {
+    impl->ProtectRegion(page_table, GetInteger(vaddr), size, perms);
+}
+
 bool Memory::IsValidVirtualAddress(const Common::ProcessAddress vaddr) const {
     const Kernel::KProcess& process = *system.ApplicationProcess();
     const auto& page_table = process.GetPageTable().GetImpl();
@@ -1001,4 +1051,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
     impl->FlushRegion(dest_addr, size);
 }
 
+bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
+    bool mapped = true;
+    u8* const ptr = impl->GetPointerImpl(
+        GetInteger(vaddr),
+        [&] {
+            LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size,
+                      GetInteger(vaddr));
+            mapped = false;
+        },
+        [&] { impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); });
+    return mapped && ptr != nullptr;
+}
+
 } // namespace Core::Memory
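ProtectRegion above walks the page table one page at a time but batches consecutive protect-able pages into a single Protect() call, breaking the run at rasterizer-cached pages. A standalone sketch of that run-coalescing idea, callback-based and with no yuzu types:

#include <cstdint>
#include <functional>

constexpr std::uint64_t kPageSize = 0x1000;

// Applies `protect` to each maximal run of pages in [base, base + size)
// for which `is_excluded` is false, flushing the pending run at each
// excluded page and once more at the end.
void ProtectRuns(std::uint64_t base, std::uint64_t size,
                 const std::function<bool(std::uint64_t)>& is_excluded,
                 const std::function<void(std::uint64_t, std::uint64_t)>& protect) {
    std::uint64_t run_begin = 0;
    std::uint64_t run_bytes = 0;
    for (std::uint64_t addr = base; addr < base + size; addr += kPageSize) {
        if (is_excluded(addr)) {
            if (run_bytes > 0) {
                protect(run_begin, run_bytes); // flush the pending run
                run_bytes = 0;
            }
            continue;
        }
        if (run_bytes == 0) {
            run_begin = addr; // start a new run
        }
        run_bytes += kPageSize;
    }
    if (run_bytes > 0) {
        protect(run_begin, run_bytes); // flush the trailing run
    }
}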
diff --git a/src/core/memory.h b/src/core/memory.h
index 13047a545..ed8ebb5eb 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -15,8 +15,9 @@
 #include "core/hle/result.h"
 
 namespace Common {
+enum class MemoryPermission : u32;
 struct PageTable;
-}
+} // namespace Common
 
 namespace Core {
 class System;
@@ -82,9 +83,10 @@ public:
      * @param size The amount of bytes to map. Must be page-aligned.
      * @param target Buffer with the memory backing the mapping. Must be of length at least
      * `size`.
+     * @param perms The permissions to map the memory with.
      */
     void MapMemoryRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
-                         Common::PhysicalAddress target);
+                         Common::PhysicalAddress target, Common::MemoryPermission perms);
 
89 /** 91 /**
90 * Unmaps a region of the emulated process address space. 92 * Unmaps a region of the emulated process address space.
@@ -96,6 +98,17 @@ public:
     void UnmapRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size);
 
     /**
+     * Protects a region of the emulated process address space with the new permissions.
+     *
+     * @param page_table The page table of the emulated process.
+     * @param base The start address to re-protect. Must be page-aligned.
+     * @param size The amount of bytes to protect. Must be page-aligned.
+     * @param perms The permissions the address range is mapped.
+     */
+    void ProtectRegion(Common::PageTable& page_table, Common::ProcessAddress base, u64 size,
+                       Common::MemoryPermission perms);
+
+    /**
      * Checks whether or not the supplied address is a valid virtual
      * address for the current process.
      *
@@ -472,6 +485,7 @@ public:
 
     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
     void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
+    bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
     void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
 
 private:
diff --git a/src/tests/common/host_memory.cpp b/src/tests/common/host_memory.cpp
index 1b014b632..1a28e862b 100644
--- a/src/tests/common/host_memory.cpp
+++ b/src/tests/common/host_memory.cpp
@@ -11,6 +11,7 @@ using namespace Common::Literals;
 
 static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
 static constexpr size_t BACKING_SIZE = 4_GiB;
+static constexpr auto PERMS = Common::MemoryPermission::ReadWrite;
 
 TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
     { HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE); }
@@ -19,7 +20,7 @@ TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
 
 TEST_CASE("HostMemory: Simple map", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x5000, 0x8000, 0x1000);
+    mem.Map(0x5000, 0x8000, 0x1000, PERMS);
 
     volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
     data[0] = 50;
@@ -28,8 +29,8 @@ TEST_CASE("HostMemory: Simple map", "[common]") {
 
 TEST_CASE("HostMemory: Simple mirror map", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x5000, 0x3000, 0x2000);
-    mem.Map(0x8000, 0x4000, 0x1000);
+    mem.Map(0x5000, 0x3000, 0x2000, PERMS);
+    mem.Map(0x8000, 0x4000, 0x1000, PERMS);
 
     volatile u8* const mirror_a = mem.VirtualBasePointer() + 0x5000;
     volatile u8* const mirror_b = mem.VirtualBasePointer() + 0x8000;
@@ -39,7 +40,7 @@ TEST_CASE("HostMemory: Simple mirror map", "[common]") {
 
 TEST_CASE("HostMemory: Simple unmap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x5000, 0x3000, 0x2000);
+    mem.Map(0x5000, 0x3000, 0x2000, PERMS);
 
     volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
     data[75] = 50;
@@ -50,7 +51,7 @@ TEST_CASE("HostMemory: Simple unmap", "[common]") {
 
 TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x5000, 0x3000, 0x2000);
+    mem.Map(0x5000, 0x3000, 0x2000, PERMS);
 
     volatile u8* const data = mem.VirtualBasePointer() + 0x5000;
     data[0] = 50;
@@ -58,79 +59,79 @@ TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
 
     mem.Unmap(0x5000, 0x2000);
 
-    mem.Map(0x5000, 0x3000, 0x2000);
+    mem.Map(0x5000, 0x3000, 0x2000, PERMS);
     REQUIRE(data[0] == 50);
 
-    mem.Map(0x7000, 0x2000, 0x5000);
+    mem.Map(0x7000, 0x2000, 0x5000, PERMS);
     REQUIRE(data[0x3000] == 50);
 }
 
 TEST_CASE("HostMemory: Nieche allocation", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x0000, 0, 0x20000);
+    mem.Map(0x0000, 0, 0x20000, PERMS);
     mem.Unmap(0x0000, 0x4000);
-    mem.Map(0x1000, 0, 0x2000);
-    mem.Map(0x3000, 0, 0x1000);
-    mem.Map(0, 0, 0x1000);
+    mem.Map(0x1000, 0, 0x2000, PERMS);
+    mem.Map(0x3000, 0, 0x1000, PERMS);
+    mem.Map(0, 0, 0x1000, PERMS);
 }
 
 TEST_CASE("HostMemory: Full unmap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x8000, 0, 0x4000);
+    mem.Map(0x8000, 0, 0x4000, PERMS);
     mem.Unmap(0x8000, 0x4000);
-    mem.Map(0x6000, 0, 0x16000);
+    mem.Map(0x6000, 0, 0x16000, PERMS);
 }
 
 TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x0000, 0, 0x4000);
+    mem.Map(0x0000, 0, 0x4000, PERMS);
     mem.Unmap(0x2000, 0x4000);
-    mem.Map(0x2000, 0x80000, 0x4000);
+    mem.Map(0x2000, 0x80000, 0x4000, PERMS);
 }
 
 TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x8000, 0, 0x4000);
+    mem.Map(0x8000, 0, 0x4000, PERMS);
     mem.Unmap(0x6000, 0x4000);
-    mem.Map(0x8000, 0, 0x2000);
+    mem.Map(0x8000, 0, 0x2000, PERMS);
 }
 
 TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x0000, 0, 0x4000);
-    mem.Map(0x4000, 0, 0x1b000);
+    mem.Map(0x0000, 0, 0x4000, PERMS);
+    mem.Map(0x4000, 0, 0x1b000, PERMS);
     mem.Unmap(0x3000, 0x1c000);
-    mem.Map(0x3000, 0, 0x20000);
+    mem.Map(0x3000, 0, 0x20000, PERMS);
 }
 
 TEST_CASE("HostMemory: Unmap between placeholders", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x0000, 0, 0x4000);
-    mem.Map(0x4000, 0, 0x4000);
+    mem.Map(0x0000, 0, 0x4000, PERMS);
+    mem.Map(0x4000, 0, 0x4000, PERMS);
     mem.Unmap(0x2000, 0x4000);
-    mem.Map(0x2000, 0, 0x4000);
+    mem.Map(0x2000, 0, 0x4000, PERMS);
 }
 
 TEST_CASE("HostMemory: Unmap to origin", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0, 0x4000);
-    mem.Map(0x8000, 0, 0x4000);
+    mem.Map(0x4000, 0, 0x4000, PERMS);
+    mem.Map(0x8000, 0, 0x4000, PERMS);
     mem.Unmap(0x4000, 0x4000);
-    mem.Map(0, 0, 0x4000);
-    mem.Map(0x4000, 0, 0x4000);
+    mem.Map(0, 0, 0x4000, PERMS);
+    mem.Map(0x4000, 0, 0x4000, PERMS);
 }
 
 TEST_CASE("HostMemory: Unmap to right", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0, 0x4000);
-    mem.Map(0x8000, 0, 0x4000);
+    mem.Map(0x4000, 0, 0x4000, PERMS);
+    mem.Map(0x8000, 0, 0x4000, PERMS);
     mem.Unmap(0x8000, 0x4000);
-    mem.Map(0x8000, 0, 0x4000);
+    mem.Map(0x8000, 0, 0x4000, PERMS);
 }
 
 TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0x10000, 0x4000);
+    mem.Map(0x4000, 0x10000, 0x4000, PERMS);
 
     volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
     ptr[0x1000] = 17;
@@ -142,7 +143,7 @@ TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
 
 TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0x10000, 0x4000);
+    mem.Map(0x4000, 0x10000, 0x4000, PERMS);
 
     volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
     ptr[0x3000] = 19;
@@ -156,7 +157,7 @@ TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
 
 TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0x10000, 0x4000);
+    mem.Map(0x4000, 0x10000, 0x4000, PERMS);
 
     volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
     ptr[0x0000] = 19;
@@ -170,8 +171,8 @@ TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
 
 TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") {
     HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
-    mem.Map(0x4000, 0x10000, 0x2000);
-    mem.Map(0x6000, 0x20000, 0x2000);
+    mem.Map(0x4000, 0x10000, 0x2000, PERMS);
+    mem.Map(0x6000, 0x20000, 0x2000, PERMS);
 
     volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
     ptr[0x0000] = 19;
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index a51359903..7e16cf17d 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -27,6 +27,13 @@ ConfigureCpu::ConfigureCpu(const Core::System& system_,
 
     connect(accuracy_combobox, qOverload<int>(&QComboBox::currentIndexChanged), this,
             &ConfigureCpu::UpdateGroup);
+
+    connect(backend_combobox, qOverload<int>(&QComboBox::currentIndexChanged), this,
+            &ConfigureCpu::UpdateGroup);
+
+#ifdef HAS_NCE
+    ui->backend_group->setVisible(true);
+#endif
 }
 
 ConfigureCpu::~ConfigureCpu() = default;
31 38
32ConfigureCpu::~ConfigureCpu() = default; 39ConfigureCpu::~ConfigureCpu() = default;
@@ -34,6 +41,7 @@ ConfigureCpu::~ConfigureCpu() = default;
 void ConfigureCpu::SetConfiguration() {}
 void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) {
     auto* accuracy_layout = ui->widget_accuracy->layout();
+    auto* backend_layout = ui->widget_backend->layout();
     auto* unsafe_layout = ui->unsafe_widget->layout();
     std::map<u32, QWidget*> unsafe_hold{};
 
@@ -62,6 +70,9 @@ void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) {
             // Keep track of cpu_accuracy combobox to display/hide the unsafe settings
             accuracy_layout->addWidget(widget);
             accuracy_combobox = widget->combobox;
+        } else if (setting->Id() == Settings::values.cpu_backend.Id()) {
+            backend_layout->addWidget(widget);
+            backend_combobox = widget->combobox;
         } else {
             // Presently, all other settings here are unsafe checkboxes
             unsafe_hold.insert({setting->Id(), widget});
@@ -73,6 +84,7 @@ void ConfigureCpu::Setup(const ConfigurationShared::Builder& builder) {
     }
 
     UpdateGroup(accuracy_combobox->currentIndex());
+    UpdateGroup(backend_combobox->currentIndex());
 }
 
 void ConfigureCpu::UpdateGroup(int index) {
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index 61a6de7aa..a102b4c1f 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -49,4 +49,5 @@ private:
     std::vector<std::function<void(bool)>> apply_funcs{};
 
     QComboBox* accuracy_combobox;
+    QComboBox* backend_combobox;
 };
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index f734e842e..13fd43605 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -60,6 +60,36 @@
    </widget>
   </item>
   <item>
+   <widget class="QGroupBox" name="backend_group">
+    <property name="title">
+     <string>CPU Backend</string>
+    </property>
+    <layout class="QVBoxLayout">
+     <item>
+      <widget class="QWidget" name="widget_backend" native="true">
+       <layout class="QVBoxLayout" name="verticalLayout1">
+        <property name="leftMargin">
+         <number>0</number>
+        </property>
+        <property name="topMargin">
+         <number>0</number>
+        </property>
+        <property name="rightMargin">
+         <number>0</number>
+        </property>
+        <property name="bottomMargin">
+         <number>0</number>
+        </property>
+       </layout>
+      </widget>
+     </item>
+    </layout>
+    <property name="visible">
+     <bool>false</bool>
+    </property>
+   </widget>
+  </item>
+  <item>
    <widget class="QGroupBox" name="unsafe_group">
     <property name="title">
      <string>Unsafe CPU Optimization Settings</string>
diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp
index ee0ca4aa7..7e908924c 100644
--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -44,6 +44,7 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent) {
 
     // Cpu
     INSERT(Settings, cpu_accuracy, tr("Accuracy:"), QStringLiteral());
+    INSERT(Settings, cpu_backend, tr("Backend:"), QStringLiteral());
 
     // Cpu Debug
 
@@ -243,6 +244,11 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QWidget* parent) {
                               PAIR(CpuAccuracy, Unsafe, tr("Unsafe")),
                               PAIR(CpuAccuracy, Paranoid, tr("Paranoid (disables most optimizations)")),
                           }});
+    translations->insert({Settings::EnumMetadata<Settings::CpuBackend>::Index(),
+                          {
+                              PAIR(CpuBackend, Dynarmic, tr("Dynarmic")),
+                              PAIR(CpuBackend, Nce, tr("NCE")),
+                          }});
     translations->insert({Settings::EnumMetadata<Settings::FullscreenMode>::Index(),
                           {
                               PAIR(FullscreenMode, Borderless, tr("Borderless Windowed")),