summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--externals/CMakeLists.txt8
-rw-r--r--src/common/host_memory.cpp10
-rw-r--r--src/common/settings.cpp4
-rw-r--r--src/common/settings.h2
-rw-r--r--src/core/CMakeLists.txt16
-rw-r--r--src/core/arm/arm_interface.h3
-rw-r--r--src/core/arm/nce/arm_nce.cpp395
-rw-r--r--src/core/arm/nce/arm_nce.h108
-rw-r--r--src/core/arm/nce/arm_nce.s222
-rw-r--r--src/core/arm/nce/arm_nce_asm_definitions.h29
-rw-r--r--src/core/arm/nce/guest_context.h50
-rw-r--r--src/core/arm/nce/instructions.h147
-rw-r--r--src/core/arm/nce/patch.cpp472
-rw-r--r--src/core/arm/nce/patch.h107
-rw-r--r--src/core/core.cpp4
-rw-r--r--src/core/cpu_manager.cpp2
-rw-r--r--src/core/device_memory.cpp8
-rw-r--r--src/core/device_memory.h2
-rw-r--r--src/core/hle/kernel/code_set.h9
-rw-r--r--src/core/hle/kernel/k_address_space_info.cpp4
-rw-r--r--src/core/hle/kernel/k_process.cpp11
-rw-r--r--src/core/hle/kernel/k_process.h9
-rw-r--r--src/core/hle/kernel/k_thread.h16
-rw-r--r--src/core/hle/kernel/physical_core.cpp14
-rw-r--r--src/core/loader/deconstructed_rom_directory.cpp61
-rw-r--r--src/core/loader/nro.cpp62
-rw-r--r--src/core/loader/nro.h2
-rw-r--r--src/core/loader/nso.cpp56
-rw-r--r--src/core/loader/nso.h7
-rw-r--r--src/core/memory.cpp13
-rw-r--r--src/core/memory.h1
31 files changed, 1803 insertions, 51 deletions
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 1d2be1459..8f83d4991 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -20,16 +20,16 @@ if ((ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) AND NOT TARGET xbyak::xbyak)
20endif() 20endif()
21 21
22# Dynarmic 22# Dynarmic
23if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut)
24 add_subdirectory(oaknut)
25endif()
26
23if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic) 27if ((ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) AND NOT TARGET dynarmic::dynarmic)
24 set(DYNARMIC_IGNORE_ASSERTS ON) 28 set(DYNARMIC_IGNORE_ASSERTS ON)
25 add_subdirectory(dynarmic) 29 add_subdirectory(dynarmic)
26 add_library(dynarmic::dynarmic ALIAS dynarmic) 30 add_library(dynarmic::dynarmic ALIAS dynarmic)
27endif() 31endif()
28 32
29if (ARCHITECTURE_arm64 AND NOT TARGET merry::oaknut)
30 add_subdirectory(oaknut)
31endif()
32
33# getopt 33# getopt
34if (MSVC) 34if (MSVC)
35 add_subdirectory(getopt) 35 add_subdirectory(getopt)
diff --git a/src/common/host_memory.cpp b/src/common/host_memory.cpp
index 3e4b34de6..38d7b29f7 100644
--- a/src/common/host_memory.cpp
+++ b/src/common/host_memory.cpp
@@ -189,6 +189,11 @@ public:
189 } 189 }
190 } 190 }
191 191
192 void EnableDirectMappedAddress() {
193 // TODO
194 UNREACHABLE();
195 }
196
192 const size_t backing_size; ///< Size of the backing memory in bytes 197 const size_t backing_size; ///< Size of the backing memory in bytes
193 const size_t virtual_size; ///< Size of the virtual address placeholder in bytes 198 const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
194 199
@@ -340,11 +345,6 @@ private:
340 return false; 345 return false;
341 } 346 }
342 347
343 void EnableDirectMappedAddress() {
344 // TODO
345 UNREACHABLE();
346 }
347
348 HANDLE process{}; ///< Current process handle 348 HANDLE process{}; ///< Current process handle
349 HANDLE backing_handle{}; ///< File based backing memory 349 HANDLE backing_handle{}; ///< File based backing memory
350 350
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 167e984a6..81a036ef0 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -158,8 +158,8 @@ bool IsFastmemEnabled() {
158 158
159static bool is_nce_enabled = false; 159static bool is_nce_enabled = false;
160 160
161void SetNceEnabled(bool is_64bit) { 161void SetNceEnabled(bool is_39bit) {
162 is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_64bit; 162 is_nce_enabled = values.cpu_backend.GetValue() == CpuBackend::Nce && is_39bit;
163} 163}
164 164
165bool IsNceEnabled() { 165bool IsNceEnabled() {
diff --git a/src/common/settings.h b/src/common/settings.h
index fea639ee3..648e0be0d 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -181,7 +181,7 @@ struct Values {
181 181
182 // Cpu 182 // Cpu
183 SwitchableSetting<CpuBackend, true> cpu_backend{ 183 SwitchableSetting<CpuBackend, true> cpu_backend{
184 linkage, CpuBackend::Dynarmic, CpuBackend::Dynarmic, 184 linkage, CpuBackend::Nce, CpuBackend::Dynarmic,
185#ifdef ARCHITECTURE_arm64 185#ifdef ARCHITECTURE_arm64
186 CpuBackend::Nce, 186 CpuBackend::Nce,
187#else 187#else
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 66c10fc3f..c5805ec61 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -926,6 +926,22 @@ if (ENABLE_WEB_SERVICE)
926 target_link_libraries(core PRIVATE web_service) 926 target_link_libraries(core PRIVATE web_service)
927endif() 927endif()
928 928
929if (ARCHITECTURE_arm64)
930 enable_language(C ASM)
931 set(CMAKE_ASM_FLAGS "${CFLAGS} -x assembler-with-cpp")
932
933 target_sources(core PRIVATE
934 arm/nce/arm_nce.cpp
935 arm/nce/arm_nce.h
936 arm/nce/arm_nce.s
937 arm/nce/guest_context.h
938 arm/nce/patch.cpp
939 arm/nce/patch.h
940 arm/nce/instructions.h
941 )
942 target_link_libraries(core PRIVATE merry::oaknut)
943endif()
944
929if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64) 945if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
930 target_sources(core PRIVATE 946 target_sources(core PRIVATE
931 arm/dynarmic/arm_dynarmic.h 947 arm/dynarmic/arm_dynarmic.h
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 3d866ff6f..a9d9ac09d 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -81,6 +81,9 @@ public:
81 // thread context to be 800 bytes in size. 81 // thread context to be 800 bytes in size.
82 static_assert(sizeof(ThreadContext64) == 0x320); 82 static_assert(sizeof(ThreadContext64) == 0x320);
83 83
84 /// Perform any backend-specific initialization.
85 virtual void Initialize() {}
86
84 /// Runs the CPU until an event happens 87 /// Runs the CPU until an event happens
85 void Run(); 88 void Run();
86 89
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
new file mode 100644
index 000000000..511248a0d
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -0,0 +1,395 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <cinttypes>
5#include <memory>
6
7#include "common/scope_exit.h"
8#include "common/signal_chain.h"
9#include "core/arm/nce/arm_nce.h"
10#include "core/arm/nce/patch.h"
11#include "core/core.h"
12#include "core/memory.h"
13
14#include "core/hle/kernel/k_process.h"
15
16#include <signal.h>
17#include <sys/syscall.h>
18#include <unistd.h>
19
20namespace Core {
21
22namespace {
23
24struct sigaction g_orig_action;
25
26// Verify assembly offsets.
27using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
28static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
29static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
30static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
31
32fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
33 _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
34 while (header->magic != FPSIMD_MAGIC) {
35 header = reinterpret_cast<_aarch64_ctx*>((char*)header + header->size);
36 }
37 return reinterpret_cast<fpsimd_context*>(header);
38}
39
40} // namespace
41
// Rewrites a signal frame so that returning from the signal handler resumes
// execution inside the guest, and stashes the host's callee-saved state so
// SaveGuestContext can restore it later. Returns the thread parameter block
// that the assembly caller must install into tpidr_el0.
void* ARM_NCE::RestoreGuestContext(void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

    // Thread-local parameters will be located in x9.
    // (ReturnToRunCodeByExceptionLevelChange in arm_nce.s moves the tpidr
    // argument into x9 before raising the signal that lands us here.)
    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);

    // Retrieve the host floating point state.
    auto* fpctx = GetFloatingPointState(host_ctx);

    // Save host callee-saved registers (q8-q15 and x19-x30) into the guest's
    // HostContext; SaveGuestContext restores them on the way back out.
    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
                sizeof(guest_ctx->host_ctx.host_saved_vregs));
    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
                sizeof(guest_ctx->host_ctx.host_saved_regs));

    // Save stack pointer.
    guest_ctx->host_ctx.host_sp = host_ctx.sp;

    // Restore all guest state except tpidr_el0 into the signal frame; the
    // kernel's sigreturn will load it into the real registers.
    host_ctx.sp = guest_ctx->sp;
    host_ctx.pc = guest_ctx->pc;
    host_ctx.pstate = guest_ctx->pstate;
    fpctx->fpcr = guest_ctx->fpcr;
    fpctx->fpsr = guest_ctx->fpsr;
    std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
    std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));

    // Return the new thread-local storage pointer.
    // tpidr_el0 itself is set by the assembly caller, which first saves the
    // old (host) value into the guest's HostContext.
    return tpidr;
}
74
// Captures guest register state out of a signal frame, then rewrites the
// frame so that returning from the signal handler resumes host execution at
// the point that originally entered guest code.
void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

    // Retrieve the host floating point state.
    auto* fpctx = GetFloatingPointState(host_ctx);

    // Save all guest registers except tpidr_el0.
    // (The assembly callers restore the host tpidr_el0 themselves before
    // calling this function.)
    std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
    std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
    guest_ctx->fpsr = fpctx->fpsr;
    guest_ctx->fpcr = fpctx->fpcr;
    guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
    guest_ctx->pc = host_ctx.pc;
    guest_ctx->sp = host_ctx.sp;

    // Restore stack pointer.
    host_ctx.sp = guest_ctx->host_ctx.host_sp;

    // Restore host callee-saved registers, stashed earlier by
    // RestoreGuestContext / ReturnToRunCodeByTrampoline.
    std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
                sizeof(guest_ctx->host_ctx.host_saved_regs));
    std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
                sizeof(guest_ctx->host_ctx.host_saved_vregs));

    // Return from the call on exit by setting pc to x30.
    // host_saved_regs holds x19..x30, so index 11 is the saved link register.
    host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];

    // Clear esr_el1 and return it.
    // x0 is the AArch64 return-value register, so sigreturn delivers the
    // accumulated HaltReason bits as the return value of Run*.
    host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
}
106
// Handles a SIGSEGV raised while executing guest code.
// Returns true if the access was repaired and the guest may resume, false if
// the fault must be surfaced as a guest abort (in which case the guest
// context has already been saved back to the host).
bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
    auto* info = static_cast<siginfo_t*>(raw_info);

    // Try to handle an invalid access.
    // TODO: handle accesses which split a page?
    const Common::ProcessAddress addr =
        (reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
    if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
        // We handled the access successfully and are returning to guest code.
        return true;
    }

    // We can't handle the access, so trigger an exception.
    // If the faulting address is the pc itself, this was an instruction fetch.
    const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
    guest_ctx->esr_el1.fetch_or(
        static_cast<u64>(is_prefetch_abort ? HaltReason::PrefetchAbort : HaltReason::DataAbort));

    // Forcibly mark the context as locked. We are still running.
    // We may race with SignalInterrupt here:
    // - If we lose the race, then SignalInterrupt will send us a signal which we are masking,
    //   and it will do nothing when it is unmasked, as we have already left guest code.
    // - If we win the race, then SignalInterrupt will wait for us to unlock first.
    auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
    thread_params.lock.store(SpinLockLocked);

    // Return to host.
    SaveGuestContext(guest_ctx, raw_context);
    return false;
}
137
138void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
139 return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
140}
141
// Runs guest code on the calling thread until a halt condition accumulates
// in esr_el1, then returns the accumulated HaltReason bits.
HaltReason ARM_NCE::RunJit() {
    // Get the thread parameters.
    // TODO: pass the current thread down from ::Run
    auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
    auto* thread_params = &thread->GetNativeExecutionParameters();

    {
        // Lock our core context.
        std::scoped_lock lk{lock};

        // We should not be running.
        ASSERT(running_thread == nullptr);

        // Check if we need to run. If we have already been halted, we are done.
        u64 halt = guest_ctx.esr_el1.exchange(0);
        if (halt != 0) {
            return static_cast<HaltReason>(halt);
        }

        // Mark that we are running.
        running_thread = thread;

        // Acquire the lock on the thread parameters.
        // This allows us to force synchronization with SignalInterrupt.
        LockThreadParameters(thread_params);
    }

    // Assign current members.
    guest_ctx.parent = this;
    thread_params->native_context = &guest_ctx;
    thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
    thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
    thread_params->is_running = true;

    HaltReason halt{};

    // TODO: finding and creating the post handler needs to be locked
    // to deal with dynamic loading of NROs.
    // Two entry paths: if the current guest pc has a patched post-SVC
    // trampoline, enter through it directly; otherwise enter via a signal
    // (exception-level change), which restores the full context.
    const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
    if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
        halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
    } else {
        halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
    }

    // Unload members.
    // The thread does not change, so we can persist the old reference.
    guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
    thread_params->native_context = nullptr;
    thread_params->is_running = false;

    // Unlock the thread parameters.
    UnlockThreadParameters(thread_params);

    {
        // Lock the core context.
        std::scoped_lock lk{lock};

        // On exit, we no longer have an active thread.
        running_thread = nullptr;
    }

    // Return the halt reason.
    return halt;
}
207
208HaltReason ARM_NCE::StepJit() {
209 return HaltReason::StepThread;
210}
211
212u32 ARM_NCE::GetSvcNumber() const {
213 return guest_ctx.svc_swi;
214}
215
// Constructs the NCE backend for a single core. The guest context keeps a
// back-pointer to the system so the fault handler can reach guest memory.
ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
    : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
    guest_ctx.system = &system_;
}

ARM_NCE::~ARM_NCE() = default;
222
// Called on the emulation thread that will run this core: records its tid
// (target of SignalInterrupt's tkill) and installs the process-wide signal
// handlers exactly once.
void ARM_NCE::Initialize() {
    thread_id = gettid();

    // Setup our signals
    static std::once_flag flag;
    std::call_once(flag, [] {
        using HandlerType = decltype(sigaction::sa_sigaction);

        // While any of our handlers runs, block the other NCE signals so the
        // transitions cannot interrupt each other.
        sigset_t signal_mask;
        sigemptyset(&signal_mask);
        sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
        sigaddset(&signal_mask, BreakFromRunCodeSignal);
        sigaddset(&signal_mask, GuestFaultSignal);

        // Handler that enters guest code by rewriting the signal frame.
        struct sigaction return_to_run_code_action {};
        return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
            &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
        return_to_run_code_action.sa_mask = signal_mask;
        Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
                          nullptr);

        // Handler that exits guest code when SignalInterrupt fires.
        struct sigaction break_from_run_code_action {};
        break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        break_from_run_code_action.sa_sigaction =
            reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
        break_from_run_code_action.sa_mask = signal_mask;
        Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);

        // SIGSEGV handler; dispatches to guest or host fault handling based
        // on the TLS magic. The previous handler is kept in g_orig_action.
        struct sigaction fault_action {};
        fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
        fault_action.sa_sigaction =
            reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
        fault_action.sa_mask = signal_mask;
        Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);

        // Simplify call for g_orig_action.
        // These fields occupy the same space in memory, so this should be a no-op in practice.
        if (!(g_orig_action.sa_flags & SA_SIGINFO)) {
            g_orig_action.sa_sigaction =
                reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler);
        }
    });
}
267
// --- Guest register/state accessors ---
// These read and write the cached guest_ctx. While guest code is actively
// running on this core the authoritative state lives in host registers (see
// RestoreGuestContext), so these reflect the last saved snapshot.

void ARM_NCE::SetPC(u64 pc) {
    guest_ctx.pc = pc;
}

u64 ARM_NCE::GetPC() const {
    return guest_ctx.pc;
}

u64 ARM_NCE::GetSP() const {
    return guest_ctx.sp;
}

u64 ARM_NCE::GetReg(int index) const {
    return guest_ctx.cpu_registers[index];
}

void ARM_NCE::SetReg(int index, u64 value) {
    guest_ctx.cpu_registers[index] = value;
}

u128 ARM_NCE::GetVectorReg(int index) const {
    return guest_ctx.vector_registers[index];
}

void ARM_NCE::SetVectorReg(int index, u128 value) {
    guest_ctx.vector_registers[index] = value;
}

u32 ARM_NCE::GetPSTATE() const {
    return guest_ctx.pstate;
}

void ARM_NCE::SetPSTATE(u32 pstate) {
    guest_ctx.pstate = pstate;
}

// The guest TLS address is exposed through the read-only tpidrro_el0.
u64 ARM_NCE::GetTlsAddress() const {
    return guest_ctx.tpidrro_el0;
}

void ARM_NCE::SetTlsAddress(u64 address) {
    guest_ctx.tpidrro_el0 = address;
}

u64 ARM_NCE::GetTPIDR_EL0() const {
    return guest_ctx.tpidr_el0;
}

void ARM_NCE::SetTPIDR_EL0(u64 value) {
    guest_ctx.tpidr_el0 = value;
}
319
320void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
321 ctx.cpu_registers = guest_ctx.cpu_registers;
322 ctx.sp = guest_ctx.sp;
323 ctx.pc = guest_ctx.pc;
324 ctx.pstate = guest_ctx.pstate;
325 ctx.vector_registers = guest_ctx.vector_registers;
326 ctx.fpcr = guest_ctx.fpcr;
327 ctx.fpsr = guest_ctx.fpsr;
328 ctx.tpidr = guest_ctx.tpidr_el0;
329}
330
331void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
332 guest_ctx.cpu_registers = ctx.cpu_registers;
333 guest_ctx.sp = ctx.sp;
334 guest_ctx.pc = ctx.pc;
335 guest_ctx.pstate = ctx.pstate;
336 guest_ctx.vector_registers = ctx.vector_registers;
337 guest_ctx.fpcr = ctx.fpcr;
338 guest_ctx.fpsr = ctx.fpsr;
339 guest_ctx.tpidr_el0 = ctx.tpidr;
340}
341
// Requests that the core stop running guest code. May be called from any
// thread; synchronizes with the running thread via the per-thread spinlock.
void ARM_NCE::SignalInterrupt() {
    // Lock core context.
    std::scoped_lock lk{lock};

    // Add break loop condition.
    guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));

    // If there is no thread running, we are done.
    if (running_thread == nullptr) {
        return;
    }

    // Lock the thread context.
    // This spins until the running thread (or its fault handler) releases
    // the lock, forcing synchronization with guest entry/exit.
    auto* params = &running_thread->GetNativeExecutionParameters();
    LockThreadParameters(params);

    if (params->is_running) {
        // We should signal to the running thread.
        // The running thread will unlock the thread context.
        syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
    } else {
        // If the thread is no longer running, we have nothing to do.
        UnlockThreadParameters(params);
    }
}
367
void ARM_NCE::ClearInterrupt() {
    // Drop any pending halt-reason bits.
    guest_ctx.esr_el1 = {};
}

void ARM_NCE::ClearInstructionCache() {
    // TODO: This is not possible to implement correctly on Linux because
    // we do not have any access to ic iallu.

    // Require accesses to complete.
    std::atomic_thread_fence(std::memory_order_seq_cst);
}

void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
    // Clean cache.
    // Guest code runs natively, so host cache maintenance suffices here.
    auto* ptr = reinterpret_cast<char*>(addr);
    __builtin___clear_cache(ptr, ptr + size);
}

void ARM_NCE::ClearExclusiveState() {
    // No-op.
}

void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
                               std::size_t new_address_space_size_in_bits) {
    // No-op. Page table is never used.
}
394
395} // namespace Core
diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h
new file mode 100644
index 000000000..5fbd6dbf3
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.h
@@ -0,0 +1,108 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <memory>
8#include <span>
9#include <unordered_map>
10#include <vector>
11
12#include "core/arm/arm_interface.h"
13#include "core/arm/nce/guest_context.h"
14
15namespace Core::Memory {
16class Memory;
17}
18
19namespace Core {
20
21class System;
22
/// CPU backend that executes AArch64 guest code natively (Native Code
/// Execution), entering and leaving guest state via signal handlers and the
/// assembly routines in arm_nce.s.
class ARM_NCE final : public ARM_Interface {
public:
    ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);

    ~ARM_NCE() override;

    void Initialize() override;
    void SetPC(u64 pc) override;
    u64 GetPC() const override;
    u64 GetSP() const override;
    u64 GetReg(int index) const override;
    void SetReg(int index, u64 value) override;
    u128 GetVectorReg(int index) const override;
    void SetVectorReg(int index, u128 value) override;

    u32 GetPSTATE() const override;
    void SetPSTATE(u32 pstate) override;
    u64 GetTlsAddress() const override;
    void SetTlsAddress(u64 address) override;
    void SetTPIDR_EL0(u64 value) override;
    u64 GetTPIDR_EL0() const override;

    Architecture GetArchitecture() const override {
        return Architecture::Aarch64;
    }

    // 32-bit contexts are not supported by this backend.
    void SaveContext(ThreadContext32& ctx) const override {}
    void SaveContext(ThreadContext64& ctx) const override;
    void LoadContext(const ThreadContext32& ctx) override {}
    void LoadContext(const ThreadContext64& ctx) override;

    void SignalInterrupt() override;
    void ClearInterrupt() override;
    void ClearExclusiveState() override;
    void ClearInstructionCache() override;
    void InvalidateCacheRange(u64 addr, std::size_t size) override;
    void PageTableChanged(Common::PageTable& new_page_table,
                          std::size_t new_address_space_size_in_bits) override;

protected:
    HaltReason RunJit() override;
    HaltReason StepJit() override;

    u32 GetSvcNumber() const override;

    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
        return nullptr;
    }

    void RewindBreakpointInstruction() override {}

private:
    // Assembly definitions (implemented in arm_nce.s).
    static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
                                                  u64 trampoline_addr);
    static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr);

    static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info,
                                                                   void* raw_context);
    static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context);
    static void GuestFaultSignalHandler(int sig, void* info, void* raw_context);

    static void LockThreadParameters(void* tpidr);
    static void UnlockThreadParameters(void* tpidr);

private:
    // C++ implementation functions for assembly definitions.
    static void* RestoreGuestContext(void* raw_context);
    static void SaveGuestContext(GuestContext* ctx, void* raw_context);
    static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context);
    static void HandleHostFault(int sig, void* info, void* raw_context);

public:
    // Members set on initialization.
    std::size_t core_index{};
    // Linux tid of the emulation thread running this core (set in Initialize;
    // target of SignalInterrupt's tkill).
    pid_t thread_id{-1};

    // Core context.
    GuestContext guest_ctx;

    // Thread and invalidation info.
    // 'lock' guards running_thread; see RunJit/SignalInterrupt.
    std::mutex lock;
    Kernel::KThread* running_thread{};
};
107
108} // namespace Core
diff --git a/src/core/arm/nce/arm_nce.s b/src/core/arm/nce/arm_nce.s
new file mode 100644
index 000000000..b98e09f31
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.s
@@ -0,0 +1,222 @@
1/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
2/* SPDX-License-Identifier: GPL-2.0-or-later */
3
4#include "core/arm/nce/arm_nce_asm_definitions.h"
5
/* Materialize a 32-bit immediate into a W register, 16 bits at a time. */
#define LOAD_IMMEDIATE_32(reg, val) \
    mov reg, #(((val) >> 0x00) & 0xFFFF); \
    movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16
9
10
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
/* Enters guest code directly (without a signal) by saving the host's
   callee-saved state into ctx->host_ctx and jumping to a patched trampoline. */
.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
    /* Back up host sp to x3. */
    /* Back up host tpidr_el0 to x4. */
    mov x3, sp
    mrs x4, tpidr_el0

    /* Load guest sp. x5 is used as a scratch register. */
    ldr x5, [x1, #(GuestContextSp)]
    mov sp, x5

    /* Offset GuestContext pointer to the host member. */
    add x5, x1, #(GuestContextHostContext)

    /* Save original host sp and tpidr_el0 (x3, x4) to host context. */
    stp x3, x4, [x5, #(HostContextSpTpidrEl0)]

    /* Save all callee-saved host GPRs (x19-x30, including fp/lr). */
    stp x19, x20, [x5, #(HostContextRegs+0x0)]
    stp x21, x22, [x5, #(HostContextRegs+0x10)]
    stp x23, x24, [x5, #(HostContextRegs+0x20)]
    stp x25, x26, [x5, #(HostContextRegs+0x30)]
    stp x27, x28, [x5, #(HostContextRegs+0x40)]
    stp x29, x30, [x5, #(HostContextRegs+0x50)]

    /* Save all callee-saved host FPRs (q8-q15). */
    stp q8, q9, [x5, #(HostContextVregs+0x0)]
    stp q10, q11, [x5, #(HostContextVregs+0x20)]
    stp q12, q13, [x5, #(HostContextVregs+0x40)]
    stp q14, q15, [x5, #(HostContextVregs+0x60)]

    /* Load guest tpidr_el0 from argument. */
    msr tpidr_el0, x0

    /* Tail call the trampoline to restore guest state. */
    /* SaveGuestContext later returns to our caller via the saved x30. */
    br x2
50
51
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
    /* This raises a signal whose handler restores the entire guest context. */
    /* On entry, x0 = thread id, which is already in place for tkill(2). */

    /* Move tpidr to x9 so it is not trampled. */
    /* RestoreGuestContext reads it back from the signal frame's x9 slot. */
    mov x9, x1

    /* Set up syscall arguments: tkill(tid, signal). */
    mov x8, #(__NR_tkill)
    mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)

    /* Raise the signal; the handler enters the guest and never returns here. */
    svc #0

    /* Block execution from flowing here. */
    brk #1000
72
73
/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
/* Rewrites the signal frame so that returning from this handler resumes
   execution inside the guest. */
.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
    stp x29, x30, [sp, #-0x10]!
    mov x29, sp

    /* Call the context restorer with the raw context. */
    /* It returns the guest thread parameter block (tpidr) in x0. */
    mov x0, x2
    bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv

    /* Save the old value of tpidr_el0 into the guest's HostContext. */
    mrs x8, tpidr_el0
    ldr x9, [x0, #(TpidrEl0NativeContext)]
    str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)]

    /* Set our new tpidr_el0. */
    msr tpidr_el0, x0

    /* Unlock the context. */
    bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv

    /* Returning from here will enter the guest. */
    ldp x29, x30, [sp], #0x10
    ret
100
101
/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
/* Delivered by SignalInterrupt; exits guest code if we are inside it. */
.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    /* Guest threads carry TlsMagic in their thread parameters; host TLS
       does not, which identifies a signal that arrived after guest exit. */
    mrs x8, tpidr_el0
    ldr w9, [x8, #(TpidrEl0TlsMagic)]

    LOAD_IMMEDIATE_32(w10, TlsMagic)

    cmp w9, w10
    b.ne 1f

    /* Correct TLS magic, so this is a guest interrupt. */
    /* Restore host tpidr_el0. */
    ldr x0, [x8, #(TpidrEl0NativeContext)]
    ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
    msr tpidr_el0, x3

    /* Tail call the restorer. */
    mov x1, x2
    b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv

    /* Returning from here will enter host code. */

1:
    /* Incorrect TLS magic, so this is a spurious signal. */
    ret
131
132
/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
/* SIGSEGV entry point; dispatches to guest or host fault handling based on
   whether tpidr_el0 currently points at a guest thread parameter block. */
.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    mrs x8, tpidr_el0
    ldr w9, [x8, #(TpidrEl0TlsMagic)]

    LOAD_IMMEDIATE_32(w10, TlsMagic)

    cmp w9, w10
    b.eq 1f

    /* Incorrect TLS magic, so this is a host fault. */
    /* Tail call the handler. */
    b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_

1:
    /* Correct TLS magic, so this is a guest fault. */
    stp x29, x30, [sp, #-0x20]!
    str x19, [sp, #0x10]
    mov x29, sp

    /* Save the old tpidr_el0. */
    mov x19, x8

    /* Restore host tpidr_el0 so the C++ handler runs with host TLS. */
    ldr x0, [x8, #(TpidrEl0NativeContext)]
    ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
    msr tpidr_el0, x3

    /* Call the handler. */
    bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_

    /* If the handler returned false, we want to preserve the host tpidr_el0. */
    /* NOTE(review): bool return is in w0; relies on the 32-bit result write
       zero-extending x0 so testing the full register is safe -- confirm. */
    cbz x0, 2f

    /* Otherwise, restore guest tpidr_el0. */
    msr tpidr_el0, x19

2:
    ldr x19, [sp, #0x10]
    ldp x29, x30, [sp], #0x20
    ret
178
179
/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
/* Spins until the per-thread lock transitions Unlocked(1) -> Locked(0),
   using an exclusive load/store pair. */
.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE20LockThreadParametersEPv
.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
_ZN4Core7ARM_NCE20LockThreadParametersEPv:
    /* Offset to lock member. */
    add x0, x0, #(TpidrEl0Lock)

1:
    /* Clear the monitor. */
    clrex

2:
    /* Load-linked with acquire ordering. */
    ldaxr w1, [x0]

    /* If the value was SpinLockLocked (0), clear monitor and retry. */
    cbz w1, 1b

    /* Store-conditional SpinLockLocked (wzr == 0) with relaxed ordering. */
    stxr w1, wzr, [x0]

    /* If we failed to store, retry. */
    cbnz w1, 2b

    ret
206
207
/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
/* Releases the per-thread lock taken by LockThreadParameters. */
.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
    /* Offset to lock member. */
    add x0, x0, #(TpidrEl0Lock)

    /* Load SpinLockUnlocked. */
    mov w1, #(SpinLockUnlocked)

    /* Store value with release ordering. */
    stlr w1, [x0]

    ret
diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h
new file mode 100644
index 000000000..8a9b285b5
--- /dev/null
+++ b/src/core/arm/nce/arm_nce_asm_definitions.h
@@ -0,0 +1,29 @@
1/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
2/* SPDX-License-Identifier: GPL-2.0-or-later */
3
#pragma once

/* This header is included by both arm_nce.s and C++ code; keep it
   preprocessor-only. */
#define __ASSEMBLY__

#include <asm-generic/signal.h>
#include <asm-generic/unistd.h>

/* Signals used for host<->guest transitions. */
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
#define BreakFromRunCodeSignal SIGURG
#define GuestFaultSignal SIGSEGV

/* Byte offsets into Core::GuestContext (static_asserted in guest_context.h). */
#define GuestContextSp 0xF8
#define GuestContextHostContext 0x320

/* Byte offsets into Core::HostContext (static_asserted in guest_context.h). */
#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60

/* Byte offsets into Kernel::KThread::NativeExecutionParameters
   (static_asserted in arm_nce.cpp). */
#define TpidrEl0NativeContext 0x10
#define TpidrEl0Lock 0x18
#define TpidrEl0TlsMagic 0x20
/* ASCII "YUZU" when stored little-endian; marks a guest TLS block. */
#define TlsMagic 0x555a5559

/* States of the per-thread spinlock in NativeExecutionParameters. */
#define SpinLockLocked 0
#define SpinLockUnlocked 1
diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h
new file mode 100644
index 000000000..0767a0337
--- /dev/null
+++ b/src/core/arm/nce/guest_context.h
@@ -0,0 +1,50 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8#include "core/arm/arm_interface.h"
9#include "core/arm/nce/arm_nce_asm_definitions.h"
10
11namespace Core {
12
13class ARM_NCE;
14class System;
15
16struct HostContext {
17 alignas(16) std::array<u64, 12> host_saved_regs{};
18 alignas(16) std::array<u128, 8> host_saved_vregs{};
19 u64 host_sp{};
20 void* host_tpidr_el0{};
21};
22
23struct GuestContext {
24 std::array<u64, 31> cpu_registers{};
25 u64 sp{};
26 u64 pc{};
27 u32 fpcr{};
28 u32 fpsr{};
29 std::array<u128, 32> vector_registers{};
30 u32 pstate{};
31 alignas(16) HostContext host_ctx{};
32 u64 tpidrro_el0{};
33 u64 tpidr_el0{};
34 std::atomic<u64> esr_el1{};
35 u32 nzcv{};
36 u32 svc_swi{};
37 System* system{};
38 ARM_NCE* parent{};
39};
40
41// Verify assembly offsets.
42static_assert(offsetof(GuestContext, sp) == GuestContextSp);
43static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext);
44static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0);
45static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0);
46static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0);
47static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs);
48static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs);
49
50} // namespace Core
diff --git a/src/core/arm/nce/instructions.h b/src/core/arm/nce/instructions.h
new file mode 100644
index 000000000..5b56ff857
--- /dev/null
+++ b/src/core/arm/nce/instructions.h
@@ -0,0 +1,147 @@
1// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors
2// SPDX-License-Identifier: MPL-2.0
3
4#include "common/bit_field.h"
5#include "common/common_types.h"
6
7namespace Core::NCE {
8
// 15-bit system register field values as they appear in the MRS/MSR
// instruction encodings (compared against MRS/MSR::GetSystemReg below).
enum SystemRegister : u32 {
    TpidrEl0 = 0x5E82,
    TpidrroEl0 = 0x5E83,
    CntfrqEl0 = 0x5F00,
    CntpctEl0 = 0x5F01,
};
15
16// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call-
17union SVC {
18 constexpr explicit SVC(u32 raw_) : raw{raw_} {}
19
20 constexpr bool Verify() {
21 return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0);
22 }
23
24 constexpr u32 GetSig0() {
25 return decltype(sig0)::ExtractValue(raw);
26 }
27
28 constexpr u32 GetValue() {
29 return decltype(value)::ExtractValue(raw);
30 }
31
32 constexpr u32 GetSig1() {
33 return decltype(sig1)::ExtractValue(raw);
34 }
35
36 u32 raw;
37
38private:
39 BitField<0, 5, u32> sig0; // 0x1
40 BitField<5, 16, u32> value; // 16-bit immediate
41 BitField<21, 11, u32> sig1; // 0x6A0
42};
43static_assert(sizeof(SVC) == sizeof(u32));
44static_assert(SVC(0xD40000C1).Verify());
45static_assert(SVC(0xD40000C1).GetValue() == 0x6);
46
47// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register-
48union MRS {
49 constexpr explicit MRS(u32 raw_) : raw{raw_} {}
50
51 constexpr bool Verify() {
52 return (this->GetSig() == 0xD53);
53 }
54
55 constexpr u32 GetRt() {
56 return decltype(rt)::ExtractValue(raw);
57 }
58
59 constexpr u32 GetSystemReg() {
60 return decltype(system_reg)::ExtractValue(raw);
61 }
62
63 constexpr u32 GetSig() {
64 return decltype(sig)::ExtractValue(raw);
65 }
66
67 u32 raw;
68
69private:
70 BitField<0, 5, u32> rt; // destination register
71 BitField<5, 15, u32> system_reg; // source system register
72 BitField<20, 12, u32> sig; // 0xD53
73};
74static_assert(sizeof(MRS) == sizeof(u32));
75static_assert(MRS(0xD53BE020).Verify());
76static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0);
77static_assert(MRS(0xD53BE020).GetRt() == 0x0);
78
79// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-
80union MSR {
81 constexpr explicit MSR(u32 raw_) : raw{raw_} {}
82
83 constexpr bool Verify() {
84 return this->GetSig() == 0xD51;
85 }
86
87 constexpr u32 GetRt() {
88 return decltype(rt)::ExtractValue(raw);
89 }
90
91 constexpr u32 GetSystemReg() {
92 return decltype(system_reg)::ExtractValue(raw);
93 }
94
95 constexpr u32 GetSig() {
96 return decltype(sig)::ExtractValue(raw);
97 }
98
99 u32 raw;
100
101private:
102 BitField<0, 5, u32> rt; // source register
103 BitField<5, 15, u32> system_reg; // destination system register
104 BitField<20, 12, u32> sig; // 0xD51
105};
106static_assert(sizeof(MSR) == sizeof(u32));
107static_assert(MSR(0xD51BD040).Verify());
108static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0);
109static_assert(MSR(0xD51BD040).GetRt() == 0x0);
110
111// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register-
112// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers-
113// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register-
114// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers-
115union Exclusive {
116 constexpr explicit Exclusive(u32 raw_) : raw{raw_} {}
117
118 constexpr bool Verify() {
119 return this->GetSig() == 0x10;
120 }
121
122 constexpr u32 GetSig() {
123 return decltype(sig)::ExtractValue(raw);
124 }
125
126 constexpr u32 AsOrdered() {
127 return raw | decltype(o0)::FormatValue(1);
128 }
129
130 u32 raw;
131
132private:
133 BitField<0, 5, u32> rt; // memory operand
134 BitField<5, 5, u32> rn; // register operand 1
135 BitField<10, 5, u32> rt2; // register operand 2
136 BitField<15, 1, u32> o0; // ordered
137 BitField<16, 5, u32> rs; // status register
138 BitField<21, 2, u32> l; // operation type
139 BitField<23, 7, u32> sig; // 0x10
140 BitField<30, 2, u32> size; // size
141};
142static_assert(Exclusive(0xC85FFC00).Verify());
143static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00);
144static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00);
145static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440);
146
147} // namespace Core::NCE
diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp
new file mode 100644
index 000000000..c79399c2b
--- /dev/null
+++ b/src/core/arm/nce/patch.cpp
@@ -0,0 +1,472 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/arm64/native_clock.h"
5#include "common/bit_cast.h"
6#include "common/literals.h"
7#include "core/arm/nce/arm_nce.h"
8#include "core/arm/nce/guest_context.h"
9#include "core/arm/nce/instructions.h"
10#include "core/arm/nce/patch.h"
11#include "core/core.h"
12#include "core/core_timing.h"
13#include "core/hle/kernel/svc.h"
14
15namespace Core::NCE {
16
17using namespace Common::Literals;
18using namespace oaknut::util;
19
20using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
21
22constexpr size_t MaxRelativeBranch = 128_MiB;
23
// The code generator emits instruction words directly into m_patch_instructions.
Patcher::Patcher() : c(m_patch_instructions) {}

Patcher::~Patcher() = default;
27
// Scans the module's text segment for instructions that cannot run natively
// under NCE (SVC, TLS register moves, counter reads) and generates a patch
// stub for each occurrence. Branch fixups are only recorded here; they are
// resolved later in RelocateAndCopy once the load address is known.
void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
                        const Kernel::CodeSet::Segment& code) {

    // Write save context helper function.
    c.l(m_save_context);
    WriteSaveContext();

    // Write load context helper function.
    c.l(m_load_context);
    WriteLoadContext();

    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
    const auto text_words =
        std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)};

    // Loop through instructions, patching as needed.
    for (u32 i = 0; i < static_cast<u32>(text_words.size()); i++) {
        const u32 inst = text_words[i];

        // Records the branch from the current module offset into the patch
        // section, and returns the module offset the patch must return to.
        const auto AddRelocations = [&] {
            const uintptr_t this_offset = i * sizeof(u32);
            const uintptr_t next_offset = this_offset + sizeof(u32);

            // Relocate from here to patch.
            this->BranchToPatch(this_offset);

            // Relocate from patch to next instruction.
            return next_offset;
        };

        // SVC
        if (auto svc = SVC{inst}; svc.Verify()) {
            WriteSvcTrampoline(AddRelocations(), svc.GetValue());
            continue;
        }

        // MRS Xn, TPIDR_EL0
        // MRS Xn, TPIDRRO_EL0
        if (auto mrs = MRS{inst};
            mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
            const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
                                                                  : oaknut::SystemReg::TPIDR_EL0;
            const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
            WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
            continue;
        }

        // MRS Xn, CNTPCT_EL0
        if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
            WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
            continue;
        }

        // MRS Xn, CNTFRQ_EL0
        if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
            UNREACHABLE();
        }

        // MSR TPIDR_EL0, Xn
        if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
            WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
            continue;
        }
    }

    // Determine patching mode for the final relocation step.
    // Large images cannot reach a post-data patch section with a relative
    // branch, so the patch section is placed before .text instead.
    const size_t image_size = program_image.size();
    this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
}
98
// Resolves every relocation recorded during PatchText now that the final load
// address is known, rewrites guest exclusives as ordered accesses, and copies
// the generated patch section into the program image (before .text or after
// .data depending on the selected PatchMode).
void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
                              const Kernel::CodeSet::Segment& code,
                              Kernel::PhysicalMemory& program_image,
                              EntryTrampolines* out_trampolines) {
    const size_t patch_size = SectionSize();
    const size_t image_size = program_image.size();

    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
    const auto text_words =
        std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)};

    // Overwrites a module instruction with a relative branch into the patch section.
    const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        if (mode == PatchMode::PreText) {
            rc.B(rel.patch_offset - patch_size - rel.module_offset);
        } else {
            rc.B(image_size - rel.module_offset + rel.patch_offset);
        }
    };

    // Overwrites a patch placeholder with a relative branch back into the module.
    const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        if (mode == PatchMode::PreText) {
            rc.B(patch_size - rel.patch_offset + rel.module_offset);
        } else {
            rc.B(rel.module_offset - image_size - rel.patch_offset);
        }
    };

    // Converts a patch-section offset to its final virtual address.
    const auto RebasePatch = [&](ptrdiff_t patch_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_offset;
        } else {
            return GetInteger(load_base) + image_size + patch_offset;
        }
    };

    // Converts a module text offset to its final virtual address.
    const auto RebasePc = [&](uintptr_t module_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_size + module_offset;
        } else {
            return GetInteger(load_base) + module_offset;
        }
    };

    // We are now ready to relocate!
    for (const Relocation& rel : m_branch_to_patch_relocations) {
        ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
    }
    for (const Relocation& rel : m_branch_to_module_relocations) {
        ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
                                      rel);
    }

    // Rewrite PC constants and record post trampolines
    for (const Relocation& rel : m_write_module_pc_relocations) {
        oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
        rc.dx(RebasePc(rel.module_offset));
    }
    for (const Trampoline& rel : m_trampolines) {
        out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
    }

    // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
    // Convert to ordered to preserve this assumption
    for (u32 i = 0; i < static_cast<u32>(text_words.size()); i++) {
        const u32 inst = text_words[i];
        if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
            text_words[i] = exclusive.AsOrdered();
        }
    }

    // Copy to program image
    if (this->mode == PatchMode::PreText) {
        std::memcpy(program_image.data(), m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
    } else {
        program_image.resize(image_size + patch_size);
        std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
    }
}
182
183size_t Patcher::SectionSize() const noexcept {
184 return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
185}
186
// Emits a helper routine that restores the full guest register state from the
// GuestContext reachable through TPIDR_EL0. The caller must have already
// allocated 16 bytes of stack, with the guest X30 stored at SP + 0.
void Patcher::WriteLoadContext() {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
    // of stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));

    // Load system registers.
    c.LDR(W0, X30, offsetof(GuestContext, fpsr));
    c.MSR(oaknut::SystemReg::FPSR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, fpcr));
    c.MSR(oaknut::SystemReg::FPCR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, nzcv));
    c.MSR(oaknut::SystemReg::NZCV, X0);

    // Load all vector registers.
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }

    // Load all general-purpose registers except X30.
    for (int i = 0; i <= 28; i += 2) {
        c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }

    // Reload our return X30 from the stack and return.
    // The patch code will reload the guest X30 for us.
    c.LDR(X30, SP, 8);
    c.RET();
}
219
// Emits a helper routine that stores the full guest register state into the
// GuestContext reachable through TPIDR_EL0. The caller must have already
// allocated 16 bytes of stack, with the guest X30 stored at SP + 0.
void Patcher::WriteSaveContext() {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
    // stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));

    // Store all general-purpose registers except X30.
    for (int i = 0; i <= 28; i += 2) {
        c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }

    // Store all vector registers.
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }

    // Store guest system registers, X30 and SP, using X0 as a scratch register.
    // X0 is pushed first; the guest X30 is then at SP + 16, and the original
    // guest SP is SP + 32 (16 bytes caller allocation + 16 bytes here).
    c.STR(X0, SP, PRE_INDEXED, -16);
    c.LDR(X0, SP, 16);
    c.STR(X0, X30, 8 * 30);
    c.ADD(X0, SP, 32);
    c.STR(X0, X30, offsetof(GuestContext, sp));
    c.MRS(X0, oaknut::SystemReg::FPSR);
    c.STR(W0, X30, offsetof(GuestContext, fpsr));
    c.MRS(X0, oaknut::SystemReg::FPCR);
    c.STR(W0, X30, offsetof(GuestContext, fpcr));
    c.MRS(X0, oaknut::SystemReg::NZCV);
    c.STR(W0, X30, offsetof(GuestContext, nzcv));
    c.LDR(X0, SP, POST_INDEXED, 16);

    // Reload our return X30 from the stack, and return.
    c.LDR(X30, SP, 8);
    c.RET();
}
257
258void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
259 LOG_ERROR(Core_ARM, "Patching SVC {:#x} at {:#x}", svc_id, module_dest - 4);
260 // We are about to start saving state, so we need to lock the context.
261 this->LockContext();
262
263 // Store guest X30 to the stack. Then, save the context and restore the stack.
264 // This will save all registers except PC, but we know PC at patch time.
265 c.STR(X30, SP, PRE_INDEXED, -16);
266 c.BL(m_save_context);
267 c.LDR(X30, SP, POST_INDEXED, 16);
268
269 // Now that we've saved all registers, we can use any registers as scratch.
270 // Store PC + 4 to arm interface, since we know the instruction offset from the entry point.
271 oaknut::Label pc_after_svc;
272 c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
273 c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
274 c.LDR(X2, pc_after_svc);
275 c.STR(X2, X1, offsetof(GuestContext, pc));
276
277 // Store SVC number to execute when we return
278 c.MOV(X2, svc_id);
279 c.STR(W2, X1, offsetof(GuestContext, svc_swi));
280
281 // We are calling a SVC. Clear esr_el1 and return it.
282 static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
283 oaknut::Label retry;
284 c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
285 c.l(retry);
286 c.LDAXR(X0, X2);
287 c.STLXR(W3, XZR, X2);
288 c.CBNZ(W3, retry);
289
290 // Add "calling SVC" flag. Since this is X0, this is now our return value.
291 c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));
292
293 // Offset the GuestContext pointer to the HostContext member.
294 // STP has limited range of [-512, 504] which we can't reach otherwise
295 // NB: Due to this all offsets below are from the start of HostContext.
296 c.ADD(X1, X1, offsetof(GuestContext, host_ctx));
297
298 // Reload host TPIDR_EL0 and SP.
299 static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
300 c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
301 c.MOV(SP, X2);
302 c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);
303
304 // Load callee-saved host registers and return to host.
305 static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
306 static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
307 c.LDP(X19, X20, X1, HOST_REGS_OFF);
308 c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
309 c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
310 c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
311 c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
312 c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
313 c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
314 c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
315 c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
316 c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
317 c.RET();
318
319 // Write the post-SVC trampoline address, which will jump back to the guest after restoring its
320 // state.
321 m_trampolines.push_back({c.offset(), module_dest});
322
323 // Host called this location. Save the return address so we can
324 // unwind the stack properly when jumping back.
325 c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
326 c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
327 c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
328 c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));
329
330 // Reload all guest registers except X30 and PC.
331 // The function also expects 16 bytes of stack already allocated.
332 c.STR(X30, SP, PRE_INDEXED, -16);
333 c.BL(m_load_context);
334 c.LDR(X30, SP, POST_INDEXED, 16);
335
336 // Use X1 as a scratch register to restore X30.
337 c.STR(X1, SP, PRE_INDEXED, -16);
338 c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
339 c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
340 c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
341 c.LDR(X1, SP, POST_INDEXED, 16);
342
343 // Unlock the context.
344 this->UnlockContext();
345
346 // Jump back to the instruction after the emulated SVC.
347 this->BranchToModule(module_dest);
348
349 // Store PC after call.
350 c.l(pc_after_svc);
351 this->WriteModulePc(module_dest);
352}
353
354void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
355 oaknut::SystemReg src_reg) {
356 // Retrieve emulated TLS register from GuestContext.
357 c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
358 if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
359 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
360 } else {
361 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
362 }
363
364 // Jump back to the instruction after the emulated MRS.
365 this->BranchToModule(module_dest);
366}
367
// Emits the replacement for `MSR TPIDR_EL0, Xn`: writes the guest TLS value
// into the thread's NativeExecutionParameters instead of the real register.
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
    // Avoid clobbering the source register when choosing a scratch.
    const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
    c.STR(scratch_reg, SP, PRE_INDEXED, -16);

    // Save guest value to NativeExecutionParameters::tpidr_el0.
    c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
    c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));

    // Restore scratch register.
    c.LDR(scratch_reg, SP, POST_INDEXED, 16);

    // Jump back to the instruction after the emulated MSR.
    this->BranchToModule(module_dest);
}
382
// Emits the replacement for `MRS Xn, CNTPCT_EL0`: reads the virtual counter
// (CNTVCT_EL0) and rescales it to the guest's expected frequency using a
// constant 128-bit fixed-point factor embedded after the handler.
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
    // Function-local static: the clock (and thus the factor) is created once.
    static Common::Arm64::NativeClock clock{};
    const auto factor = clock.GetGuestCNTFRQFactor();
    const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor);

    // Pick scratch registers that do not collide with the destination.
    const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1;
    oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0;
    oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1;

    oaknut::Label factorlo;
    oaknut::Label factorhi;

    // Save scratches.
    c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);

    // Load counter value.
    c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);

    // Load scaling factor.
    c.LDR(scratch0, factorlo);
    c.LDR(scratch1, factorhi);

    // Multiply low bits and get result.
    c.UMULH(scratch0, dest_reg, scratch0);

    // Multiply high bits and add low bit result.
    c.MADD(dest_reg, dest_reg, scratch1, scratch0);

    // Reload scratches.
    c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);

    // Jump back to the instruction after the emulated MRS.
    this->BranchToModule(module_dest);

    // Scaling factor constant values.
    c.l(factorlo);
    c.dx(raw_factor[0]);
    c.l(factorhi);
    c.dx(raw_factor[1]);
}
423
// Emits an inline spinlock acquire of the per-thread context lock
// (NativeExecutionParameters::lock). Pairs with UnlockContext below.
void Patcher::LockContext() {
    oaknut::Label retry;

    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);

    // Reload lock pointer. CLREX drops any exclusive reservation before each
    // attempt so a failed iteration starts clean.
    c.l(retry);
    c.CLREX();
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));

    static_assert(SpinLockLocked == 0);

    // Load-linked with acquire ordering.
    c.LDAXR(W1, X0);

    // If the value was SpinLockLocked, clear monitor and retry.
    c.CBZ(W1, retry);

    // Store-conditional SpinLockLocked with relaxed ordering.
    c.STXR(W1, WZR, X0);

    // If we failed to store, retry.
    c.CBNZ(W1, retry);

    // We succeeded! Reload scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
453
// Emits the release of the per-thread context lock taken by LockContext.
// The store-release makes prior guest-state writes visible to the next locker.
void Patcher::UnlockContext() {
    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);

    // Load lock pointer.
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));

    // Load SpinLockUnlocked.
    c.MOV(W1, SpinLockUnlocked);

    // Store value with release ordering.
    c.STLR(W1, X0);

    // Load scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
471
472} // namespace Core::NCE
diff --git a/src/core/arm/nce/patch.h b/src/core/arm/nce/patch.h
new file mode 100644
index 000000000..b727d4e48
--- /dev/null
+++ b/src/core/arm/nce/patch.h
@@ -0,0 +1,107 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <unordered_map>
8#include <vector>
9
10#pragma clang diagnostic push
11#pragma clang diagnostic ignored "-Wshorten-64-to-32"
12#include <oaknut/code_block.hpp>
13#include <oaknut/oaknut.hpp>
14#pragma clang diagnostic pop
15
16#include "common/common_types.h"
17#include "core/hle/kernel/code_set.h"
18#include "core/hle/kernel/k_typed_address.h"
19#include "core/hle/kernel/physical_memory.h"
20
21#include <signal.h>
22
23namespace Core {
24struct GuestContext;
25}
26
27namespace Core::NCE {
28
// Where the generated patch section is placed relative to the module image.
enum class PatchMode : u32 {
    None,     ///< Mode not yet determined (before PatchText runs)
    PreText,  ///< Patch section is inserted before .text
    PostData, ///< Patch section is inserted after .data
};

using ModuleTextAddress = u64;
using PatchTextAddress = u64;
// Maps a guest entry address to its trampoline address in the patch section.
using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
38
// Rewrites a guest module so privileged/emulated instructions (SVC, TLS
// register accesses, counter reads) branch into generated handler stubs.
class Patcher {
public:
    explicit Patcher();
    ~Patcher();

    /// Scan the text segment and emit a patch stub for each instruction
    /// requiring emulation; records relocations for later fixup.
    void PatchText(const Kernel::PhysicalMemory& program_image,
                   const Kernel::CodeSet::Segment& code);
    /// Resolve recorded relocations against the final load address and copy
    /// the patch section into the program image.
    void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
                         Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
    /// Page-aligned size of the generated patch section, in bytes.
    size_t SectionSize() const noexcept;

    [[nodiscard]] PatchMode Mode() const noexcept {
        return mode;
    }

private:
    using ModuleDestLabel = uintptr_t;

    /// Associates a patched module location with its re-entry stub.
    struct Trampoline {
        ptrdiff_t patch_offset;
        uintptr_t module_offset;
    };

    void WriteLoadContext();
    void WriteSaveContext();
    void LockContext();
    void UnlockContext();
    void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
    void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                         oaknut::SystemReg src_reg);
    void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
    void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);

private:
    // Record a branch from the module (at module_dest) to the current patch
    // emission offset; resolved in RelocateAndCopy.
    void BranchToPatch(uintptr_t module_dest) {
        m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
    }

    // Emit a placeholder word, later rewritten as a branch back into the module.
    void BranchToModule(uintptr_t module_dest) {
        m_branch_to_module_relocations.push_back({c.offset(), module_dest});
        c.dw(0);
    }

    // Emit a placeholder doubleword, later rewritten as the absolute module PC.
    void WriteModulePc(uintptr_t module_dest) {
        m_write_module_pc_relocations.push_back({c.offset(), module_dest});
        c.dx(0);
    }

private:
    // List of patch instructions we have generated.
    std::vector<u32> m_patch_instructions{};

    // Relocation type for relative branch from module to patch.
    struct Relocation {
        ptrdiff_t patch_offset;  ///< Offset in bytes from the start of the patch section.
        uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
    };

    oaknut::VectorCodeGenerator c;
    std::vector<Trampoline> m_trampolines;
    std::vector<Relocation> m_branch_to_patch_relocations{};
    std::vector<Relocation> m_branch_to_module_relocations{};
    std::vector<Relocation> m_write_module_pc_relocations{};
    oaknut::Label m_save_context{};
    oaknut::Label m_load_context{};
    PatchMode mode{PatchMode::None};
};
106
107} // namespace Core::NCE
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 1d557fb43..408479019 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -136,9 +136,7 @@ struct System::Impl {
136 } 136 }
137 137
138 void Initialize(System& system) { 138 void Initialize(System& system) {
139 const bool direct_mapped_address = Settings::IsNceEnabled(); 139 device_memory = std::make_unique<Core::DeviceMemory>();
140 device_memory = std::make_unique<Core::DeviceMemory>(direct_mapped_address);
141
142 is_multicore = Settings::values.use_multi_core.GetValue(); 140 is_multicore = Settings::values.use_multi_core.GetValue();
143 extended_memory_layout = 141 extended_memory_layout =
144 Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; 142 Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb;
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 980bb97f9..151eb3870 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -211,6 +211,8 @@ void CpuManager::RunThread(std::stop_token token, std::size_t core) {
211 system.GPU().ObtainContext(); 211 system.GPU().ObtainContext();
212 } 212 }
213 213
214 system.ArmInterface(core).Initialize();
215
214 auto& kernel = system.Kernel(); 216 auto& kernel = system.Kernel();
215 auto& scheduler = *kernel.CurrentScheduler(); 217 auto& scheduler = *kernel.CurrentScheduler();
216 auto* thread = scheduler.GetSchedulerCurrentThread(); 218 auto* thread = scheduler.GetSchedulerCurrentThread();
diff --git a/src/core/device_memory.cpp b/src/core/device_memory.cpp
index 0528a8e3b..3a9151646 100644
--- a/src/core/device_memory.cpp
+++ b/src/core/device_memory.cpp
@@ -12,13 +12,9 @@ constexpr size_t VirtualReserveSize = 1ULL << 38;
12constexpr size_t VirtualReserveSize = 1ULL << 39; 12constexpr size_t VirtualReserveSize = 1ULL << 39;
13#endif 13#endif
14 14
15DeviceMemory::DeviceMemory(bool direct_mapped_address) 15DeviceMemory::DeviceMemory()
16 : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(), 16 : buffer{Kernel::Board::Nintendo::Nx::KSystemControl::Init::GetIntendedMemorySize(),
17 VirtualReserveSize} { 17 VirtualReserveSize} {}
18 if (direct_mapped_address) {
19 buffer.EnableDirectMappedAddress();
20 }
21}
22 18
23DeviceMemory::~DeviceMemory() = default; 19DeviceMemory::~DeviceMemory() = default;
24 20
diff --git a/src/core/device_memory.h b/src/core/device_memory.h
index 368f19e86..13388b73e 100644
--- a/src/core/device_memory.h
+++ b/src/core/device_memory.h
@@ -18,7 +18,7 @@ enum : u64 {
18 18
19class DeviceMemory { 19class DeviceMemory {
20public: 20public:
21 explicit DeviceMemory(bool direct_mapped_address); 21 explicit DeviceMemory();
22 ~DeviceMemory(); 22 ~DeviceMemory();
23 23
24 DeviceMemory& operator=(const DeviceMemory&) = delete; 24 DeviceMemory& operator=(const DeviceMemory&) = delete;
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index af1af2b78..d53da82f4 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -75,11 +75,20 @@ struct CodeSet final {
75 return segments[2]; 75 return segments[2];
76 } 76 }
77 77
78 Segment& PatchSegment() {
79 return patch_segment;
80 }
81
82 const Segment& PatchSegment() const {
83 return patch_segment;
84 }
85
78 /// The overall data that backs this code set. 86 /// The overall data that backs this code set.
79 Kernel::PhysicalMemory memory; 87 Kernel::PhysicalMemory memory;
80 88
81 /// The segments that comprise this code set. 89 /// The segments that comprise this code set.
82 std::array<Segment, 3> segments; 90 std::array<Segment, 3> segments;
91 Segment patch_segment;
83 92
84 /// The entry point address for this code set. 93 /// The entry point address for this code set.
85 KProcessAddress entrypoint = 0; 94 KProcessAddress entrypoint = 0;
diff --git a/src/core/hle/kernel/k_address_space_info.cpp b/src/core/hle/kernel/k_address_space_info.cpp
index 32173e52b..3235a7a37 100644
--- a/src/core/hle/kernel/k_address_space_info.cpp
+++ b/src/core/hle/kernel/k_address_space_info.cpp
@@ -25,8 +25,8 @@ constexpr std::array<KAddressSpaceInfo, 13> AddressSpaceInfos{{
25 { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, }, 25 { .bit_width = 36, .address = 2_GiB , .size = 64_GiB - 2_GiB , .type = KAddressSpaceInfo::Type::MapLarge, },
26 { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, }, 26 { .bit_width = 36, .address = Size_Invalid, .size = 8_GiB , .type = KAddressSpaceInfo::Type::Heap, },
27 { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, }, 27 { .bit_width = 36, .address = Size_Invalid, .size = 6_GiB , .type = KAddressSpaceInfo::Type::Alias, },
28#ifdef ANDROID 28#ifdef ARCHITECTURE_arm64
29 // With Android, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region. 29 // With NCE, we use a 38-bit address space due to memory limitations. This should (safely) truncate ASLR region.
30 { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, 30 { .bit_width = 39, .address = 128_MiB , .size = 256_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
31#else 31#else
32 { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, }, 32 { .bit_width = 39, .address = 128_MiB , .size = 512_GiB - 128_MiB, .type = KAddressSpaceInfo::Type::Map39Bit, },
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index c6a200320..d2e9f2a2e 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -1214,6 +1214,17 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
1214 ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute); 1214 ReprotectSegment(code_set.CodeSegment(), Svc::MemoryPermission::ReadExecute);
1215 ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read); 1215 ReprotectSegment(code_set.RODataSegment(), Svc::MemoryPermission::Read);
1216 ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); 1216 ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite);
1217
1218#ifdef ARCHITECTURE_arm64
1219 if (Settings::IsNceEnabled()) {
1220 auto& buffer = m_kernel.System().DeviceMemory().buffer;
1221 const auto& code = code_set.CodeSegment();
1222 const auto& patch = code_set.PatchSegment();
1223 buffer.Protect(GetInteger(base_addr + code.addr), code.size, true, true, true);
1224 buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, true, true, true);
1225 ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
1226 }
1227#endif
1217} 1228}
1218 1229
1219bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) { 1230bool KProcess::InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type) {
diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h
index 54b8e0a59..7b97d452b 100644
--- a/src/core/hle/kernel/k_process.h
+++ b/src/core/hle/kernel/k_process.h
@@ -112,6 +112,7 @@ private:
112 std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{}; 112 std::array<KThread*, Core::Hardware::NUM_CPU_CORES> m_pinned_threads{};
113 std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{}; 113 std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> m_watchpoints{};
114 std::map<KProcessAddress, u64> m_debug_page_refcounts{}; 114 std::map<KProcessAddress, u64> m_debug_page_refcounts{};
115 std::unordered_map<u64, u64> m_post_handlers{};
115 std::atomic<s64> m_cpu_time{}; 116 std::atomic<s64> m_cpu_time{};
116 std::atomic<s64> m_num_process_switches{}; 117 std::atomic<s64> m_num_process_switches{};
117 std::atomic<s64> m_num_thread_switches{}; 118 std::atomic<s64> m_num_thread_switches{};
@@ -467,6 +468,14 @@ public:
467 468
468 static void Switch(KProcess* cur_process, KProcess* next_process); 469 static void Switch(KProcess* cur_process, KProcess* next_process);
469 470
471 std::unordered_map<u64, u64>& GetPostHandlers() noexcept {
472 return m_post_handlers;
473 }
474
475 KernelCore& GetKernel() noexcept {
476 return m_kernel;
477 }
478
470public: 479public:
471 // Attempts to insert a watchpoint into a free slot. Returns false if none are available. 480 // Attempts to insert a watchpoint into a free slot. Returns false if none are available.
472 bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type); 481 bool InsertWatchpoint(KProcessAddress addr, u64 size, DebugWatchpointType type);
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index e1f80b04f..e9ca5dfca 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -655,6 +655,21 @@ public:
655 return m_stack_top; 655 return m_stack_top;
656 } 656 }
657 657
658public:
659 // TODO: This shouldn't be defined in kernel namespace
660 struct NativeExecutionParameters {
661 u64 tpidr_el0{};
662 u64 tpidrro_el0{};
663 void* native_context{};
664 std::atomic<u32> lock{1};
665 bool is_running{};
666 u32 magic{Common::MakeMagic('Y', 'U', 'Z', 'U')};
667 };
668
669 NativeExecutionParameters& GetNativeExecutionParameters() {
670 return m_native_execution_parameters;
671 }
672
658private: 673private:
659 KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key, 674 KThread* RemoveWaiterByKey(bool* out_has_waiters, KProcessAddress key,
660 bool is_kernel_address_key); 675 bool is_kernel_address_key);
@@ -914,6 +929,7 @@ private:
914 ThreadWaitReasonForDebugging m_wait_reason_for_debugging{}; 929 ThreadWaitReasonForDebugging m_wait_reason_for_debugging{};
915 uintptr_t m_argument{}; 930 uintptr_t m_argument{};
916 KProcessAddress m_stack_top{}; 931 KProcessAddress m_stack_top{};
932 NativeExecutionParameters m_native_execution_parameters{};
917 933
918public: 934public:
919 using ConditionVariableThreadTreeType = ConditionVariableThreadTree; 935 using ConditionVariableThreadTreeType = ConditionVariableThreadTree;
diff --git a/src/core/hle/kernel/physical_core.cpp b/src/core/hle/kernel/physical_core.cpp
index 5ee869fa2..15434212e 100644
--- a/src/core/hle/kernel/physical_core.cpp
+++ b/src/core/hle/kernel/physical_core.cpp
@@ -1,8 +1,12 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/settings.h"
4#include "core/arm/dynarmic/arm_dynarmic_32.h" 5#include "core/arm/dynarmic/arm_dynarmic_32.h"
5#include "core/arm/dynarmic/arm_dynarmic_64.h" 6#include "core/arm/dynarmic/arm_dynarmic_64.h"
7#ifdef ARCHITECTURE_arm64
8#include "core/arm/nce/arm_nce.h"
9#endif
6#include "core/core.h" 10#include "core/core.h"
7#include "core/hle/kernel/k_scheduler.h" 11#include "core/hle/kernel/k_scheduler.h"
8#include "core/hle/kernel/kernel.h" 12#include "core/hle/kernel/kernel.h"
@@ -14,7 +18,8 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
14 : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} { 18 : m_core_index{core_index}, m_system{system}, m_scheduler{scheduler} {
15#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) 19#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
16 // TODO(bunnei): Initialization relies on a core being available. We may later replace this with 20 // TODO(bunnei): Initialization relies on a core being available. We may later replace this with
17 // a 32-bit instance of Dynarmic. This should be abstracted out to a CPU manager. 21 // an NCE interface or a 32-bit instance of Dynarmic. This should be abstracted out to a CPU
22 // manager.
18 auto& kernel = system.Kernel(); 23 auto& kernel = system.Kernel();
19 m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>( 24 m_arm_interface = std::make_unique<Core::ARM_Dynarmic_64>(
20 system, kernel.IsMulticore(), 25 system, kernel.IsMulticore(),
@@ -28,6 +33,13 @@ PhysicalCore::PhysicalCore(std::size_t core_index, Core::System& system, KSchedu
28PhysicalCore::~PhysicalCore() = default; 33PhysicalCore::~PhysicalCore() = default;
29 34
30void PhysicalCore::Initialize(bool is_64_bit) { 35void PhysicalCore::Initialize(bool is_64_bit) {
36#if defined(ARCHITECTURE_arm64)
37 if (Settings::IsNceEnabled()) {
38 m_arm_interface = std::make_unique<Core::ARM_NCE>(m_system, m_system.Kernel().IsMulticore(),
39 m_core_index);
40 return;
41 }
42#endif
31#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64) 43#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
32 auto& kernel = m_system.Kernel(); 44 auto& kernel = m_system.Kernel();
33 if (!is_64_bit) { 45 if (!is_64_bit) {
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 48c0edaea..e7fc8f438 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -3,6 +3,7 @@
3 3
4#include <cstring> 4#include <cstring>
5#include "common/logging/log.h" 5#include "common/logging/log.h"
6#include "common/settings.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/file_sys/content_archive.h" 8#include "core/file_sys/content_archive.h"
8#include "core/file_sys/control_metadata.h" 9#include "core/file_sys/control_metadata.h"
@@ -14,6 +15,10 @@
14#include "core/loader/deconstructed_rom_directory.h" 15#include "core/loader/deconstructed_rom_directory.h"
15#include "core/loader/nso.h" 16#include "core/loader/nso.h"
16 17
18#ifdef ARCHITECTURE_arm64
19#include "core/arm/nce/patch.h"
20#endif
21
17namespace Loader { 22namespace Loader {
18 23
19AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, 24AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_,
@@ -124,21 +129,41 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
124 } 129 }
125 metadata.Print(); 130 metadata.Print();
126 131
127 const auto static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", 132 // Enable NCE only for 64-bit programs.
128 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7", 133 Settings::SetNceEnabled(metadata.Is64BitProgram());
129 "subsdk8", "subsdk9", "sdk"}; 134
135 const std::array static_modules = {"rtld", "main", "subsdk0", "subsdk1", "subsdk2",
136 "subsdk3", "subsdk4", "subsdk5", "subsdk6", "subsdk7",
137 "subsdk8", "subsdk9", "sdk"};
130 138
131 // Use the NSO module loader to figure out the code layout
132 std::size_t code_size{}; 139 std::size_t code_size{};
133 for (const auto& module : static_modules) { 140
141 // Define an nce patch context for each potential module.
142#ifdef ARCHITECTURE_arm64
143 std::array<Core::NCE::Patcher, 13> module_patchers;
144#endif
145
146 const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* {
147#ifdef ARCHITECTURE_arm64
148 if (Settings::IsNceEnabled()) {
149 return &module_patchers[i];
150 }
151#endif
152 return nullptr;
153 };
154
155 // Use the NSO module loader to figure out the code layout
156 for (size_t i = 0; i < static_modules.size(); i++) {
157 const auto& module = static_modules[i];
134 const FileSys::VirtualFile module_file{dir->GetFile(module)}; 158 const FileSys::VirtualFile module_file{dir->GetFile(module)};
135 if (!module_file) { 159 if (!module_file) {
136 continue; 160 continue;
137 } 161 }
138 162
139 const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; 163 const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
140 const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( 164 const auto tentative_next_load_addr =
141 process, system, *module_file, code_size, should_pass_arguments, false); 165 AppLoader_NSO::LoadModule(process, system, *module_file, code_size,
166 should_pass_arguments, false, {}, GetPatcher(i));
142 if (!tentative_next_load_addr) { 167 if (!tentative_next_load_addr) {
143 return {ResultStatus::ErrorLoadingNSO, {}}; 168 return {ResultStatus::ErrorLoadingNSO, {}};
144 } 169 }
@@ -146,8 +171,18 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
146 code_size = *tentative_next_load_addr; 171 code_size = *tentative_next_load_addr;
147 } 172 }
148 173
174 // Enable direct memory mapping in case of NCE.
175 const u64 fastmem_base = [&]() -> size_t {
176 if (Settings::IsNceEnabled()) {
177 auto& buffer = system.DeviceMemory().buffer;
178 buffer.EnableDirectMappedAddress();
179 return reinterpret_cast<u64>(buffer.VirtualBasePointer());
180 }
181 return 0;
182 }();
183
149 // Setup the process code layout 184 // Setup the process code layout
150 if (process.LoadFromMetadata(metadata, code_size, 0, is_hbl).IsError()) { 185 if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) {
151 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; 186 return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
152 } 187 }
153 188
@@ -157,7 +192,8 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
157 VAddr next_load_addr{base_address}; 192 VAddr next_load_addr{base_address};
158 const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(), 193 const FileSys::PatchManager pm{metadata.GetTitleID(), system.GetFileSystemController(),
159 system.GetContentProvider()}; 194 system.GetContentProvider()};
160 for (const auto& module : static_modules) { 195 for (size_t i = 0; i < static_modules.size(); i++) {
196 const auto& module = static_modules[i];
161 const FileSys::VirtualFile module_file{dir->GetFile(module)}; 197 const FileSys::VirtualFile module_file{dir->GetFile(module)};
162 if (!module_file) { 198 if (!module_file) {
163 continue; 199 continue;
@@ -165,15 +201,16 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect
165 201
166 const VAddr load_addr{next_load_addr}; 202 const VAddr load_addr{next_load_addr};
167 const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; 203 const bool should_pass_arguments = std::strcmp(module, "rtld") == 0;
168 const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( 204 const auto tentative_next_load_addr =
169 process, system, *module_file, load_addr, should_pass_arguments, true, pm); 205 AppLoader_NSO::LoadModule(process, system, *module_file, load_addr,
206 should_pass_arguments, true, pm, GetPatcher(i));
170 if (!tentative_next_load_addr) { 207 if (!tentative_next_load_addr) {
171 return {ResultStatus::ErrorLoadingNSO, {}}; 208 return {ResultStatus::ErrorLoadingNSO, {}};
172 } 209 }
173 210
174 next_load_addr = *tentative_next_load_addr; 211 next_load_addr = *tentative_next_load_addr;
175 modules.insert_or_assign(load_addr, module); 212 modules.insert_or_assign(load_addr, module);
176 LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr); 213 LOG_DEBUG(Loader, "loaded module {} @ {:#X}", module, load_addr);
177 } 214 }
178 215
179 // Find the RomFS by searching for a ".romfs" file in this directory 216 // Find the RomFS by searching for a ".romfs" file in this directory
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index dfed296a5..49cf90317 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -22,6 +22,10 @@
22#include "core/loader/nso.h" 22#include "core/loader/nso.h"
23#include "core/memory.h" 23#include "core/memory.h"
24 24
25#ifdef ARCHITECTURE_arm64
26#include "core/arm/nce/patch.h"
27#endif
28
25namespace Loader { 29namespace Loader {
26 30
27struct NroSegmentHeader { 31struct NroSegmentHeader {
@@ -139,7 +143,8 @@ static constexpr u32 PageAlignSize(u32 size) {
139 return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK); 143 return static_cast<u32>((size + Core::Memory::YUZU_PAGEMASK) & ~Core::Memory::YUZU_PAGEMASK);
140} 144}
141 145
142static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data) { 146static bool LoadNroImpl(Core::System& system, Kernel::KProcess& process,
147 const std::vector<u8>& data) {
143 if (data.size() < sizeof(NroHeader)) { 148 if (data.size() < sizeof(NroHeader)) {
144 return {}; 149 return {};
145 } 150 }
@@ -195,14 +200,60 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
195 codeset.DataSegment().size += bss_size; 200 codeset.DataSegment().size += bss_size;
196 program_image.resize(static_cast<u32>(program_image.size()) + bss_size); 201 program_image.resize(static_cast<u32>(program_image.size()) + bss_size);
197 202
203#ifdef ARCHITECTURE_arm64
204 const auto& code = codeset.CodeSegment();
205
206 // NROs are always 64-bit programs.
207 Settings::SetNceEnabled(true);
208
209 // Create NCE patcher
210 Core::NCE::Patcher patch{};
211 size_t image_size = program_image.size();
212
213 if (Settings::IsNceEnabled()) {
214 // Patch SVCs and MRS calls in the guest code
215 patch.PatchText(program_image, code);
216
217 // We only support PostData patching for NROs.
218 ASSERT(patch.Mode() == Core::NCE::PatchMode::PostData);
219
220 // Update patch section.
221 auto& patch_segment = codeset.PatchSegment();
222 patch_segment.addr = image_size;
223 patch_segment.size = static_cast<u32>(patch.SectionSize());
224
225 // Add patch section size to the module size.
226 image_size += patch_segment.size;
227 }
228#endif
229
230 // Enable direct memory mapping in case of NCE.
231 const u64 fastmem_base = [&]() -> size_t {
232 if (Settings::IsNceEnabled()) {
233 auto& buffer = system.DeviceMemory().buffer;
234 buffer.EnableDirectMappedAddress();
235 return reinterpret_cast<u64>(buffer.VirtualBasePointer());
236 }
237 return 0;
238 }();
239
198 // Setup the process code layout 240 // Setup the process code layout
199 if (process 241 if (process
200 .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), program_image.size(), 0, 242 .LoadFromMetadata(FileSys::ProgramMetadata::GetDefault(), image_size, fastmem_base,
201 false) 243 false)
202 .IsError()) { 244 .IsError()) {
203 return false; 245 return false;
204 } 246 }
205 247
248 // Relocate code patch and copy to the program_image if running under NCE.
249 // This needs to be after LoadFromMetadata so we can use the process entry point.
250#ifdef ARCHITECTURE_arm64
251 if (Settings::IsNceEnabled()) {
252 patch.RelocateAndCopy(process.GetEntryPoint(), code, program_image,
253 &process.GetPostHandlers());
254 }
255#endif
256
206 // Load codeset for current process 257 // Load codeset for current process
207 codeset.memory = std::move(program_image); 258 codeset.memory = std::move(program_image);
208 process.LoadModule(std::move(codeset), process.GetEntryPoint()); 259 process.LoadModule(std::move(codeset), process.GetEntryPoint());
@@ -210,8 +261,9 @@ static bool LoadNroImpl(Kernel::KProcess& process, const std::vector<u8>& data)
210 return true; 261 return true;
211} 262}
212 263
213bool AppLoader_NRO::LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file) { 264bool AppLoader_NRO::LoadNro(Core::System& system, Kernel::KProcess& process,
214 return LoadNroImpl(process, nro_file.ReadAllBytes()); 265 const FileSys::VfsFile& nro_file) {
266 return LoadNroImpl(system, process, nro_file.ReadAllBytes());
215} 267}
216 268
217AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) { 269AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::System& system) {
@@ -219,7 +271,7 @@ AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::KProcess& process, Core::S
219 return {ResultStatus::ErrorAlreadyLoaded, {}}; 271 return {ResultStatus::ErrorAlreadyLoaded, {}};
220 } 272 }
221 273
222 if (!LoadNro(process, *file)) { 274 if (!LoadNro(system, process, *file)) {
223 return {ResultStatus::ErrorLoadingNRO, {}}; 275 return {ResultStatus::ErrorLoadingNRO, {}};
224 } 276 }
225 277
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 8de6eebc6..d2928cba0 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -54,7 +54,7 @@ public:
54 bool IsRomFSUpdatable() const override; 54 bool IsRomFSUpdatable() const override;
55 55
56private: 56private:
57 bool LoadNro(Kernel::KProcess& process, const FileSys::VfsFile& nro_file); 57 bool LoadNro(Core::System& system, Kernel::KProcess& process, const FileSys::VfsFile& nro_file);
58 58
59 std::vector<u8> icon_data; 59 std::vector<u8> icon_data;
60 std::unique_ptr<FileSys::NACP> nacp; 60 std::unique_ptr<FileSys::NACP> nacp;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 1350da8dc..34b10ef2e 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -20,6 +20,10 @@
20#include "core/loader/nso.h" 20#include "core/loader/nso.h"
21#include "core/memory.h" 21#include "core/memory.h"
22 22
23#ifdef ARCHITECTURE_arm64
24#include "core/arm/nce/patch.h"
25#endif
26
23namespace Loader { 27namespace Loader {
24namespace { 28namespace {
25struct MODHeader { 29struct MODHeader {
@@ -72,7 +76,8 @@ FileType AppLoader_NSO::IdentifyType(const FileSys::VirtualFile& in_file) {
72std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system, 76std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::System& system,
73 const FileSys::VfsFile& nso_file, VAddr load_base, 77 const FileSys::VfsFile& nso_file, VAddr load_base,
74 bool should_pass_arguments, bool load_into_process, 78 bool should_pass_arguments, bool load_into_process,
75 std::optional<FileSys::PatchManager> pm) { 79 std::optional<FileSys::PatchManager> pm,
80 Core::NCE::Patcher* patch) {
76 if (nso_file.GetSize() < sizeof(NSOHeader)) { 81 if (nso_file.GetSize() < sizeof(NSOHeader)) {
77 return std::nullopt; 82 return std::nullopt;
78 } 83 }
@@ -86,6 +91,16 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
86 return std::nullopt; 91 return std::nullopt;
87 } 92 }
88 93
94 // Allocate some space at the beginning if we are patching in PreText mode.
95 const size_t module_start = [&]() -> size_t {
96#ifdef ARCHITECTURE_arm64
97 if (patch && patch->Mode() == Core::NCE::PatchMode::PreText) {
98 return patch->SectionSize();
99 }
100#endif
101 return 0;
102 }();
103
89 // Build program image 104 // Build program image
90 Kernel::CodeSet codeset; 105 Kernel::CodeSet codeset;
91 Kernel::PhysicalMemory program_image; 106 Kernel::PhysicalMemory program_image;
@@ -95,11 +110,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
95 if (nso_header.IsSegmentCompressed(i)) { 110 if (nso_header.IsSegmentCompressed(i)) {
96 data = DecompressSegment(data, nso_header.segments[i]); 111 data = DecompressSegment(data, nso_header.segments[i]);
97 } 112 }
98 program_image.resize(nso_header.segments[i].location + static_cast<u32>(data.size())); 113 program_image.resize(module_start + nso_header.segments[i].location +
99 std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), 114 static_cast<u32>(data.size()));
100 data.size()); 115 std::memcpy(program_image.data() + module_start + nso_header.segments[i].location,
101 codeset.segments[i].addr = nso_header.segments[i].location; 116 data.data(), data.size());
102 codeset.segments[i].offset = nso_header.segments[i].location; 117 codeset.segments[i].addr = module_start + nso_header.segments[i].location;
118 codeset.segments[i].offset = module_start + nso_header.segments[i].location;
103 codeset.segments[i].size = nso_header.segments[i].size; 119 codeset.segments[i].size = nso_header.segments[i].size;
104 } 120 }
105 121
@@ -118,7 +134,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
118 } 134 }
119 135
120 codeset.DataSegment().size += nso_header.segments[2].bss_size; 136 codeset.DataSegment().size += nso_header.segments[2].bss_size;
121 const u32 image_size{ 137 u32 image_size{
122 PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)}; 138 PageAlignSize(static_cast<u32>(program_image.size()) + nso_header.segments[2].bss_size)};
123 program_image.resize(image_size); 139 program_image.resize(image_size);
124 140
@@ -139,6 +155,32 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
139 std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); 155 std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
140 } 156 }
141 157
158#ifdef ARCHITECTURE_arm64
159 // If we are computing the process code layout and using nce backend, patch.
160 const auto& code = codeset.CodeSegment();
161 if (patch && patch->Mode() == Core::NCE::PatchMode::None) {
162 // Patch SVCs and MRS calls in the guest code
163 patch->PatchText(program_image, code);
164
165 // Add patch section size to the module size.
166 image_size += patch->SectionSize();
167 } else if (patch) {
168 // Relocate code patch and copy to the program_image.
169 patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers());
170
171 // Update patch section.
172 auto& patch_segment = codeset.PatchSegment();
173 patch_segment.addr = patch->Mode() == Core::NCE::PatchMode::PreText ? 0 : image_size;
174 patch_segment.size = static_cast<u32>(patch->SectionSize());
175
176 // Add patch section size to the module size. In PreText mode image_size
177 // already contains the patch segment as part of module_start.
178 if (patch->Mode() == Core::NCE::PatchMode::PostData) {
179 image_size += patch_segment.size;
180 }
181 }
182#endif
183
142 // If we aren't actually loading (i.e. just computing the process code layout), we are done 184 // If we aren't actually loading (i.e. just computing the process code layout), we are done
143 if (!load_into_process) { 185 if (!load_into_process) {
144 return load_base + image_size; 186 return load_base + image_size;
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 0b53b4ecd..29b86ed4c 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -15,6 +15,10 @@ namespace Core {
15class System; 15class System;
16} 16}
17 17
18namespace Core::NCE {
19class Patcher;
20}
21
18namespace Kernel { 22namespace Kernel {
19class KProcess; 23class KProcess;
20} 24}
@@ -88,7 +92,8 @@ public:
88 static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system, 92 static std::optional<VAddr> LoadModule(Kernel::KProcess& process, Core::System& system,
89 const FileSys::VfsFile& nso_file, VAddr load_base, 93 const FileSys::VfsFile& nso_file, VAddr load_base,
90 bool should_pass_arguments, bool load_into_process, 94 bool should_pass_arguments, bool load_into_process,
91 std::optional<FileSys::PatchManager> pm = {}); 95 std::optional<FileSys::PatchManager> pm = {},
96 Core::NCE::Patcher* patch = nullptr);
92 97
93 LoadResult Load(Kernel::KProcess& process, Core::System& system) override; 98 LoadResult Load(Kernel::KProcess& process, Core::System& system) override;
94 99
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 14db64f9d..e5ca78ef4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -1001,4 +1001,17 @@ void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
1001 impl->FlushRegion(dest_addr, size); 1001 impl->FlushRegion(dest_addr, size);
1002} 1002}
1003 1003
1004bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
1005 bool mapped = true;
1006 u8* const ptr = impl->GetPointerImpl(
1007 GetInteger(vaddr),
1008 [&] {
1009 LOG_ERROR(HW_Memory, "Unmapped InvalidateNCE for {} bytes @ {:#x}", size,
1010 GetInteger(vaddr));
1011 mapped = false;
1012 },
1013 [&] { impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); });
1014 return mapped && ptr != nullptr;
1015}
1016
1004} // namespace Core::Memory 1017} // namespace Core::Memory
diff --git a/src/core/memory.h b/src/core/memory.h
index 73195549f..e5fbc0025 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -474,6 +474,7 @@ public:
474 474
475 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); 475 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
476 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); 476 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
477 bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
477 void FlushRegion(Common::ProcessAddress dest_addr, size_t size); 478 void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
478 479
479private: 480private: