summaryrefslogtreecommitdiff
path: root/src/core/arm
diff options
context:
space:
mode:
authorGravatar liamwhite2023-11-30 09:20:55 -0500
committerGravatar GitHub2023-11-30 09:20:55 -0500
commit57a391e71db13ade7a3d96f59d53781eff18d2ac (patch)
tree0b4223de40a2d77598ac9095b1374353c2e9da7c /src/core/arm
parentMerge pull request #12223 from liamwhite/fruit-company (diff)
parentcore: Rename patcher file (diff)
downloadyuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.tar.gz
yuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.tar.xz
yuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.zip
Merge pull request #12074 from GPUCode/yuwu-on-the-metal
Implement Native Code Execution (NCE)
Diffstat (limited to 'src/core/arm')
-rw-r--r--src/core/arm/arm_interface.cpp2
-rw-r--r--src/core/arm/arm_interface.h3
-rw-r--r--src/core/arm/nce/arm_nce.cpp400
-rw-r--r--src/core/arm/nce/arm_nce.h108
-rw-r--r--src/core/arm/nce/arm_nce.s222
-rw-r--r--src/core/arm/nce/arm_nce_asm_definitions.h29
-rw-r--r--src/core/arm/nce/guest_context.h50
-rw-r--r--src/core/arm/nce/instructions.h147
-rw-r--r--src/core/arm/nce/patcher.cpp474
-rw-r--r--src/core/arm/nce/patcher.h98
10 files changed, 1533 insertions, 0 deletions
diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp
index 558fba5bd..d231bf89c 100644
--- a/src/core/arm/arm_interface.cpp
+++ b/src/core/arm/arm_interface.cpp
@@ -201,6 +201,8 @@ void ARM_Interface::Run() {
201 if (True(hr & HaltReason::DataAbort)) { 201 if (True(hr & HaltReason::DataAbort)) {
202 if (system.DebuggerEnabled()) { 202 if (system.DebuggerEnabled()) {
203 system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint()); 203 system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
204 } else {
205 LogBacktrace();
204 } 206 }
205 current_thread->RequestSuspend(SuspendType::Debug); 207 current_thread->RequestSuspend(SuspendType::Debug);
206 break; 208 break;
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 3d866ff6f..a9d9ac09d 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -81,6 +81,9 @@ public:
81 // thread context to be 800 bytes in size. 81 // thread context to be 800 bytes in size.
82 static_assert(sizeof(ThreadContext64) == 0x320); 82 static_assert(sizeof(ThreadContext64) == 0x320);
83 83
84 /// Perform any backend-specific initialization.
85 virtual void Initialize() {}
86
84 /// Runs the CPU until an event happens 87 /// Runs the CPU until an event happens
85 void Run(); 88 void Run();
86 89
diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
new file mode 100644
index 000000000..f7bdafd39
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -0,0 +1,400 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <cinttypes>
5#include <memory>
6
7#include "common/signal_chain.h"
8#include "core/arm/nce/arm_nce.h"
9#include "core/arm/nce/patcher.h"
10#include "core/core.h"
11#include "core/memory.h"
12
13#include "core/hle/kernel/k_process.h"
14
15#include <signal.h>
16#include <sys/syscall.h>
17#include <unistd.h>
18
19namespace Core {
20
21namespace {
22
23struct sigaction g_orig_action;
24
25// Verify assembly offsets.
26using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
27static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0NativeContext);
28static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
29static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
30
// Locates the FPSIMD (floating point / SIMD) record inside the kernel-supplied
// signal frame. On aarch64 Linux, mcontext_t::__reserved holds a chain of
// _aarch64_ctx records, each carrying a magic value and its own size.
// NOTE(review): assumes an FPSIMD_MAGIC record is always present in the chain;
// if it were absent (or a record reported size 0) this loop would not
// terminate — confirm the kernel guarantees the record for these signals.
fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
    _aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
    while (header->magic != FPSIMD_MAGIC) {
        // Advance to the next record in the chain.
        header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
    }
    return reinterpret_cast<fpsimd_context*>(header);
}
38
39} // namespace
40
// Called from the signal-handler path: swaps the interrupted host context for
// the guest context so that returning from the signal resumes guest code.
// Saves the host callee-saved state into the guest's HostContext first, then
// overwrites the signal frame with the guest's registers.
// Returns the NativeExecutionParameters pointer to be installed as tpidr_el0.
void* ARM_NCE::RestoreGuestContext(void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

    // Thread-local parameters will be located in x9.
    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);

    // Retrieve the host floating point state.
    auto* fpctx = GetFloatingPointState(host_ctx);

    // Save host callee-saved registers (x19-x30 and v8-v15).
    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
                sizeof(guest_ctx->host_ctx.host_saved_vregs));
    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
                sizeof(guest_ctx->host_ctx.host_saved_regs));

    // Save stack pointer.
    guest_ctx->host_ctx.host_sp = host_ctx.sp;

    // Restore all guest state except tpidr_el0.
    host_ctx.sp = guest_ctx->sp;
    host_ctx.pc = guest_ctx->pc;
    host_ctx.pstate = guest_ctx->pstate;
    fpctx->fpcr = guest_ctx->fpcr;
    fpctx->fpsr = guest_ctx->fpsr;
    std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
    std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));

    // Return the new thread-local storage pointer.
    return tpidr;
}
73
// Inverse of RestoreGuestContext: snapshots the guest state out of the signal
// frame into guest_ctx, then rebuilds the host context so that returning from
// the signal resumes host code at the saved return address.
void ARM_NCE::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
    // Retrieve the host context.
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;

    // Retrieve the host floating point state.
    auto* fpctx = GetFloatingPointState(host_ctx);

    // Save all guest registers except tpidr_el0.
    std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
    std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
    guest_ctx->fpsr = fpctx->fpsr;
    guest_ctx->fpcr = fpctx->fpcr;
    guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
    guest_ctx->pc = host_ctx.pc;
    guest_ctx->sp = host_ctx.sp;

    // Restore stack pointer.
    host_ctx.sp = guest_ctx->host_ctx.host_sp;

    // Restore host callee-saved registers (x19-x30 and v8-v15).
    std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
                sizeof(guest_ctx->host_ctx.host_saved_regs));
    std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
                sizeof(guest_ctx->host_ctx.host_saved_vregs));

    // Return from the call on exit by setting pc to x30.
    // host_saved_regs[11] is the saved x30 (regs 19..30 -> index 11).
    host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];

    // Clear esr_el1 and return it (x0 is the function return value).
    host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
}
105
// Handles a SIGSEGV taken while executing guest code.
// Returns true to resume guest execution, false after saving the guest
// context to return to host code instead.
bool ARM_NCE::HandleGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
    auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
    auto* info = static_cast<siginfo_t*>(raw_info);

    // Try to handle an invalid access.
    // TODO: handle accesses which split a page?
    const Common::ProcessAddress addr =
        (reinterpret_cast<u64>(info->si_addr) & ~Memory::YUZU_PAGEMASK);
    if (guest_ctx->system->ApplicationMemory().InvalidateNCE(addr, Memory::YUZU_PAGESIZE)) {
        // We handled the access successfully and are returning to guest code.
        return true;
    }

    // We can't handle the access, so determine why we crashed.
    // A fault on the instruction fetch itself (pc == faulting address) is a
    // prefetch abort; anything else is a data abort.
    const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);

    // For data aborts, skip the instruction and return to guest code.
    // This will allow games to continue in many scenarios where they would otherwise crash.
    if (!is_prefetch_abort) {
        host_ctx.pc += 4;
        return true;
    }

    // This is a prefetch abort.
    guest_ctx->esr_el1.fetch_or(static_cast<u64>(HaltReason::PrefetchAbort));

    // Forcibly mark the context as locked. We are still running.
    // We may race with SignalInterrupt here:
    // - If we lose the race, then SignalInterrupt will send us a signal we are masking,
    //   and it will do nothing when it is unmasked, as we have already left guest code.
    // - If we win the race, then SignalInterrupt will wait for us to unlock first.
    auto& thread_params = guest_ctx->parent->running_thread->GetNativeExecutionParameters();
    thread_params.lock.store(SpinLockLocked);

    // Return to host.
    SaveGuestContext(guest_ctx, raw_context);
    return false;
}
144
// Forwards a fault that did not originate in guest code to whatever SIGSEGV
// handler was installed before ours (captured in g_orig_action at Initialize).
void ARM_NCE::HandleHostFault(int sig, void* raw_info, void* raw_context) {
    return g_orig_action.sa_sigaction(sig, static_cast<siginfo_t*>(raw_info), raw_context);
}
148
// Runs guest code natively until a halt condition is raised.
// Lock ordering is significant here: the core lock protects running_thread and
// the halt reason; the per-thread spinlock serializes against SignalInterrupt.
HaltReason ARM_NCE::RunJit() {
    // Get the thread parameters.
    // TODO: pass the current thread down from ::Run
    auto* thread = Kernel::GetCurrentThreadPointer(system.Kernel());
    auto* thread_params = &thread->GetNativeExecutionParameters();

    {
        // Lock our core context.
        std::scoped_lock lk{lock};

        // We should not be running.
        ASSERT(running_thread == nullptr);

        // Check if we need to run. If we have already been halted, we are done.
        u64 halt = guest_ctx.esr_el1.exchange(0);
        if (halt != 0) {
            return static_cast<HaltReason>(halt);
        }

        // Mark that we are running.
        running_thread = thread;

        // Acquire the lock on the thread parameters.
        // This allows us to force synchronization with SignalInterrupt.
        LockThreadParameters(thread_params);
    }

    // Assign current members.
    guest_ctx.parent = this;
    thread_params->native_context = &guest_ctx;
    thread_params->tpidr_el0 = guest_ctx.tpidr_el0;
    thread_params->tpidrro_el0 = guest_ctx.tpidrro_el0;
    thread_params->is_running = true;

    HaltReason halt{};

    // TODO: finding and creating the post handler needs to be locked
    // to deal with dynamic loading of NROs.
    const auto& post_handlers = system.ApplicationProcess()->GetPostHandlers();
    if (auto it = post_handlers.find(guest_ctx.pc); it != post_handlers.end()) {
        // A patched instruction sequence resumes via its trampoline.
        halt = ReturnToRunCodeByTrampoline(thread_params, &guest_ctx, it->second);
    } else {
        // Otherwise enter guest code through the signal-handler path.
        halt = ReturnToRunCodeByExceptionLevelChange(thread_id, thread_params);
    }

    // Unload members.
    // The thread does not change, so we can persist the old reference.
    guest_ctx.tpidr_el0 = thread_params->tpidr_el0;
    thread_params->native_context = nullptr;
    thread_params->is_running = false;

    // Unlock the thread parameters.
    UnlockThreadParameters(thread_params);

    {
        // Lock the core context.
        std::scoped_lock lk{lock};

        // On exit, we no longer have an active thread.
        running_thread = nullptr;
    }

    // Return the halt reason.
    return halt;
}
214
// Single-stepping is not supported under native execution; report the step
// as immediately complete.
HaltReason ARM_NCE::StepJit() {
    return HaltReason::StepThread;
}

// Returns the SVC immediate captured by the patched guest code.
u32 ARM_NCE::GetSvcNumber() const {
    return guest_ctx.svc_swi;
}

ARM_NCE::ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_)
    : ARM_Interface{system_, uses_wall_clock_}, core_index{core_index_} {
    guest_ctx.system = &system_;
}

ARM_NCE::~ARM_NCE() = default;
229
// Records the host thread id for this core and installs the process-wide
// signal handlers (once, across all cores) used to enter/leave guest code
// and to handle guest faults.
void ARM_NCE::Initialize() {
    // Used as the SYS_tkill target by SignalInterrupt.
    thread_id = gettid();

    // Setup our signals
    static std::once_flag flag;
    std::call_once(flag, [] {
        using HandlerType = decltype(sigaction::sa_sigaction);

        // All three handlers mask each other to avoid nested entry.
        sigset_t signal_mask;
        sigemptyset(&signal_mask);
        sigaddset(&signal_mask, ReturnToRunCodeByExceptionLevelChangeSignal);
        sigaddset(&signal_mask, BreakFromRunCodeSignal);
        sigaddset(&signal_mask, GuestFaultSignal);

        struct sigaction return_to_run_code_action {};
        return_to_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        return_to_run_code_action.sa_sigaction = reinterpret_cast<HandlerType>(
            &ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
        return_to_run_code_action.sa_mask = signal_mask;
        Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
                          nullptr);

        struct sigaction break_from_run_code_action {};
        break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
        break_from_run_code_action.sa_sigaction =
            reinterpret_cast<HandlerType>(&ARM_NCE::BreakFromRunCodeSignalHandler);
        break_from_run_code_action.sa_mask = signal_mask;
        Common::SigAction(BreakFromRunCodeSignal, &break_from_run_code_action, nullptr);

        // The previous SIGSEGV action is preserved so host faults can be
        // forwarded to it (see HandleHostFault).
        struct sigaction fault_action {};
        fault_action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESTART;
        fault_action.sa_sigaction =
            reinterpret_cast<HandlerType>(&ARM_NCE::GuestFaultSignalHandler);
        fault_action.sa_mask = signal_mask;
        Common::SigAction(GuestFaultSignal, &fault_action, &g_orig_action);

        // Simplify call for g_orig_action.
        // These fields occupy the same space in memory, so this should be a no-op in practice.
        if (!(g_orig_action.sa_flags & SA_SIGINFO)) {
            g_orig_action.sa_sigaction =
                reinterpret_cast<decltype(g_orig_action.sa_sigaction)>(g_orig_action.sa_handler);
        }
    });
}
274
// Simple accessors over the guest context. These are only meaningful while
// the core is not actively running guest code (the live values are then in
// hardware registers / the signal frame).

void ARM_NCE::SetPC(u64 pc) {
    guest_ctx.pc = pc;
}

u64 ARM_NCE::GetPC() const {
    return guest_ctx.pc;
}

u64 ARM_NCE::GetSP() const {
    return guest_ctx.sp;
}

u64 ARM_NCE::GetReg(int index) const {
    return guest_ctx.cpu_registers[index];
}

void ARM_NCE::SetReg(int index, u64 value) {
    guest_ctx.cpu_registers[index] = value;
}

u128 ARM_NCE::GetVectorReg(int index) const {
    return guest_ctx.vector_registers[index];
}

void ARM_NCE::SetVectorReg(int index, u128 value) {
    guest_ctx.vector_registers[index] = value;
}

u32 ARM_NCE::GetPSTATE() const {
    return guest_ctx.pstate;
}

void ARM_NCE::SetPSTATE(u32 pstate) {
    guest_ctx.pstate = pstate;
}

// TLS address is exposed to the guest through the read-only tpidrro_el0.
u64 ARM_NCE::GetTlsAddress() const {
    return guest_ctx.tpidrro_el0;
}

void ARM_NCE::SetTlsAddress(u64 address) {
    guest_ctx.tpidrro_el0 = address;
}

u64 ARM_NCE::GetTPIDR_EL0() const {
    return guest_ctx.tpidr_el0;
}

void ARM_NCE::SetTPIDR_EL0(u64 value) {
    guest_ctx.tpidr_el0 = value;
}
326
// Copies the guest context out to a kernel ThreadContext64 (for scheduling /
// debugging). Field-for-field with LoadContext below.
void ARM_NCE::SaveContext(ThreadContext64& ctx) const {
    ctx.cpu_registers = guest_ctx.cpu_registers;
    ctx.sp = guest_ctx.sp;
    ctx.pc = guest_ctx.pc;
    ctx.pstate = guest_ctx.pstate;
    ctx.vector_registers = guest_ctx.vector_registers;
    ctx.fpcr = guest_ctx.fpcr;
    ctx.fpsr = guest_ctx.fpsr;
    ctx.tpidr = guest_ctx.tpidr_el0;
}

// Copies a kernel ThreadContext64 into the guest context prior to running.
void ARM_NCE::LoadContext(const ThreadContext64& ctx) {
    guest_ctx.cpu_registers = ctx.cpu_registers;
    guest_ctx.sp = ctx.sp;
    guest_ctx.pc = ctx.pc;
    guest_ctx.pstate = ctx.pstate;
    guest_ctx.vector_registers = ctx.vector_registers;
    guest_ctx.fpcr = ctx.fpcr;
    guest_ctx.fpsr = ctx.fpsr;
    guest_ctx.tpidr_el0 = ctx.tpidr;
}
348
// Requests that the running guest thread (if any) break out of guest code.
// Synchronizes with RunJit/HandleGuestFault via the core lock and the
// per-thread spinlock; see the race notes in HandleGuestFault.
void ARM_NCE::SignalInterrupt() {
    // Lock core context.
    std::scoped_lock lk{lock};

    // Add break loop condition.
    guest_ctx.esr_el1.fetch_or(static_cast<u64>(HaltReason::BreakLoop));

    // If there is no thread running, we are done.
    if (running_thread == nullptr) {
        return;
    }

    // Lock the thread context.
    auto* params = &running_thread->GetNativeExecutionParameters();
    LockThreadParameters(params);

    if (params->is_running) {
        // We should signal to the running thread.
        // The running thread will unlock the thread context.
        // thread_id is the host tid recorded in Initialize.
        syscall(SYS_tkill, thread_id, BreakFromRunCodeSignal);
    } else {
        // If the thread is no longer running, we have nothing to do.
        UnlockThreadParameters(params);
    }
}
374
// Drops any pending halt conditions.
void ARM_NCE::ClearInterrupt() {
    guest_ctx.esr_el1 = {};
}

void ARM_NCE::ClearInstructionCache() {
    // TODO: This is not possible to implement correctly on Linux because
    // we do not have any access to ic iallu.

    // Require accesses to complete.
    std::atomic_thread_fence(std::memory_order_seq_cst);
}

// Ranged invalidation degrades to the full (best-effort) flush above.
void ARM_NCE::InvalidateCacheRange(u64 addr, std::size_t size) {
    this->ClearInstructionCache();
}

void ARM_NCE::ClearExclusiveState() {
    // No-op.
}

void ARM_NCE::PageTableChanged(Common::PageTable& page_table,
                               std::size_t new_address_space_size_in_bits) {
    // No-op. Page table is never used.
}
399
400} // namespace Core
diff --git a/src/core/arm/nce/arm_nce.h b/src/core/arm/nce/arm_nce.h
new file mode 100644
index 000000000..5fbd6dbf3
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.h
@@ -0,0 +1,108 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <memory>
8#include <span>
9#include <unordered_map>
10#include <vector>
11
12#include "core/arm/arm_interface.h"
13#include "core/arm/nce/guest_context.h"
14
15namespace Core::Memory {
16class Memory;
17}
18
19namespace Core {
20
21class System;
22
// Native Code Execution (NCE) backend: runs aarch64 guest code directly on
// the host CPU, entering and leaving guest state through signal handlers and
// hand-written assembly (see arm_nce.s).
class ARM_NCE final : public ARM_Interface {
public:
    ARM_NCE(System& system_, bool uses_wall_clock_, std::size_t core_index_);

    ~ARM_NCE() override;

    void Initialize() override;
    void SetPC(u64 pc) override;
    u64 GetPC() const override;
    u64 GetSP() const override;
    u64 GetReg(int index) const override;
    void SetReg(int index, u64 value) override;
    u128 GetVectorReg(int index) const override;
    void SetVectorReg(int index, u128 value) override;

    u32 GetPSTATE() const override;
    void SetPSTATE(u32 pstate) override;
    u64 GetTlsAddress() const override;
    void SetTlsAddress(u64 address) override;
    void SetTPIDR_EL0(u64 value) override;
    u64 GetTPIDR_EL0() const override;

    Architecture GetArchitecture() const override {
        return Architecture::Aarch64;
    }

    // 32-bit contexts are not applicable to this backend.
    void SaveContext(ThreadContext32& ctx) const override {}
    void SaveContext(ThreadContext64& ctx) const override;
    void LoadContext(const ThreadContext32& ctx) override {}
    void LoadContext(const ThreadContext64& ctx) override;

    void SignalInterrupt() override;
    void ClearInterrupt() override;
    void ClearExclusiveState() override;
    void ClearInstructionCache() override;
    void InvalidateCacheRange(u64 addr, std::size_t size) override;
    void PageTableChanged(Common::PageTable& new_page_table,
                          std::size_t new_address_space_size_in_bits) override;

protected:
    HaltReason RunJit() override;
    HaltReason StepJit() override;

    u32 GetSvcNumber() const override;

    // Watchpoints are not supported by this backend.
    const Kernel::DebugWatchpoint* HaltedWatchpoint() const override {
        return nullptr;
    }

    void RewindBreakpointInstruction() override {}

private:
    // Assembly definitions (implemented in arm_nce.s; names must match the
    // mangled symbols there).
    static HaltReason ReturnToRunCodeByTrampoline(void* tpidr, GuestContext* ctx,
                                                  u64 trampoline_addr);
    static HaltReason ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr);

    static void ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info,
                                                                   void* raw_context);
    static void BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context);
    static void GuestFaultSignalHandler(int sig, void* info, void* raw_context);

    // Spinlock over NativeExecutionParameters::lock (also asm-implemented).
    static void LockThreadParameters(void* tpidr);
    static void UnlockThreadParameters(void* tpidr);

private:
    // C++ implementation functions for assembly definitions.
    static void* RestoreGuestContext(void* raw_context);
    static void SaveGuestContext(GuestContext* ctx, void* raw_context);
    static bool HandleGuestFault(GuestContext* ctx, void* info, void* raw_context);
    static void HandleHostFault(int sig, void* info, void* raw_context);

public:
    // Members set on initialization.
    std::size_t core_index{};
    pid_t thread_id{-1}; // host tid of the core thread; tkill target

    // Core context.
    GuestContext guest_ctx;

    // Thread and invalidation info. `lock` guards running_thread and
    // coordinates halt-reason updates (see SignalInterrupt/RunJit).
    std::mutex lock;
    Kernel::KThread* running_thread{};
};
107
108} // namespace Core
diff --git a/src/core/arm/nce/arm_nce.s b/src/core/arm/nce/arm_nce.s
new file mode 100644
index 000000000..b98e09f31
--- /dev/null
+++ b/src/core/arm/nce/arm_nce.s
@@ -0,0 +1,222 @@
1/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
2/* SPDX-License-Identifier: GPL-2.0-or-later */
3
4#include "core/arm/nce/arm_nce_asm_definitions.h"
5
/* Materializes a 32-bit constant in `reg` via mov (low half) + movk (high half). */
#define LOAD_IMMEDIATE_32(reg, val) \
    mov reg, #(((val) >> 0x00) & 0xFFFF); \
    movk reg, #(((val) >> 0x10) & 0xFFFF), lsl #16
9
10
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
/* Saves the host callee-saved state into ctx->host_ctx (offsets must match */
/* the static_asserts in guest_context.h), installs the guest stack and */
/* tpidr_el0, then jumps to the patch trampoline to restore guest registers. */
.section .text._ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
.global _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
.type _ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
_ZN4Core7ARM_NCE27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
    /* Back up host sp to x3. */
    /* Back up host tpidr_el0 to x4. */
    mov x3, sp
    mrs x4, tpidr_el0

    /* Load guest sp. x5 is used as a scratch register. */
    ldr x5, [x1, #(GuestContextSp)]
    mov sp, x5

    /* Offset GuestContext pointer to the host member. */
    add x5, x1, #(GuestContextHostContext)

    /* Save original host sp and tpidr_el0 (x3, x4) to host context. */
    stp x3, x4, [x5, #(HostContextSpTpidrEl0)]

    /* Save all callee-saved host GPRs. */
    stp x19, x20, [x5, #(HostContextRegs+0x0)]
    stp x21, x22, [x5, #(HostContextRegs+0x10)]
    stp x23, x24, [x5, #(HostContextRegs+0x20)]
    stp x25, x26, [x5, #(HostContextRegs+0x30)]
    stp x27, x28, [x5, #(HostContextRegs+0x40)]
    stp x29, x30, [x5, #(HostContextRegs+0x50)]

    /* Save all callee-saved host FPRs. */
    stp q8, q9, [x5, #(HostContextVregs+0x0)]
    stp q10, q11, [x5, #(HostContextVregs+0x20)]
    stp q12, q13, [x5, #(HostContextVregs+0x40)]
    stp q14, q15, [x5, #(HostContextVregs+0x60)]

    /* Load guest tpidr_el0 from argument. */
    msr tpidr_el0, x0

    /* Tail call the trampoline to restore guest state. */
    br x2
50
51
/* static HaltReason Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
/* Raises ReturnToRunCodeByExceptionLevelChangeSignal on our own thread via */
/* tkill; the signal handler below performs the full context switch and */
/* never returns here. */
.section .text._ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
.global _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv
.type _ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
_ZN4Core7ARM_NCE37ReturnToRunCodeByExceptionLevelChangeEiPv:
    /* This jumps to the signal handler, which will restore the entire context. */
    /* On entry, x0 = thread id, which is already in the right place. */

    /* Move tpidr to x9 so it is not trampled (RestoreGuestContext reads it there). */
    mov x9, x1

    /* Set up arguments. */
    mov x8, #(__NR_tkill)
    mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)

    /* Tail call the signal handler. */
    svc #0

    /* Block execution from flowing here. */
    brk #1000
72
73
/* static void Core::ARM_NCE::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
/* Rewrites the signal frame with the guest context (via RestoreGuestContext), */
/* swaps in the guest tpidr_el0, and unlocks the thread parameters; the */
/* sigreturn on exit then resumes guest code. */
.section .text._ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
    stp x29, x30, [sp, #-0x10]!
    mov x29, sp

    /* Call the context restorer with the raw context. */
    mov x0, x2
    bl _ZN4Core7ARM_NCE19RestoreGuestContextEPv

    /* Save the old value of tpidr_el0. */
    mrs x8, tpidr_el0
    ldr x9, [x0, #(TpidrEl0NativeContext)]
    str x8, [x9, #(GuestContextHostContext + HostContextTpidrEl0)]

    /* Set our new tpidr_el0. */
    msr tpidr_el0, x0

    /* Unlock the context. */
    bl _ZN4Core7ARM_NCE22UnlockThreadParametersEPv

    /* Returning from here will enter the guest. */
    ldp x29, x30, [sp], #0x10
    ret
100
101
/* static void Core::ARM_NCE::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
/* Entered when SignalInterrupt tkills the core thread. The TLS magic check */
/* distinguishes "interrupted while in guest code" (tpidr_el0 points at */
/* NativeExecutionParameters) from a spurious delivery in host code. */
.section .text._ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE29BreakFromRunCodeSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    mrs x8, tpidr_el0
    ldr w9, [x8, #(TpidrEl0TlsMagic)]

    LOAD_IMMEDIATE_32(w10, TlsMagic)

    cmp w9, w10
    b.ne 1f

    /* Correct TLS magic, so this is a guest interrupt. */
    /* Restore host tpidr_el0. */
    ldr x0, [x8, #(TpidrEl0NativeContext)]
    ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
    msr tpidr_el0, x3

    /* Tail call the restorer. */
    mov x1, x2
    b _ZN4Core7ARM_NCE16SaveGuestContextEPNS_12GuestContextEPv

    /* Returning from here will enter host code. */

1:
    /* Incorrect TLS magic, so this is a spurious signal. */
    ret
131
132
/* static void Core::ARM_NCE::GuestFaultSignalHandler(int sig, void* info, void* raw_context) */
/* SIGSEGV dispatcher: host faults are forwarded to HandleHostFault; guest */
/* faults go to HandleGuestFault, whose boolean result decides whether the */
/* guest tpidr_el0 is reinstated (resume guest) or left as host (return). */
.section .text._ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_
.type _ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_, %function
_ZN4Core7ARM_NCE23GuestFaultSignalHandlerEiPvS1_:
    /* Check to see if we have the correct TLS magic. */
    mrs x8, tpidr_el0
    ldr w9, [x8, #(TpidrEl0TlsMagic)]

    LOAD_IMMEDIATE_32(w10, TlsMagic)

    cmp w9, w10
    b.eq 1f

    /* Incorrect TLS magic, so this is a host fault. */
    /* Tail call the handler. */
    b _ZN4Core7ARM_NCE15HandleHostFaultEiPvS1_

1:
    /* Correct TLS magic, so this is a guest fault. */
    stp x29, x30, [sp, #-0x20]!
    str x19, [sp, #0x10]
    mov x29, sp

    /* Save the old tpidr_el0. */
    mov x19, x8

    /* Restore host tpidr_el0. */
    ldr x0, [x8, #(TpidrEl0NativeContext)]
    ldr x3, [x0, #(GuestContextHostContext + HostContextTpidrEl0)]
    msr tpidr_el0, x3

    /* Call the handler. */
    bl _ZN4Core7ARM_NCE16HandleGuestFaultEPNS_12GuestContextEPvS3_

    /* If the handler returned false, we want to preserve the host tpidr_el0. */
    cbz x0, 2f

    /* Otherwise, restore guest tpidr_el0. */
    msr tpidr_el0, x19

2:
    ldr x19, [sp, #0x10]
    ldp x29, x30, [sp], #0x20
    ret
178
179
/* static void Core::ARM_NCE::LockThreadParameters(void* tpidr) */
/* Spinlock acquire over NativeExecutionParameters::lock using exclusive */
/* monitors. SpinLockLocked == 0, so storing wzr takes the lock; a loaded */
/* value of 0 means someone else holds it. */
.section .text._ZN4Core7ARM_NCE20LockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE20LockThreadParametersEPv
.type _ZN4Core7ARM_NCE20LockThreadParametersEPv, %function
_ZN4Core7ARM_NCE20LockThreadParametersEPv:
    /* Offset to lock member. */
    add x0, x0, #(TpidrEl0Lock)

1:
    /* Clear the monitor. */
    clrex

2:
    /* Load-linked with acquire ordering. */
    ldaxr w1, [x0]

    /* If the value was SpinLockLocked, clear monitor and retry. */
    cbz w1, 1b

    /* Store-conditional SpinLockLocked with relaxed ordering. */
    stxr w1, wzr, [x0]

    /* If we failed to store, retry. */
    cbnz w1, 2b

    ret
206
207
/* static void Core::ARM_NCE::UnlockThreadParameters(void* tpidr) */
/* Spinlock release: a single store-release of SpinLockUnlocked publishes */
/* all prior writes to the next acquirer. */
.section .text._ZN4Core7ARM_NCE22UnlockThreadParametersEPv, "ax", %progbits
.global _ZN4Core7ARM_NCE22UnlockThreadParametersEPv
.type _ZN4Core7ARM_NCE22UnlockThreadParametersEPv, %function
_ZN4Core7ARM_NCE22UnlockThreadParametersEPv:
    /* Offset to lock member. */
    add x0, x0, #(TpidrEl0Lock)

    /* Load SpinLockUnlocked. */
    mov w1, #(SpinLockUnlocked)

    /* Store value with release ordering. */
    stlr w1, [x0]

    ret
diff --git a/src/core/arm/nce/arm_nce_asm_definitions.h b/src/core/arm/nce/arm_nce_asm_definitions.h
new file mode 100644
index 000000000..8a9b285b5
--- /dev/null
+++ b/src/core/arm/nce/arm_nce_asm_definitions.h
@@ -0,0 +1,29 @@
/* SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project */
/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

#define __ASSEMBLY__

#include <asm-generic/signal.h>
#include <asm-generic/unistd.h>

/* Signals used to enter/leave guest code and to trap guest faults. */
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
#define BreakFromRunCodeSignal SIGURG
#define GuestFaultSignal SIGSEGV

/* Struct offsets shared with arm_nce.s. Each value is checked against the */
/* real layout by static_asserts in guest_context.h / arm_nce.cpp -- keep */
/* them in sync when changing those structs. */
#define GuestContextSp 0xF8
#define GuestContextHostContext 0x320

#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60

/* Offsets into Kernel::KThread::NativeExecutionParameters. */
#define TpidrEl0NativeContext 0x10
#define TpidrEl0Lock 0x18
#define TpidrEl0TlsMagic 0x20
#define TlsMagic 0x555a5559

/* Spinlock states: 0 = locked so the asm can store wzr to acquire. */
#define SpinLockLocked 0
#define SpinLockUnlocked 1
diff --git a/src/core/arm/nce/guest_context.h b/src/core/arm/nce/guest_context.h
new file mode 100644
index 000000000..0767a0337
--- /dev/null
+++ b/src/core/arm/nce/guest_context.h
@@ -0,0 +1,50 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8#include "core/arm/arm_interface.h"
9#include "core/arm/nce/arm_nce_asm_definitions.h"
10
11namespace Core {
12
13class ARM_NCE;
14class System;
15
// Host callee-saved state preserved across entry into guest code.
// Layout is assembly-visible; offsets are pinned by the static_asserts below.
struct HostContext {
    alignas(16) std::array<u64, 12> host_saved_regs{};  // x19-x30
    alignas(16) std::array<u128, 8> host_saved_vregs{}; // v8-v15
    u64 host_sp{};
    void* host_tpidr_el0{};
};
22
// Full guest CPU state for one core, plus the saved host state needed to
// return. Layout is assembly-visible (see arm_nce_asm_definitions.h).
struct GuestContext {
    std::array<u64, 31> cpu_registers{}; // x0-x30
    u64 sp{};
    u64 pc{};
    u32 fpcr{};
    u32 fpsr{};
    std::array<u128, 32> vector_registers{}; // v0-v31
    u32 pstate{};
    alignas(16) HostContext host_ctx{};
    u64 tpidrro_el0{}; // guest read-only TLS pointer
    u64 tpidr_el0{};   // guest read-write TLS pointer
    // Pending halt reasons, OR'd in by fault/interrupt paths and consumed
    // with exchange(0) by RunJit/SaveGuestContext.
    std::atomic<u64> esr_el1{};
    u32 nzcv{};
    u32 svc_swi{}; // immediate of the last trapped SVC
    System* system{};
    ARM_NCE* parent{};
};
40
41// Verify assembly offsets.
42static_assert(offsetof(GuestContext, sp) == GuestContextSp);
43static_assert(offsetof(GuestContext, host_ctx) == GuestContextHostContext);
44static_assert(offsetof(HostContext, host_sp) == HostContextSpTpidrEl0);
45static_assert(offsetof(HostContext, host_tpidr_el0) - 8 == HostContextSpTpidrEl0);
46static_assert(offsetof(HostContext, host_tpidr_el0) == HostContextTpidrEl0);
47static_assert(offsetof(HostContext, host_saved_regs) == HostContextRegs);
48static_assert(offsetof(HostContext, host_saved_vregs) == HostContextVregs);
49
50} // namespace Core
diff --git a/src/core/arm/nce/instructions.h b/src/core/arm/nce/instructions.h
new file mode 100644
index 000000000..5b56ff857
--- /dev/null
+++ b/src/core/arm/nce/instructions.h
@@ -0,0 +1,147 @@
1// SPDX-FileCopyrightText: Copyright © 2020 Skyline Team and Contributors
2// SPDX-License-Identifier: MPL-2.0
3
#pragma once

#include "common/bit_field.h"
#include "common/common_types.h"
6
7namespace Core::NCE {
8
// System register encodings as they appear in the 15-bit immediate field of
// MRS/MSR (bits [19:5] of the instruction word).
enum SystemRegister : u32 {
    TpidrEl0 = 0x5E82,   // EL0 read/write software thread ID register
    TpidrroEl0 = 0x5E83, // EL0 read-only software thread ID register
    CntfrqEl0 = 0x5F00,  // counter-timer frequency register
    CntpctEl0 = 0x5F01,  // counter-timer physical count register
};
15
16// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/SVC--Supervisor-Call-
17union SVC {
18 constexpr explicit SVC(u32 raw_) : raw{raw_} {}
19
20 constexpr bool Verify() {
21 return (this->GetSig0() == 0x1 && this->GetSig1() == 0x6A0);
22 }
23
24 constexpr u32 GetSig0() {
25 return decltype(sig0)::ExtractValue(raw);
26 }
27
28 constexpr u32 GetValue() {
29 return decltype(value)::ExtractValue(raw);
30 }
31
32 constexpr u32 GetSig1() {
33 return decltype(sig1)::ExtractValue(raw);
34 }
35
36 u32 raw;
37
38private:
39 BitField<0, 5, u32> sig0; // 0x1
40 BitField<5, 16, u32> value; // 16-bit immediate
41 BitField<21, 11, u32> sig1; // 0x6A0
42};
43static_assert(sizeof(SVC) == sizeof(u32));
44static_assert(SVC(0xD40000C1).Verify());
45static_assert(SVC(0xD40000C1).GetValue() == 0x6);
46
47// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MRS--Move-System-Register-
48union MRS {
49 constexpr explicit MRS(u32 raw_) : raw{raw_} {}
50
51 constexpr bool Verify() {
52 return (this->GetSig() == 0xD53);
53 }
54
55 constexpr u32 GetRt() {
56 return decltype(rt)::ExtractValue(raw);
57 }
58
59 constexpr u32 GetSystemReg() {
60 return decltype(system_reg)::ExtractValue(raw);
61 }
62
63 constexpr u32 GetSig() {
64 return decltype(sig)::ExtractValue(raw);
65 }
66
67 u32 raw;
68
69private:
70 BitField<0, 5, u32> rt; // destination register
71 BitField<5, 15, u32> system_reg; // source system register
72 BitField<20, 12, u32> sig; // 0xD53
73};
74static_assert(sizeof(MRS) == sizeof(u32));
75static_assert(MRS(0xD53BE020).Verify());
76static_assert(MRS(0xD53BE020).GetSystemReg() == CntpctEl0);
77static_assert(MRS(0xD53BE020).GetRt() == 0x0);
78
79// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MSR--register---Move-general-purpose-register-to-System-Register-
80union MSR {
81 constexpr explicit MSR(u32 raw_) : raw{raw_} {}
82
83 constexpr bool Verify() {
84 return this->GetSig() == 0xD51;
85 }
86
87 constexpr u32 GetRt() {
88 return decltype(rt)::ExtractValue(raw);
89 }
90
91 constexpr u32 GetSystemReg() {
92 return decltype(system_reg)::ExtractValue(raw);
93 }
94
95 constexpr u32 GetSig() {
96 return decltype(sig)::ExtractValue(raw);
97 }
98
99 u32 raw;
100
101private:
102 BitField<0, 5, u32> rt; // source register
103 BitField<5, 15, u32> system_reg; // destination system register
104 BitField<20, 12, u32> sig; // 0xD51
105};
106static_assert(sizeof(MSR) == sizeof(u32));
107static_assert(MSR(0xD51BD040).Verify());
108static_assert(MSR(0xD51BD040).GetSystemReg() == TpidrEl0);
109static_assert(MSR(0xD51BD040).GetRt() == 0x0);
110
111// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXR--Load-Exclusive-Register-
112// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers-
113// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXR--Store-Exclusive-Register-
114// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers-
115union Exclusive {
116 constexpr explicit Exclusive(u32 raw_) : raw{raw_} {}
117
118 constexpr bool Verify() {
119 return this->GetSig() == 0x10;
120 }
121
122 constexpr u32 GetSig() {
123 return decltype(sig)::ExtractValue(raw);
124 }
125
126 constexpr u32 AsOrdered() {
127 return raw | decltype(o0)::FormatValue(1);
128 }
129
130 u32 raw;
131
132private:
133 BitField<0, 5, u32> rt; // memory operand
134 BitField<5, 5, u32> rn; // register operand 1
135 BitField<10, 5, u32> rt2; // register operand 2
136 BitField<15, 1, u32> o0; // ordered
137 BitField<16, 5, u32> rs; // status register
138 BitField<21, 2, u32> l; // operation type
139 BitField<23, 7, u32> sig; // 0x10
140 BitField<30, 2, u32> size; // size
141};
142static_assert(Exclusive(0xC85FFC00).Verify());
143static_assert(Exclusive(0xC85FFC00).AsOrdered() == 0xC85FFC00);
144static_assert(Exclusive(0xC85F7C00).AsOrdered() == 0xC85FFC00);
145static_assert(Exclusive(0xC8200440).AsOrdered() == 0xC8208440);
146
147} // namespace Core::NCE
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp
new file mode 100644
index 000000000..ec8527224
--- /dev/null
+++ b/src/core/arm/nce/patcher.cpp
@@ -0,0 +1,474 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/arm64/native_clock.h"
5#include "common/bit_cast.h"
6#include "common/literals.h"
7#include "core/arm/nce/arm_nce.h"
8#include "core/arm/nce/guest_context.h"
9#include "core/arm/nce/instructions.h"
10#include "core/arm/nce/patcher.h"
11#include "core/core.h"
12#include "core/core_timing.h"
13#include "core/hle/kernel/svc.h"
14
15namespace Core::NCE {
16
17using namespace Common::Literals;
18using namespace oaknut::util;
19
20using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
21
// Maximum displacement reachable by an unconditional AArch64 B instruction
// (+/-128 MiB); decides whether the patch section can live behind the image.
constexpr size_t MaxRelativeBranch = 128_MiB;
// First .text word to scan: skips the first 0x24 bytes
// (NOTE(review): presumably the module header — confirm against the
// NSO/NRO module layout).
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
24
// The code generator appends into m_patch_instructions; the buffer is later
// relocated and copied next to the module by RelocateAndCopy().
Patcher::Patcher() : c(m_patch_instructions) {}

Patcher::~Patcher() = default;
28
// Scans the module's .text for instructions that cannot execute natively
// under NCE (SVC, TLS system-register moves, counter reads) and emits a
// replacement thunk for each into the patch section, recording relocations
// that RelocateAndCopy() resolves once the final placement is known.
// Exclusive load/stores are only recorded here; they are rewritten later.
void Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
                        const Kernel::CodeSet::Segment& code) {

    // Write save context helper function.
    c.l(m_save_context);
    WriteSaveContext();

    // Write load context helper function.
    c.l(m_load_context);
    WriteLoadContext();

    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
    const auto text_words =
        std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)};

    // Loop through instructions, patching as needed.
    for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
        const u32 inst = text_words[i];

        // Records a module->patch branch relocation at this instruction, and
        // returns the offset of the following instruction, which the emitted
        // thunk will eventually branch back to.
        const auto AddRelocations = [&] {
            const uintptr_t this_offset = i * sizeof(u32);
            const uintptr_t next_offset = this_offset + sizeof(u32);

            // Relocate from here to patch.
            this->BranchToPatch(this_offset);

            // Relocate from patch to next instruction.
            return next_offset;
        };

        // SVC
        if (auto svc = SVC{inst}; svc.Verify()) {
            WriteSvcTrampoline(AddRelocations(), svc.GetValue());
            continue;
        }

        // MRS Xn, TPIDR_EL0
        // MRS Xn, TPIDRRO_EL0
        if (auto mrs = MRS{inst};
            mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) {
            const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
                                                                  : oaknut::SystemReg::TPIDR_EL0;
            const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
            WriteMrsHandler(AddRelocations(), dest_reg, src_reg);
            continue;
        }

        // MRS Xn, CNTPCT_EL0
        if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
            WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())});
            continue;
        }

        // MRS Xn, CNTFRQ_EL0
        // No handler is emitted for frequency reads; hitting one is a bug.
        if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) {
            UNREACHABLE();
        }

        // MSR TPIDR_EL0, Xn
        if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
            WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())});
            continue;
        }

        // Exclusive load/store: remembered so RelocateAndCopy() can rewrite
        // it as the ordered variant; no thunk is needed.
        if (auto exclusive = Exclusive{inst}; exclusive.Verify()) {
            m_exclusives.push_back(i);
        }
    }

    // Determine patching mode for the final relocation step.
    // B has a +/-128 MiB range: if the image is too large to reach the patch
    // section placed after it, the section must go in front of .text instead.
    const size_t image_size = program_image.size();
    this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
}
103
// Resolves all relocations recorded by PatchText() now that the load base
// and placement mode are known, rewrites recorded exclusives as ordered,
// publishes the SVC re-entry trampolines, and copies the finished patch
// section into the program image.
void Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
                              const Kernel::CodeSet::Segment& code,
                              Kernel::PhysicalMemory& program_image,
                              EntryTrampolines* out_trampolines) {
    const size_t patch_size = GetSectionSize();
    const size_t image_size = program_image.size();

    // Retrieve text segment data.
    const auto text = std::span{program_image}.subspan(code.offset, code.size);
    const auto text_words =
        std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)};

    // Overwrite a module instruction with a relative branch into the patch
    // section (patch_size bytes before .text, or image_size bytes after it).
    const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        if (mode == PatchMode::PreText) {
            rc.B(rel.patch_offset - patch_size - rel.module_offset);
        } else {
            rc.B(image_size - rel.module_offset + rel.patch_offset);
        }
    };

    // Fill a placeholder in the patch section with a relative branch back
    // into module code.
    const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
        oaknut::CodeGenerator rc{target};
        if (mode == PatchMode::PreText) {
            rc.B(patch_size - rel.patch_offset + rel.module_offset);
        } else {
            rc.B(rel.module_offset - image_size - rel.patch_offset);
        }
    };

    // Converts a patch-section byte offset to its final guest address.
    const auto RebasePatch = [&](ptrdiff_t patch_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_offset;
        } else {
            return GetInteger(load_base) + image_size + patch_offset;
        }
    };

    // Converts a module text byte offset to its final guest address.
    const auto RebasePc = [&](uintptr_t module_offset) {
        if (mode == PatchMode::PreText) {
            return GetInteger(load_base) + patch_size + module_offset;
        } else {
            return GetInteger(load_base) + module_offset;
        }
    };

    // We are now ready to relocate!
    for (const Relocation& rel : m_branch_to_patch_relocations) {
        ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
    }
    for (const Relocation& rel : m_branch_to_module_relocations) {
        ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
                                      rel);
    }

    // Rewrite PC constants and record post trampolines
    for (const Relocation& rel : m_write_module_pc_relocations) {
        oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
        rc.dx(RebasePc(rel.module_offset));
    }
    for (const Trampoline& rel : m_trampolines) {
        out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
    }

    // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
    // Convert to ordered to preserve this assumption.
    for (const ModuleTextAddress i : m_exclusives) {
        auto exclusive = Exclusive{text_words[i]};
        text_words[i] = exclusive.AsOrdered();
    }

    // Copy to program image
    // NOTE(review): the PreText path writes to the image start without
    // resizing — it assumes the caller reserved GetSectionSize() bytes in
    // front of the module; confirm in the module loader.
    if (this->mode == PatchMode::PreText) {
        std::memcpy(program_image.data(), m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
    } else {
        program_image.resize(image_size + patch_size);
        std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
                    m_patch_instructions.size() * sizeof(u32));
    }
}
185
186size_t Patcher::GetSectionSize() const noexcept {
187 return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
188}
189
// Emits the shared helper that restores the full guest register state from
// GuestContext. It is reached via BL with 16 bytes of stack pre-allocated by
// the caller, holding the guest X30 at [SP]; everything except X30/PC is
// restored here.
void Patcher::WriteLoadContext() {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
    // of stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));

    // Load system registers.
    c.LDR(W0, X30, offsetof(GuestContext, fpsr));
    c.MSR(oaknut::SystemReg::FPSR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, fpcr));
    c.MSR(oaknut::SystemReg::FPCR, X0);
    c.LDR(W0, X30, offsetof(GuestContext, nzcv));
    c.MSR(oaknut::SystemReg::NZCV, X0);

    // Load all vector registers.
    // i advances by 2 per LDP, so the byte stride is 16 * i (two u128 per pair).
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }

    // Load all general-purpose registers except X30.
    // cpu_registers is at offset 0, so the pair offset is simply 8 * i.
    for (int i = 0; i <= 28; i += 2) {
        c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }

    // Reload our return X30 from the stack and return.
    // The patch code will reload the guest X30 for us.
    c.LDR(X30, SP, 8);
    c.RET();
}
222
// Emits the shared helper that stores the full guest register state into
// GuestContext. It is reached via BL with 16 bytes of stack pre-allocated by
// the caller, holding the guest X30 at [SP].
void Patcher::WriteSaveContext() {
    // This function was called, which modifies X30, so use that as a scratch register.
    // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
    // stack.
    c.STR(X30, SP, 8);
    c.MRS(X30, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context));

    // Store all general-purpose registers except X30.
    for (int i = 0; i <= 28; i += 2) {
        c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i);
    }

    // Store all vector registers.
    static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers);
    for (int i = 0; i <= 30; i += 2) {
        c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i);
    }

    // Store guest system registers, X30 and SP, using X0 as a scratch register.
    c.STR(X0, SP, PRE_INDEXED, -16); // push X0; SP is now 32 below the guest SP
    c.LDR(X0, SP, 16);               // [SP + 16] = guest X30 pushed by our caller
    c.STR(X0, X30, 8 * 30);
    c.ADD(X0, SP, 32);               // undo both 16-byte pushes to recover the guest SP
    c.STR(X0, X30, offsetof(GuestContext, sp));
    c.MRS(X0, oaknut::SystemReg::FPSR);
    c.STR(W0, X30, offsetof(GuestContext, fpsr));
    c.MRS(X0, oaknut::SystemReg::FPCR);
    c.STR(W0, X30, offsetof(GuestContext, fpcr));
    c.MRS(X0, oaknut::SystemReg::NZCV);
    c.STR(W0, X30, offsetof(GuestContext, nzcv));
    c.LDR(X0, SP, POST_INDEXED, 16); // pop X0

    // Reload our return X30 from the stack, and return.
    c.LDR(X30, SP, 8);
    c.RET();
}
260
// Emits the thunk replacing one guest SVC instruction: saves the guest
// state, posts HaltReason::SupervisorCall plus the SVC number to the host,
// and returns into the host dispatcher. It also emits the re-entry
// trampoline (registered in m_trampolines) that the host calls to restore
// guest state and resume at the instruction after the SVC.
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
    // We are about to start saving state, so we need to lock the context.
    this->LockContext();

    // Store guest X30 to the stack. Then, save the context and restore the stack.
    // This will save all registers except PC, but we know PC at patch time.
    c.STR(X30, SP, PRE_INDEXED, -16);
    c.BL(m_save_context);
    c.LDR(X30, SP, POST_INDEXED, 16);

    // Now that we've saved all registers, we can use any registers as scratch.
    // Store PC + 4 to arm interface, since we know the instruction offset from the entry point.
    oaknut::Label pc_after_svc;
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X2, pc_after_svc);
    c.STR(X2, X1, offsetof(GuestContext, pc));

    // Store SVC number to execute when we return
    c.MOV(X2, svc_id);
    c.STR(W2, X1, offsetof(GuestContext, svc_swi));

    // We are calling a SVC. Clear esr_el1 and return it.
    // (Atomic exchange with zero via an LDAXR/STLXR retry loop.)
    static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>);
    oaknut::Label retry;
    c.ADD(X2, X1, offsetof(GuestContext, esr_el1));
    c.l(retry);
    c.LDAXR(X0, X2);
    c.STLXR(W3, XZR, X2);
    c.CBNZ(W3, retry);

    // Add "calling SVC" flag. Since this is X0, this is now our return value.
    c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall));

    // Offset the GuestContext pointer to the HostContext member.
    // STP has limited range of [-512, 504] which we can't reach otherwise
    // NB: Due to this all offsets below are from the start of HostContext.
    c.ADD(X1, X1, offsetof(GuestContext, host_ctx));

    // Reload host TPIDR_EL0 and SP.
    static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0));
    c.LDP(X2, X3, X1, offsetof(HostContext, host_sp));
    c.MOV(SP, X2);
    c.MSR(oaknut::SystemReg::TPIDR_EL0, X3);

    // Load callee-saved host registers and return to host.
    static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs);
    static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs);
    c.LDP(X19, X20, X1, HOST_REGS_OFF);
    c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64));
    c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64));
    c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64));
    c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64));
    c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64));
    c.LDP(Q8, Q9, X1, HOST_VREGS_OFF);
    c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128));
    c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128));
    c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128));
    c.RET();

    // Write the post-SVC trampoline address, which will jump back to the guest after restoring its
    // state.
    m_trampolines.push_back({c.offset(), module_dest});

    // Host called this location. Save the return address so we can
    // unwind the stack properly when jumping back.
    c.MRS(X2, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context));
    c.ADD(X0, X2, offsetof(GuestContext, host_ctx));
    c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64));

    // Reload all guest registers except X30 and PC.
    // The function also expects 16 bytes of stack already allocated.
    c.STR(X30, SP, PRE_INDEXED, -16);
    c.BL(m_load_context);
    c.LDR(X30, SP, POST_INDEXED, 16);

    // Use X1 as a scratch register to restore X30.
    c.STR(X1, SP, PRE_INDEXED, -16);
    c.MRS(X1, oaknut::SystemReg::TPIDR_EL0);
    c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context));
    c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30);
    c.LDR(X1, SP, POST_INDEXED, 16);

    // Unlock the context.
    this->UnlockContext();

    // Jump back to the instruction after the emulated SVC.
    this->BranchToModule(module_dest);

    // Store PC after call.
    c.l(pc_after_svc);
    this->WriteModulePc(module_dest);
}
355
356void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
357 oaknut::SystemReg src_reg) {
358 // Retrieve emulated TLS register from GuestContext.
359 c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
360 if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
361 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0));
362 } else {
363 c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0));
364 }
365
366 // Jump back to the instruction after the emulated MRS.
367 this->BranchToModule(module_dest);
368}
369
370void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) {
371 const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
372 c.STR(scratch_reg, SP, PRE_INDEXED, -16);
373
374 // Save guest value to NativeExecutionParameters::tpidr_el0.
375 c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0);
376 c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0));
377
378 // Restore scratch register.
379 c.LDR(scratch_reg, SP, POST_INDEXED, 16);
380
381 // Jump back to the instruction after the emulated MSR.
382 this->BranchToModule(module_dest);
383}
384
// Emits the replacement for `MRS Xn, CNTPCT_EL0`: reads the host virtual
// counter (CNTVCT_EL0) and rescales it with a two-word multiply so the guest
// observes its expected counter frequency.
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) {
    // Scale factor from host to guest counter frequency, split into two u64
    // words (NOTE(review): presumably a 64.64 fixed-point ratio — confirm
    // against NativeClock::GetGuestCNTFRQFactor).
    static Common::Arm64::NativeClock clock{};
    const auto factor = clock.GetGuestCNTFRQFactor();
    const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor);

    // Avoid clobbering the destination: use X2/X3 when Xn is X0 or X1.
    const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1;
    oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0;
    oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1;

    oaknut::Label factorlo;
    oaknut::Label factorhi;

    // Save scratches.
    c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16);

    // Load counter value.
    c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0);

    // Load scaling factor.
    c.LDR(scratch0, factorlo);
    c.LDR(scratch1, factorhi);

    // Multiply low bits and get result.
    // UMULH yields the high 64 bits of counter * factor_lo.
    c.UMULH(scratch0, dest_reg, scratch0);

    // Multiply high bits and add low bit result.
    c.MADD(dest_reg, dest_reg, scratch1, scratch0);

    // Reload scratches.
    c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);

    // Jump back to the instruction after the emulated MRS.
    this->BranchToModule(module_dest);

    // Scaling factor constant values, loaded PC-relative above.
    c.l(factorlo);
    c.dx(raw_factor[0]);
    c.l(factorhi);
    c.dx(raw_factor[1]);
}
425
// Emits an inline spinlock acquire of NativeExecutionParameters::lock,
// serializing trampoline entry against the host. Preserves all registers
// (X0/X1 are saved on the stack). Note SpinLockLocked == 0, so "locked" is
// tested with CBZ and acquiring stores WZR.
void Patcher::LockContext() {
    oaknut::Label retry;

    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);

    // Reload lock pointer.
    // The retry path lands here, so CLREX first drops any stale exclusive
    // monitor before re-attempting.
    c.l(retry);
    c.CLREX();
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));

    static_assert(SpinLockLocked == 0);

    // Load-linked with acquire ordering.
    c.LDAXR(W1, X0);

    // If the value was SpinLockLocked, clear monitor and retry.
    c.CBZ(W1, retry);

    // Store-conditional SpinLockLocked with relaxed ordering.
    c.STXR(W1, WZR, X0);

    // If we failed to store, retry.
    c.CBNZ(W1, retry);

    // We succeeded! Reload scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
455
// Emits the release of the context spinlock taken by LockContext().
// A plain store-release (STLR) suffices: no exclusive pair is needed since
// only the holder writes SpinLockUnlocked. Preserves all registers.
void Patcher::UnlockContext() {
    // Save scratches.
    c.STP(X0, X1, SP, PRE_INDEXED, -16);

    // Load lock pointer.
    c.MRS(X0, oaknut::SystemReg::TPIDR_EL0);
    c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock));

    // Load SpinLockUnlocked.
    c.MOV(W1, SpinLockUnlocked);

    // Store value with release ordering.
    c.STLR(W1, X0);

    // Load scratches.
    c.LDP(X0, X1, SP, POST_INDEXED, 16);
}
473
474} // namespace Core::NCE
diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h
new file mode 100644
index 000000000..c6d1608c1
--- /dev/null
+++ b/src/core/arm/nce/patcher.h
@@ -0,0 +1,98 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <unordered_map>
8#include <vector>
9#include <oaknut/code_block.hpp>
10#include <oaknut/oaknut.hpp>
11
12#include "common/common_types.h"
13#include "core/hle/kernel/code_set.h"
14#include "core/hle/kernel/k_typed_address.h"
15#include "core/hle/kernel/physical_memory.h"
16
17namespace Core::NCE {
18
// Where the generated patch section is placed relative to the module image.
enum class PatchMode : u32 {
    None,
    PreText,  ///< Patch section is inserted before .text
    PostData, ///< Patch section is inserted after .data
};

// Guest byte offsets/addresses in module-text vs. patch-section space.
using ModuleTextAddress = u64;
using PatchTextAddress = u64;
// Maps a guest PC (SVC return point) to its patch-section re-entry thunk.
using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>;
28
// Generates the NCE patch section for one module: PatchText() scans guest
// .text and emits replacement thunks for unservable instructions into
// m_patch_instructions; RelocateAndCopy() fixes up all cross-references once
// the final load address is known and copies the section into the image.
class Patcher {
public:
    explicit Patcher();
    ~Patcher();

    // Scan `code` within `program_image`, emitting thunks and recording
    // relocations; also chooses the PatchMode based on image size.
    void PatchText(const Kernel::PhysicalMemory& program_image,
                   const Kernel::CodeSet::Segment& code);
    // Resolve relocations against load_base, copy the patch section into
    // program_image, and fill out_trampolines with SVC re-entry points.
    void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
                         Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
    // Patch-section size in bytes, page-aligned.
    size_t GetSectionSize() const noexcept;

    [[nodiscard]] PatchMode GetPatchMode() const noexcept {
        return mode;
    }

private:
    using ModuleDestLabel = uintptr_t;

    // Associates an SVC return point in module text with its patch-section
    // re-entry thunk.
    struct Trampoline {
        ptrdiff_t patch_offset;
        uintptr_t module_offset;
    };

    // Emitters for the shared save/load-context helpers, the context
    // spinlock, and the per-instruction replacement thunks.
    void WriteLoadContext();
    void WriteSaveContext();
    void LockContext();
    void UnlockContext();
    void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id);
    void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
                         oaknut::SystemReg src_reg);
    void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg);
    void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg);

private:
    // Record that module code at module_dest must branch to the current
    // emission offset in the patch section.
    void BranchToPatch(uintptr_t module_dest) {
        m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
    }

    // Emit a placeholder word to be rewritten as a branch back into module code.
    void BranchToModule(uintptr_t module_dest) {
        m_branch_to_module_relocations.push_back({c.offset(), module_dest});
        c.dw(0);
    }

    // Emit a placeholder doubleword to be rewritten with the rebased guest PC.
    void WriteModulePc(uintptr_t module_dest) {
        m_write_module_pc_relocations.push_back({c.offset(), module_dest});
        c.dx(0);
    }

private:
    // List of patch instructions we have generated.
    std::vector<u32> m_patch_instructions{};

    // Relocation type for relative branch from module to patch.
    struct Relocation {
        ptrdiff_t patch_offset;  ///< Offset in bytes from the start of the patch section.
        uintptr_t module_offset; ///< Offset in bytes from the start of the text section.
    };

    oaknut::VectorCodeGenerator c;
    std::vector<Trampoline> m_trampolines;
    std::vector<Relocation> m_branch_to_patch_relocations{};
    std::vector<Relocation> m_branch_to_module_relocations{};
    std::vector<Relocation> m_write_module_pc_relocations{};
    std::vector<ModuleTextAddress> m_exclusives{};
    oaknut::Label m_save_context{};
    oaknut::Label m_load_context{};
    PatchMode mode{PatchMode::None};
};
97
98} // namespace Core::NCE