diff options
| author | 2023-11-30 09:20:55 -0500 | |
|---|---|---|
| committer | 2023-11-30 09:20:55 -0500 | |
| commit | 57a391e71db13ade7a3d96f59d53781eff18d2ac (patch) | |
| tree | 0b4223de40a2d77598ac9095b1374353c2e9da7c /src/core/arm/nce/patcher.cpp | |
| parent | Merge pull request #12223 from liamwhite/fruit-company (diff) | |
| parent | core: Rename patcher file (diff) | |
| download | yuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.tar.gz yuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.tar.xz yuzu-57a391e71db13ade7a3d96f59d53781eff18d2ac.zip | |
Merge pull request #12074 from GPUCode/yuwu-on-the-metal
Implement Native Code Execution (NCE)
Diffstat (limited to 'src/core/arm/nce/patcher.cpp')
| -rw-r--r-- | src/core/arm/nce/patcher.cpp | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp new file mode 100644 index 000000000..ec8527224 --- /dev/null +++ b/src/core/arm/nce/patcher.cpp | |||
| @@ -0,0 +1,474 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/arm64/native_clock.h" | ||
| 5 | #include "common/bit_cast.h" | ||
| 6 | #include "common/literals.h" | ||
| 7 | #include "core/arm/nce/arm_nce.h" | ||
| 8 | #include "core/arm/nce/guest_context.h" | ||
| 9 | #include "core/arm/nce/instructions.h" | ||
| 10 | #include "core/arm/nce/patcher.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "core/core_timing.h" | ||
| 13 | #include "core/hle/kernel/svc.h" | ||
| 14 | |||
| 15 | namespace Core::NCE { | ||
| 16 | |||
| 17 | using namespace Common::Literals; | ||
| 18 | using namespace oaknut::util; | ||
| 19 | |||
| 20 | using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; | ||
| 21 | |||
| 22 | constexpr size_t MaxRelativeBranch = 128_MiB; | ||
| 23 | constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); | ||
| 24 | |||
| 25 | Patcher::Patcher() : c(m_patch_instructions) {} | ||
| 26 | |||
| 27 | Patcher::~Patcher() = default; | ||
| 28 | |||
| 29 | void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, | ||
| 30 | const Kernel::CodeSet::Segment& code) { | ||
| 31 | |||
| 32 | // Write save context helper function. | ||
| 33 | c.l(m_save_context); | ||
| 34 | WriteSaveContext(); | ||
| 35 | |||
| 36 | // Write load context helper function. | ||
| 37 | c.l(m_load_context); | ||
| 38 | WriteLoadContext(); | ||
| 39 | |||
| 40 | // Retrieve text segment data. | ||
| 41 | const auto text = std::span{program_image}.subspan(code.offset, code.size); | ||
| 42 | const auto text_words = | ||
| 43 | std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)}; | ||
| 44 | |||
| 45 | // Loop through instructions, patching as needed. | ||
| 46 | for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) { | ||
| 47 | const u32 inst = text_words[i]; | ||
| 48 | |||
| 49 | const auto AddRelocations = [&] { | ||
| 50 | const uintptr_t this_offset = i * sizeof(u32); | ||
| 51 | const uintptr_t next_offset = this_offset + sizeof(u32); | ||
| 52 | |||
| 53 | // Relocate from here to patch. | ||
| 54 | this->BranchToPatch(this_offset); | ||
| 55 | |||
| 56 | // Relocate from patch to next instruction. | ||
| 57 | return next_offset; | ||
| 58 | }; | ||
| 59 | |||
| 60 | // SVC | ||
| 61 | if (auto svc = SVC{inst}; svc.Verify()) { | ||
| 62 | WriteSvcTrampoline(AddRelocations(), svc.GetValue()); | ||
| 63 | continue; | ||
| 64 | } | ||
| 65 | |||
| 66 | // MRS Xn, TPIDR_EL0 | ||
| 67 | // MRS Xn, TPIDRRO_EL0 | ||
| 68 | if (auto mrs = MRS{inst}; | ||
| 69 | mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) { | ||
| 70 | const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0 | ||
| 71 | : oaknut::SystemReg::TPIDR_EL0; | ||
| 72 | const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())}; | ||
| 73 | WriteMrsHandler(AddRelocations(), dest_reg, src_reg); | ||
| 74 | continue; | ||
| 75 | } | ||
| 76 | |||
| 77 | // MRS Xn, CNTPCT_EL0 | ||
| 78 | if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { | ||
| 79 | WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())}); | ||
| 80 | continue; | ||
| 81 | } | ||
| 82 | |||
| 83 | // MRS Xn, CNTFRQ_EL0 | ||
| 84 | if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) { | ||
| 85 | UNREACHABLE(); | ||
| 86 | } | ||
| 87 | |||
| 88 | // MSR TPIDR_EL0, Xn | ||
| 89 | if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { | ||
| 90 | WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())}); | ||
| 91 | continue; | ||
| 92 | } | ||
| 93 | |||
| 94 | if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { | ||
| 95 | m_exclusives.push_back(i); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | // Determine patching mode for the final relocation step | ||
| 100 | const size_t image_size = program_image.size(); | ||
| 101 | this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; | ||
| 102 | } | ||
| 103 | |||
| 104 | void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, | ||
| 105 | const Kernel::CodeSet::Segment& code, | ||
| 106 | Kernel::PhysicalMemory& program_image, | ||
| 107 | EntryTrampolines* out_trampolines) { | ||
| 108 | const size_t patch_size = GetSectionSize(); | ||
| 109 | const size_t image_size = program_image.size(); | ||
| 110 | |||
| 111 | // Retrieve text segment data. | ||
| 112 | const auto text = std::span{program_image}.subspan(code.offset, code.size); | ||
| 113 | const auto text_words = | ||
| 114 | std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)}; | ||
| 115 | |||
| 116 | const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) { | ||
| 117 | oaknut::CodeGenerator rc{target}; | ||
| 118 | if (mode == PatchMode::PreText) { | ||
| 119 | rc.B(rel.patch_offset - patch_size - rel.module_offset); | ||
| 120 | } else { | ||
| 121 | rc.B(image_size - rel.module_offset + rel.patch_offset); | ||
| 122 | } | ||
| 123 | }; | ||
| 124 | |||
| 125 | const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { | ||
| 126 | oaknut::CodeGenerator rc{target}; | ||
| 127 | if (mode == PatchMode::PreText) { | ||
| 128 | rc.B(patch_size - rel.patch_offset + rel.module_offset); | ||
| 129 | } else { | ||
| 130 | rc.B(rel.module_offset - image_size - rel.patch_offset); | ||
| 131 | } | ||
| 132 | }; | ||
| 133 | |||
| 134 | const auto RebasePatch = [&](ptrdiff_t patch_offset) { | ||
| 135 | if (mode == PatchMode::PreText) { | ||
| 136 | return GetInteger(load_base) + patch_offset; | ||
| 137 | } else { | ||
| 138 | return GetInteger(load_base) + image_size + patch_offset; | ||
| 139 | } | ||
| 140 | }; | ||
| 141 | |||
| 142 | const auto RebasePc = [&](uintptr_t module_offset) { | ||
| 143 | if (mode == PatchMode::PreText) { | ||
| 144 | return GetInteger(load_base) + patch_size + module_offset; | ||
| 145 | } else { | ||
| 146 | return GetInteger(load_base) + module_offset; | ||
| 147 | } | ||
| 148 | }; | ||
| 149 | |||
| 150 | // We are now ready to relocate! | ||
| 151 | for (const Relocation& rel : m_branch_to_patch_relocations) { | ||
| 152 | ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); | ||
| 153 | } | ||
| 154 | for (const Relocation& rel : m_branch_to_module_relocations) { | ||
| 155 | ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), | ||
| 156 | rel); | ||
| 157 | } | ||
| 158 | |||
| 159 | // Rewrite PC constants and record post trampolines | ||
| 160 | for (const Relocation& rel : m_write_module_pc_relocations) { | ||
| 161 | oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; | ||
| 162 | rc.dx(RebasePc(rel.module_offset)); | ||
| 163 | } | ||
| 164 | for (const Trampoline& rel : m_trampolines) { | ||
| 165 | out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); | ||
| 166 | } | ||
| 167 | |||
| 168 | // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. | ||
| 169 | // Convert to ordered to preserve this assumption. | ||
| 170 | for (const ModuleTextAddress i : m_exclusives) { | ||
| 171 | auto exclusive = Exclusive{text_words[i]}; | ||
| 172 | text_words[i] = exclusive.AsOrdered(); | ||
| 173 | } | ||
| 174 | |||
| 175 | // Copy to program image | ||
| 176 | if (this->mode == PatchMode::PreText) { | ||
| 177 | std::memcpy(program_image.data(), m_patch_instructions.data(), | ||
| 178 | m_patch_instructions.size() * sizeof(u32)); | ||
| 179 | } else { | ||
| 180 | program_image.resize(image_size + patch_size); | ||
| 181 | std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), | ||
| 182 | m_patch_instructions.size() * sizeof(u32)); | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | size_t Patcher::GetSectionSize() const noexcept { | ||
| 187 | return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); | ||
| 188 | } | ||
| 189 | |||
| 190 | void Patcher::WriteLoadContext() { | ||
| 191 | // This function was called, which modifies X30, so use that as a scratch register. | ||
| 192 | // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes | ||
| 193 | // of stack. | ||
| 194 | c.STR(X30, SP, 8); | ||
| 195 | c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||
| 196 | c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||
| 197 | |||
| 198 | // Load system registers. | ||
| 199 | c.LDR(W0, X30, offsetof(GuestContext, fpsr)); | ||
| 200 | c.MSR(oaknut::SystemReg::FPSR, X0); | ||
| 201 | c.LDR(W0, X30, offsetof(GuestContext, fpcr)); | ||
| 202 | c.MSR(oaknut::SystemReg::FPCR, X0); | ||
| 203 | c.LDR(W0, X30, offsetof(GuestContext, nzcv)); | ||
| 204 | c.MSR(oaknut::SystemReg::NZCV, X0); | ||
| 205 | |||
| 206 | // Load all vector registers. | ||
| 207 | static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||
| 208 | for (int i = 0; i <= 30; i += 2) { | ||
| 209 | c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||
| 210 | } | ||
| 211 | |||
| 212 | // Load all general-purpose registers except X30. | ||
| 213 | for (int i = 0; i <= 28; i += 2) { | ||
| 214 | c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||
| 215 | } | ||
| 216 | |||
| 217 | // Reload our return X30 from the stack and return. | ||
| 218 | // The patch code will reload the guest X30 for us. | ||
| 219 | c.LDR(X30, SP, 8); | ||
| 220 | c.RET(); | ||
| 221 | } | ||
| 222 | |||
| 223 | void Patcher::WriteSaveContext() { | ||
| 224 | // This function was called, which modifies X30, so use that as a scratch register. | ||
| 225 | // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of | ||
| 226 | // stack. | ||
| 227 | c.STR(X30, SP, 8); | ||
| 228 | c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||
| 229 | c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||
| 230 | |||
| 231 | // Store all general-purpose registers except X30. | ||
| 232 | for (int i = 0; i <= 28; i += 2) { | ||
| 233 | c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||
| 234 | } | ||
| 235 | |||
| 236 | // Store all vector registers. | ||
| 237 | static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||
| 238 | for (int i = 0; i <= 30; i += 2) { | ||
| 239 | c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||
| 240 | } | ||
| 241 | |||
| 242 | // Store guest system registers, X30 and SP, using X0 as a scratch register. | ||
| 243 | c.STR(X0, SP, PRE_INDEXED, -16); | ||
| 244 | c.LDR(X0, SP, 16); | ||
| 245 | c.STR(X0, X30, 8 * 30); | ||
| 246 | c.ADD(X0, SP, 32); | ||
| 247 | c.STR(X0, X30, offsetof(GuestContext, sp)); | ||
| 248 | c.MRS(X0, oaknut::SystemReg::FPSR); | ||
| 249 | c.STR(W0, X30, offsetof(GuestContext, fpsr)); | ||
| 250 | c.MRS(X0, oaknut::SystemReg::FPCR); | ||
| 251 | c.STR(W0, X30, offsetof(GuestContext, fpcr)); | ||
| 252 | c.MRS(X0, oaknut::SystemReg::NZCV); | ||
| 253 | c.STR(W0, X30, offsetof(GuestContext, nzcv)); | ||
| 254 | c.LDR(X0, SP, POST_INDEXED, 16); | ||
| 255 | |||
| 256 | // Reload our return X30 from the stack, and return. | ||
| 257 | c.LDR(X30, SP, 8); | ||
| 258 | c.RET(); | ||
| 259 | } | ||
| 260 | |||
| 261 | void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { | ||
| 262 | // We are about to start saving state, so we need to lock the context. | ||
| 263 | this->LockContext(); | ||
| 264 | |||
| 265 | // Store guest X30 to the stack. Then, save the context and restore the stack. | ||
| 266 | // This will save all registers except PC, but we know PC at patch time. | ||
| 267 | c.STR(X30, SP, PRE_INDEXED, -16); | ||
| 268 | c.BL(m_save_context); | ||
| 269 | c.LDR(X30, SP, POST_INDEXED, 16); | ||
| 270 | |||
| 271 | // Now that we've saved all registers, we can use any registers as scratch. | ||
| 272 | // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. | ||
| 273 | oaknut::Label pc_after_svc; | ||
| 274 | c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||
| 275 | c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||
| 276 | c.LDR(X2, pc_after_svc); | ||
| 277 | c.STR(X2, X1, offsetof(GuestContext, pc)); | ||
| 278 | |||
| 279 | // Store SVC number to execute when we return | ||
| 280 | c.MOV(X2, svc_id); | ||
| 281 | c.STR(W2, X1, offsetof(GuestContext, svc_swi)); | ||
| 282 | |||
| 283 | // We are calling a SVC. Clear esr_el1 and return it. | ||
| 284 | static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>); | ||
| 285 | oaknut::Label retry; | ||
| 286 | c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); | ||
| 287 | c.l(retry); | ||
| 288 | c.LDAXR(X0, X2); | ||
| 289 | c.STLXR(W3, XZR, X2); | ||
| 290 | c.CBNZ(W3, retry); | ||
| 291 | |||
| 292 | // Add "calling SVC" flag. Since this is X0, this is now our return value. | ||
| 293 | c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall)); | ||
| 294 | |||
| 295 | // Offset the GuestContext pointer to the HostContext member. | ||
| 296 | // STP has limited range of [-512, 504] which we can't reach otherwise | ||
| 297 | // NB: Due to this all offsets below are from the start of HostContext. | ||
| 298 | c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); | ||
| 299 | |||
| 300 | // Reload host TPIDR_EL0 and SP. | ||
| 301 | static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); | ||
| 302 | c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); | ||
| 303 | c.MOV(SP, X2); | ||
| 304 | c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); | ||
| 305 | |||
| 306 | // Load callee-saved host registers and return to host. | ||
| 307 | static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); | ||
| 308 | static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); | ||
| 309 | c.LDP(X19, X20, X1, HOST_REGS_OFF); | ||
| 310 | c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); | ||
| 311 | c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); | ||
| 312 | c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); | ||
| 313 | c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); | ||
| 314 | c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); | ||
| 315 | c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); | ||
| 316 | c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); | ||
| 317 | c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); | ||
| 318 | c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); | ||
| 319 | c.RET(); | ||
| 320 | |||
| 321 | // Write the post-SVC trampoline address, which will jump back to the guest after restoring its | ||
| 322 | // state. | ||
| 323 | m_trampolines.push_back({c.offset(), module_dest}); | ||
| 324 | |||
| 325 | // Host called this location. Save the return address so we can | ||
| 326 | // unwind the stack properly when jumping back. | ||
| 327 | c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); | ||
| 328 | c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); | ||
| 329 | c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); | ||
| 330 | c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); | ||
| 331 | |||
| 332 | // Reload all guest registers except X30 and PC. | ||
| 333 | // The function also expects 16 bytes of stack already allocated. | ||
| 334 | c.STR(X30, SP, PRE_INDEXED, -16); | ||
| 335 | c.BL(m_load_context); | ||
| 336 | c.LDR(X30, SP, POST_INDEXED, 16); | ||
| 337 | |||
| 338 | // Use X1 as a scratch register to restore X30. | ||
| 339 | c.STR(X1, SP, PRE_INDEXED, -16); | ||
| 340 | c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||
| 341 | c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||
| 342 | c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); | ||
| 343 | c.LDR(X1, SP, POST_INDEXED, 16); | ||
| 344 | |||
| 345 | // Unlock the context. | ||
| 346 | this->UnlockContext(); | ||
| 347 | |||
| 348 | // Jump back to the instruction after the emulated SVC. | ||
| 349 | this->BranchToModule(module_dest); | ||
| 350 | |||
| 351 | // Store PC after call. | ||
| 352 | c.l(pc_after_svc); | ||
| 353 | this->WriteModulePc(module_dest); | ||
| 354 | } | ||
| 355 | |||
| 356 | void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||
| 357 | oaknut::SystemReg src_reg) { | ||
| 358 | // Retrieve emulated TLS register from GuestContext. | ||
| 359 | c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); | ||
| 360 | if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { | ||
| 361 | c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); | ||
| 362 | } else { | ||
| 363 | c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||
| 364 | } | ||
| 365 | |||
| 366 | // Jump back to the instruction after the emulated MRS. | ||
| 367 | this->BranchToModule(module_dest); | ||
| 368 | } | ||
| 369 | |||
| 370 | void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { | ||
| 371 | const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; | ||
| 372 | c.STR(scratch_reg, SP, PRE_INDEXED, -16); | ||
| 373 | |||
| 374 | // Save guest value to NativeExecutionParameters::tpidr_el0. | ||
| 375 | c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); | ||
| 376 | c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||
| 377 | |||
| 378 | // Restore scratch register. | ||
| 379 | c.LDR(scratch_reg, SP, POST_INDEXED, 16); | ||
| 380 | |||
| 381 | // Jump back to the instruction after the emulated MSR. | ||
| 382 | this->BranchToModule(module_dest); | ||
| 383 | } | ||
| 384 | |||
| 385 | void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { | ||
| 386 | static Common::Arm64::NativeClock clock{}; | ||
| 387 | const auto factor = clock.GetGuestCNTFRQFactor(); | ||
| 388 | const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor); | ||
| 389 | |||
| 390 | const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1; | ||
| 391 | oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0; | ||
| 392 | oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1; | ||
| 393 | |||
| 394 | oaknut::Label factorlo; | ||
| 395 | oaknut::Label factorhi; | ||
| 396 | |||
| 397 | // Save scratches. | ||
| 398 | c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); | ||
| 399 | |||
| 400 | // Load counter value. | ||
| 401 | c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); | ||
| 402 | |||
| 403 | // Load scaling factor. | ||
| 404 | c.LDR(scratch0, factorlo); | ||
| 405 | c.LDR(scratch1, factorhi); | ||
| 406 | |||
| 407 | // Multiply low bits and get result. | ||
| 408 | c.UMULH(scratch0, dest_reg, scratch0); | ||
| 409 | |||
| 410 | // Multiply high bits and add low bit result. | ||
| 411 | c.MADD(dest_reg, dest_reg, scratch1, scratch0); | ||
| 412 | |||
| 413 | // Reload scratches. | ||
| 414 | c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); | ||
| 415 | |||
| 416 | // Jump back to the instruction after the emulated MRS. | ||
| 417 | this->BranchToModule(module_dest); | ||
| 418 | |||
| 419 | // Scaling factor constant values. | ||
| 420 | c.l(factorlo); | ||
| 421 | c.dx(raw_factor[0]); | ||
| 422 | c.l(factorhi); | ||
| 423 | c.dx(raw_factor[1]); | ||
| 424 | } | ||
| 425 | |||
| 426 | void Patcher::LockContext() { | ||
| 427 | oaknut::Label retry; | ||
| 428 | |||
| 429 | // Save scratches. | ||
| 430 | c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||
| 431 | |||
| 432 | // Reload lock pointer. | ||
| 433 | c.l(retry); | ||
| 434 | c.CLREX(); | ||
| 435 | c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||
| 436 | c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||
| 437 | |||
| 438 | static_assert(SpinLockLocked == 0); | ||
| 439 | |||
| 440 | // Load-linked with acquire ordering. | ||
| 441 | c.LDAXR(W1, X0); | ||
| 442 | |||
| 443 | // If the value was SpinLockLocked, clear monitor and retry. | ||
| 444 | c.CBZ(W1, retry); | ||
| 445 | |||
| 446 | // Store-conditional SpinLockLocked with relaxed ordering. | ||
| 447 | c.STXR(W1, WZR, X0); | ||
| 448 | |||
| 449 | // If we failed to store, retry. | ||
| 450 | c.CBNZ(W1, retry); | ||
| 451 | |||
| 452 | // We succeeded! Reload scratches. | ||
| 453 | c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||
| 454 | } | ||
| 455 | |||
| 456 | void Patcher::UnlockContext() { | ||
| 457 | // Save scratches. | ||
| 458 | c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||
| 459 | |||
| 460 | // Load lock pointer. | ||
| 461 | c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||
| 462 | c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||
| 463 | |||
| 464 | // Load SpinLockUnlocked. | ||
| 465 | c.MOV(W1, SpinLockUnlocked); | ||
| 466 | |||
| 467 | // Store value with release ordering. | ||
| 468 | c.STLR(W1, X0); | ||
| 469 | |||
| 470 | // Load scratches. | ||
| 471 | c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||
| 472 | } | ||
| 473 | |||
| 474 | } // namespace Core::NCE | ||