diff options
Diffstat (limited to 'src/core/arm/nce/patch.cpp')
| -rw-r--r-- | src/core/arm/nce/patch.cpp | 472 |
1 files changed, 472 insertions, 0 deletions
diff --git a/src/core/arm/nce/patch.cpp b/src/core/arm/nce/patch.cpp new file mode 100644 index 000000000..c79399c2b --- /dev/null +++ b/src/core/arm/nce/patch.cpp | |||
| @@ -0,0 +1,472 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/arm64/native_clock.h" | ||
| 5 | #include "common/bit_cast.h" | ||
| 6 | #include "common/literals.h" | ||
| 7 | #include "core/arm/nce/arm_nce.h" | ||
| 8 | #include "core/arm/nce/guest_context.h" | ||
| 9 | #include "core/arm/nce/instructions.h" | ||
| 10 | #include "core/arm/nce/patch.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "core/core_timing.h" | ||
| 13 | #include "core/hle/kernel/svc.h" | ||
| 14 | |||
| 15 | namespace Core::NCE { | ||
| 16 | |||
| 17 | using namespace Common::Literals; | ||
| 18 | using namespace oaknut::util; | ||
| 19 | |||
| 20 | using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; | ||
| 21 | |||
// Image-size threshold used by Patcher::PatchText to choose where the patch section is placed
// relative to the module.
// NOTE(review): presumably mirrors the +/-128 MiB reach of an AArch64 `B` instruction - confirm.
constexpr size_t MaxRelativeBranch = 128_MiB;
| 23 | |||
// Constructs the code generator `c` on top of m_patch_instructions, so the instructions emitted
// through `c` by the Write* helpers end up in that buffer.
Patcher::Patcher() : c(m_patch_instructions) {}
| 25 | |||
// Defaulted out of line; no custom teardown is performed.
Patcher::~Patcher() = default;
| 27 | |||
| 28 | void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, | ||
| 29 | const Kernel::CodeSet::Segment& code) { | ||
| 30 | |||
| 31 | // Write save context helper function. | ||
| 32 | c.l(m_save_context); | ||
| 33 | WriteSaveContext(); | ||
| 34 | |||
| 35 | // Write load context helper function. | ||
| 36 | c.l(m_load_context); | ||
| 37 | WriteLoadContext(); | ||
| 38 | |||
| 39 | // Retrieve text segment data. | ||
| 40 | const auto text = std::span{program_image}.subspan(code.offset, code.size); | ||
| 41 | const auto text_words = | ||
| 42 | std::span<const u32>{reinterpret_cast<const u32*>(text.data()), text.size() / sizeof(u32)}; | ||
| 43 | |||
| 44 | // Loop through instructions, patching as needed. | ||
| 45 | for (u32 i = 0; i < static_cast<u32>(text_words.size()); i++) { | ||
| 46 | const u32 inst = text_words[i]; | ||
| 47 | |||
| 48 | const auto AddRelocations = [&] { | ||
| 49 | const uintptr_t this_offset = i * sizeof(u32); | ||
| 50 | const uintptr_t next_offset = this_offset + sizeof(u32); | ||
| 51 | |||
| 52 | // Relocate from here to patch. | ||
| 53 | this->BranchToPatch(this_offset); | ||
| 54 | |||
| 55 | // Relocate from patch to next instruction. | ||
| 56 | return next_offset; | ||
| 57 | }; | ||
| 58 | |||
| 59 | // SVC | ||
| 60 | if (auto svc = SVC{inst}; svc.Verify()) { | ||
| 61 | WriteSvcTrampoline(AddRelocations(), svc.GetValue()); | ||
| 62 | continue; | ||
| 63 | } | ||
| 64 | |||
| 65 | // MRS Xn, TPIDR_EL0 | ||
| 66 | // MRS Xn, TPIDRRO_EL0 | ||
| 67 | if (auto mrs = MRS{inst}; | ||
| 68 | mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) { | ||
| 69 | const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0 | ||
| 70 | : oaknut::SystemReg::TPIDR_EL0; | ||
| 71 | const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())}; | ||
| 72 | WriteMrsHandler(AddRelocations(), dest_reg, src_reg); | ||
| 73 | continue; | ||
| 74 | } | ||
| 75 | |||
| 76 | // MRS Xn, CNTPCT_EL0 | ||
| 77 | if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { | ||
| 78 | WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())}); | ||
| 79 | continue; | ||
| 80 | } | ||
| 81 | |||
| 82 | // MRS Xn, CNTFRQ_EL0 | ||
| 83 | if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) { | ||
| 84 | UNREACHABLE(); | ||
| 85 | } | ||
| 86 | |||
| 87 | // MSR TPIDR_EL0, Xn | ||
| 88 | if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { | ||
| 89 | WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())}); | ||
| 90 | continue; | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | // Determine patching mode for the final relocation step | ||
| 95 | const size_t image_size = program_image.size(); | ||
| 96 | this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; | ||
| 97 | } | ||
| 98 | |||
| 99 | void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, | ||
| 100 | const Kernel::CodeSet::Segment& code, | ||
| 101 | Kernel::PhysicalMemory& program_image, | ||
| 102 | EntryTrampolines* out_trampolines) { | ||
| 103 | const size_t patch_size = SectionSize(); | ||
| 104 | const size_t image_size = program_image.size(); | ||
| 105 | |||
| 106 | // Retrieve text segment data. | ||
| 107 | const auto text = std::span{program_image}.subspan(code.offset, code.size); | ||
| 108 | const auto text_words = | ||
| 109 | std::span<u32>{reinterpret_cast<u32*>(text.data()), text.size() / sizeof(u32)}; | ||
| 110 | |||
| 111 | const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) { | ||
| 112 | oaknut::CodeGenerator rc{target}; | ||
| 113 | if (mode == PatchMode::PreText) { | ||
| 114 | rc.B(rel.patch_offset - patch_size - rel.module_offset); | ||
| 115 | } else { | ||
| 116 | rc.B(image_size - rel.module_offset + rel.patch_offset); | ||
| 117 | } | ||
| 118 | }; | ||
| 119 | |||
| 120 | const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { | ||
| 121 | oaknut::CodeGenerator rc{target}; | ||
| 122 | if (mode == PatchMode::PreText) { | ||
| 123 | rc.B(patch_size - rel.patch_offset + rel.module_offset); | ||
| 124 | } else { | ||
| 125 | rc.B(rel.module_offset - image_size - rel.patch_offset); | ||
| 126 | } | ||
| 127 | }; | ||
| 128 | |||
| 129 | const auto RebasePatch = [&](ptrdiff_t patch_offset) { | ||
| 130 | if (mode == PatchMode::PreText) { | ||
| 131 | return GetInteger(load_base) + patch_offset; | ||
| 132 | } else { | ||
| 133 | return GetInteger(load_base) + image_size + patch_offset; | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | const auto RebasePc = [&](uintptr_t module_offset) { | ||
| 138 | if (mode == PatchMode::PreText) { | ||
| 139 | return GetInteger(load_base) + patch_size + module_offset; | ||
| 140 | } else { | ||
| 141 | return GetInteger(load_base) + module_offset; | ||
| 142 | } | ||
| 143 | }; | ||
| 144 | |||
| 145 | // We are now ready to relocate! | ||
| 146 | for (const Relocation& rel : m_branch_to_patch_relocations) { | ||
| 147 | ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); | ||
| 148 | } | ||
| 149 | for (const Relocation& rel : m_branch_to_module_relocations) { | ||
| 150 | ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), | ||
| 151 | rel); | ||
| 152 | } | ||
| 153 | |||
| 154 | // Rewrite PC constants and record post trampolines | ||
| 155 | for (const Relocation& rel : m_write_module_pc_relocations) { | ||
| 156 | oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; | ||
| 157 | rc.dx(RebasePc(rel.module_offset)); | ||
| 158 | } | ||
| 159 | for (const Trampoline& rel : m_trampolines) { | ||
| 160 | out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); | ||
| 161 | } | ||
| 162 | |||
| 163 | // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. | ||
| 164 | // Convert to ordered to preserve this assumption | ||
| 165 | for (u32 i = 0; i < static_cast<u32>(text_words.size()); i++) { | ||
| 166 | const u32 inst = text_words[i]; | ||
| 167 | if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { | ||
| 168 | text_words[i] = exclusive.AsOrdered(); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | // Copy to program image | ||
| 173 | if (this->mode == PatchMode::PreText) { | ||
| 174 | std::memcpy(program_image.data(), m_patch_instructions.data(), | ||
| 175 | m_patch_instructions.size() * sizeof(u32)); | ||
| 176 | } else { | ||
| 177 | program_image.resize(image_size + patch_size); | ||
| 178 | std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), | ||
| 179 | m_patch_instructions.size() * sizeof(u32)); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | size_t Patcher::SectionSize() const noexcept { | ||
| 184 | return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); | ||
| 185 | } | ||
| 186 | |||
| 187 | void Patcher::WriteLoadContext() { | ||
| 188 | // This function was called, which modifies X30, so use that as a scratch register. | ||
| 189 | // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes | ||
| 190 | // of stack. | ||
| 191 | c.STR(X30, SP, 8); | ||
| 192 | c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||
| 193 | c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||
| 194 | |||
| 195 | // Load system registers. | ||
| 196 | c.LDR(W0, X30, offsetof(GuestContext, fpsr)); | ||
| 197 | c.MSR(oaknut::SystemReg::FPSR, X0); | ||
| 198 | c.LDR(W0, X30, offsetof(GuestContext, fpcr)); | ||
| 199 | c.MSR(oaknut::SystemReg::FPCR, X0); | ||
| 200 | c.LDR(W0, X30, offsetof(GuestContext, nzcv)); | ||
| 201 | c.MSR(oaknut::SystemReg::NZCV, X0); | ||
| 202 | |||
| 203 | // Load all vector registers. | ||
| 204 | static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||
| 205 | for (int i = 0; i <= 30; i += 2) { | ||
| 206 | c.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||
| 207 | } | ||
| 208 | |||
| 209 | // Load all general-purpose registers except X30. | ||
| 210 | for (int i = 0; i <= 28; i += 2) { | ||
| 211 | c.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||
| 212 | } | ||
| 213 | |||
| 214 | // Reload our return X30 from the stack and return. | ||
| 215 | // The patch code will reload the guest X30 for us. | ||
| 216 | c.LDR(X30, SP, 8); | ||
| 217 | c.RET(); | ||
| 218 | } | ||
| 219 | |||
| 220 | void Patcher::WriteSaveContext() { | ||
| 221 | // This function was called, which modifies X30, so use that as a scratch register. | ||
| 222 | // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of | ||
| 223 | // stack. | ||
| 224 | c.STR(X30, SP, 8); | ||
| 225 | c.MRS(X30, oaknut::SystemReg::TPIDR_EL0); | ||
| 226 | c.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); | ||
| 227 | |||
| 228 | // Store all general-purpose registers except X30. | ||
| 229 | for (int i = 0; i <= 28; i += 2) { | ||
| 230 | c.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); | ||
| 231 | } | ||
| 232 | |||
| 233 | // Store all vector registers. | ||
| 234 | static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); | ||
| 235 | for (int i = 0; i <= 30; i += 2) { | ||
| 236 | c.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); | ||
| 237 | } | ||
| 238 | |||
| 239 | // Store guest system registers, X30 and SP, using X0 as a scratch register. | ||
| 240 | c.STR(X0, SP, PRE_INDEXED, -16); | ||
| 241 | c.LDR(X0, SP, 16); | ||
| 242 | c.STR(X0, X30, 8 * 30); | ||
| 243 | c.ADD(X0, SP, 32); | ||
| 244 | c.STR(X0, X30, offsetof(GuestContext, sp)); | ||
| 245 | c.MRS(X0, oaknut::SystemReg::FPSR); | ||
| 246 | c.STR(W0, X30, offsetof(GuestContext, fpsr)); | ||
| 247 | c.MRS(X0, oaknut::SystemReg::FPCR); | ||
| 248 | c.STR(W0, X30, offsetof(GuestContext, fpcr)); | ||
| 249 | c.MRS(X0, oaknut::SystemReg::NZCV); | ||
| 250 | c.STR(W0, X30, offsetof(GuestContext, nzcv)); | ||
| 251 | c.LDR(X0, SP, POST_INDEXED, 16); | ||
| 252 | |||
| 253 | // Reload our return X30 from the stack, and return. | ||
| 254 | c.LDR(X30, SP, 8); | ||
| 255 | c.RET(); | ||
| 256 | } | ||
| 257 | |||
| 258 | void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { | ||
| 259 | LOG_ERROR(Core_ARM, "Patching SVC {:#x} at {:#x}", svc_id, module_dest - 4); | ||
| 260 | // We are about to start saving state, so we need to lock the context. | ||
| 261 | this->LockContext(); | ||
| 262 | |||
| 263 | // Store guest X30 to the stack. Then, save the context and restore the stack. | ||
| 264 | // This will save all registers except PC, but we know PC at patch time. | ||
| 265 | c.STR(X30, SP, PRE_INDEXED, -16); | ||
| 266 | c.BL(m_save_context); | ||
| 267 | c.LDR(X30, SP, POST_INDEXED, 16); | ||
| 268 | |||
| 269 | // Now that we've saved all registers, we can use any registers as scratch. | ||
| 270 | // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. | ||
| 271 | oaknut::Label pc_after_svc; | ||
| 272 | c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||
| 273 | c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||
| 274 | c.LDR(X2, pc_after_svc); | ||
| 275 | c.STR(X2, X1, offsetof(GuestContext, pc)); | ||
| 276 | |||
| 277 | // Store SVC number to execute when we return | ||
| 278 | c.MOV(X2, svc_id); | ||
| 279 | c.STR(W2, X1, offsetof(GuestContext, svc_swi)); | ||
| 280 | |||
| 281 | // We are calling a SVC. Clear esr_el1 and return it. | ||
| 282 | static_assert(std::is_same_v<std::underlying_type_t<HaltReason>, u64>); | ||
| 283 | oaknut::Label retry; | ||
| 284 | c.ADD(X2, X1, offsetof(GuestContext, esr_el1)); | ||
| 285 | c.l(retry); | ||
| 286 | c.LDAXR(X0, X2); | ||
| 287 | c.STLXR(W3, XZR, X2); | ||
| 288 | c.CBNZ(W3, retry); | ||
| 289 | |||
| 290 | // Add "calling SVC" flag. Since this is X0, this is now our return value. | ||
| 291 | c.ORR(X0, X0, static_cast<u64>(HaltReason::SupervisorCall)); | ||
| 292 | |||
| 293 | // Offset the GuestContext pointer to the HostContext member. | ||
| 294 | // STP has limited range of [-512, 504] which we can't reach otherwise | ||
| 295 | // NB: Due to this all offsets below are from the start of HostContext. | ||
| 296 | c.ADD(X1, X1, offsetof(GuestContext, host_ctx)); | ||
| 297 | |||
| 298 | // Reload host TPIDR_EL0 and SP. | ||
| 299 | static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); | ||
| 300 | c.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); | ||
| 301 | c.MOV(SP, X2); | ||
| 302 | c.MSR(oaknut::SystemReg::TPIDR_EL0, X3); | ||
| 303 | |||
| 304 | // Load callee-saved host registers and return to host. | ||
| 305 | static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); | ||
| 306 | static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); | ||
| 307 | c.LDP(X19, X20, X1, HOST_REGS_OFF); | ||
| 308 | c.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); | ||
| 309 | c.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); | ||
| 310 | c.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); | ||
| 311 | c.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); | ||
| 312 | c.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); | ||
| 313 | c.LDP(Q8, Q9, X1, HOST_VREGS_OFF); | ||
| 314 | c.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); | ||
| 315 | c.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); | ||
| 316 | c.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); | ||
| 317 | c.RET(); | ||
| 318 | |||
| 319 | // Write the post-SVC trampoline address, which will jump back to the guest after restoring its | ||
| 320 | // state. | ||
| 321 | m_trampolines.push_back({c.offset(), module_dest}); | ||
| 322 | |||
| 323 | // Host called this location. Save the return address so we can | ||
| 324 | // unwind the stack properly when jumping back. | ||
| 325 | c.MRS(X2, oaknut::SystemReg::TPIDR_EL0); | ||
| 326 | c.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); | ||
| 327 | c.ADD(X0, X2, offsetof(GuestContext, host_ctx)); | ||
| 328 | c.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); | ||
| 329 | |||
| 330 | // Reload all guest registers except X30 and PC. | ||
| 331 | // The function also expects 16 bytes of stack already allocated. | ||
| 332 | c.STR(X30, SP, PRE_INDEXED, -16); | ||
| 333 | c.BL(m_load_context); | ||
| 334 | c.LDR(X30, SP, POST_INDEXED, 16); | ||
| 335 | |||
| 336 | // Use X1 as a scratch register to restore X30. | ||
| 337 | c.STR(X1, SP, PRE_INDEXED, -16); | ||
| 338 | c.MRS(X1, oaknut::SystemReg::TPIDR_EL0); | ||
| 339 | c.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); | ||
| 340 | c.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); | ||
| 341 | c.LDR(X1, SP, POST_INDEXED, 16); | ||
| 342 | |||
| 343 | // Unlock the context. | ||
| 344 | this->UnlockContext(); | ||
| 345 | |||
| 346 | // Jump back to the instruction after the emulated SVC. | ||
| 347 | this->BranchToModule(module_dest); | ||
| 348 | |||
| 349 | // Store PC after call. | ||
| 350 | c.l(pc_after_svc); | ||
| 351 | this->WriteModulePc(module_dest); | ||
| 352 | } | ||
| 353 | |||
| 354 | void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, | ||
| 355 | oaknut::SystemReg src_reg) { | ||
| 356 | // Retrieve emulated TLS register from GuestContext. | ||
| 357 | c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); | ||
| 358 | if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { | ||
| 359 | c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); | ||
| 360 | } else { | ||
| 361 | c.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||
| 362 | } | ||
| 363 | |||
| 364 | // Jump back to the instruction after the emulated MRS. | ||
| 365 | this->BranchToModule(module_dest); | ||
| 366 | } | ||
| 367 | |||
| 368 | void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { | ||
| 369 | const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; | ||
| 370 | c.STR(scratch_reg, SP, PRE_INDEXED, -16); | ||
| 371 | |||
| 372 | // Save guest value to NativeExecutionParameters::tpidr_el0. | ||
| 373 | c.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); | ||
| 374 | c.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); | ||
| 375 | |||
| 376 | // Restore scratch register. | ||
| 377 | c.LDR(scratch_reg, SP, POST_INDEXED, 16); | ||
| 378 | |||
| 379 | // Jump back to the instruction after the emulated MSR. | ||
| 380 | this->BranchToModule(module_dest); | ||
| 381 | } | ||
| 382 | |||
| 383 | void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { | ||
| 384 | static Common::Arm64::NativeClock clock{}; | ||
| 385 | const auto factor = clock.GetGuestCNTFRQFactor(); | ||
| 386 | const auto raw_factor = Common::BitCast<std::array<u64, 2>>(factor); | ||
| 387 | |||
| 388 | const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1; | ||
| 389 | oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0; | ||
| 390 | oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1; | ||
| 391 | |||
| 392 | oaknut::Label factorlo; | ||
| 393 | oaknut::Label factorhi; | ||
| 394 | |||
| 395 | // Save scratches. | ||
| 396 | c.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); | ||
| 397 | |||
| 398 | // Load counter value. | ||
| 399 | c.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); | ||
| 400 | |||
| 401 | // Load scaling factor. | ||
| 402 | c.LDR(scratch0, factorlo); | ||
| 403 | c.LDR(scratch1, factorhi); | ||
| 404 | |||
| 405 | // Multiply low bits and get result. | ||
| 406 | c.UMULH(scratch0, dest_reg, scratch0); | ||
| 407 | |||
| 408 | // Multiply high bits and add low bit result. | ||
| 409 | c.MADD(dest_reg, dest_reg, scratch1, scratch0); | ||
| 410 | |||
| 411 | // Reload scratches. | ||
| 412 | c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); | ||
| 413 | |||
| 414 | // Jump back to the instruction after the emulated MRS. | ||
| 415 | this->BranchToModule(module_dest); | ||
| 416 | |||
| 417 | // Scaling factor constant values. | ||
| 418 | c.l(factorlo); | ||
| 419 | c.dx(raw_factor[0]); | ||
| 420 | c.l(factorhi); | ||
| 421 | c.dx(raw_factor[1]); | ||
| 422 | } | ||
| 423 | |||
| 424 | void Patcher::LockContext() { | ||
| 425 | oaknut::Label retry; | ||
| 426 | |||
| 427 | // Save scratches. | ||
| 428 | c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||
| 429 | |||
| 430 | // Reload lock pointer. | ||
| 431 | c.l(retry); | ||
| 432 | c.CLREX(); | ||
| 433 | c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||
| 434 | c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||
| 435 | |||
| 436 | static_assert(SpinLockLocked == 0); | ||
| 437 | |||
| 438 | // Load-linked with acquire ordering. | ||
| 439 | c.LDAXR(W1, X0); | ||
| 440 | |||
| 441 | // If the value was SpinLockLocked, clear monitor and retry. | ||
| 442 | c.CBZ(W1, retry); | ||
| 443 | |||
| 444 | // Store-conditional SpinLockLocked with relaxed ordering. | ||
| 445 | c.STXR(W1, WZR, X0); | ||
| 446 | |||
| 447 | // If we failed to store, retry. | ||
| 448 | c.CBNZ(W1, retry); | ||
| 449 | |||
| 450 | // We succeeded! Reload scratches. | ||
| 451 | c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||
| 452 | } | ||
| 453 | |||
| 454 | void Patcher::UnlockContext() { | ||
| 455 | // Save scratches. | ||
| 456 | c.STP(X0, X1, SP, PRE_INDEXED, -16); | ||
| 457 | |||
| 458 | // Load lock pointer. | ||
| 459 | c.MRS(X0, oaknut::SystemReg::TPIDR_EL0); | ||
| 460 | c.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); | ||
| 461 | |||
| 462 | // Load SpinLockUnlocked. | ||
| 463 | c.MOV(W1, SpinLockUnlocked); | ||
| 464 | |||
| 465 | // Store value with release ordering. | ||
| 466 | c.STLR(W1, X0); | ||
| 467 | |||
| 468 | // Load scratches. | ||
| 469 | c.LDP(X0, X1, SP, POST_INDEXED, 16); | ||
| 470 | } | ||
| 471 | |||
| 472 | } // namespace Core::NCE | ||