summaryrefslogtreecommitdiff
path: root/src/common/x64
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64')
-rw-r--r--src/common/x64/cpu_detect.cpp38
-rw-r--r--src/common/x64/cpu_detect.h13
-rw-r--r--src/common/x64/native_clock.cpp103
-rw-r--r--src/common/x64/native_clock.h48
-rw-r--r--src/common/x64/xbyak_abi.h229
-rw-r--r--src/common/x64/xbyak_util.h47
6 files changed, 478 insertions, 0 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); 62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); 63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); 64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
65 if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
66 caps.manufacturer = Manufacturer::Intel;
67 else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
68 caps.manufacturer = Manufacturer::AMD;
69 else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
70 caps.manufacturer = Manufacturer::Hygon;
71 else
72 caps.manufacturer = Manufacturer::Unknown;
73
74 u32 family = {};
75 u32 model = {};
65 76
66 __cpuid(cpu_id, 0x80000000); 77 __cpuid(cpu_id, 0x80000000);
67 78
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
73 // Detect family and other miscellaneous features 84 // Detect family and other miscellaneous features
74 if (max_std_fn >= 1) { 85 if (max_std_fn >= 1) {
75 __cpuid(cpu_id, 0x00000001); 86 __cpuid(cpu_id, 0x00000001);
87 family = (cpu_id[0] >> 8) & 0xf;
88 model = (cpu_id[0] >> 4) & 0xf;
89 if (family == 0xf) {
90 family += (cpu_id[0] >> 20) & 0xff;
91 }
92 if (family >= 6) {
93 model += ((cpu_id[0] >> 16) & 0xf) << 4;
94 }
76 95
77 if ((cpu_id[3] >> 25) & 1) 96 if ((cpu_id[3] >> 25) & 1)
78 caps.sse = true; 97 caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
110 caps.bmi1 = true; 129 caps.bmi1 = true;
111 if ((cpu_id[1] >> 8) & 1) 130 if ((cpu_id[1] >> 8) & 1)
112 caps.bmi2 = true; 131 caps.bmi2 = true;
132 // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
133 if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
134 (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
135 caps.avx512 = caps.avx2;
136 }
113 } 137 }
114 } 138 }
115 139
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
130 caps.fma4 = true; 154 caps.fma4 = true;
131 } 155 }
132 156
157 if (max_ex_fn >= 0x80000007) {
158 __cpuid(cpu_id, 0x80000007);
159 if (cpu_id[3] & (1 << 8)) {
160 caps.invariant_tsc = true;
161 }
162 }
163
164 if (max_std_fn >= 0x16) {
165 __cpuid(cpu_id, 0x16);
166 caps.base_frequency = cpu_id[0];
167 caps.max_frequency = cpu_id[1];
168 caps.bus_frequency = cpu_id[2];
169 }
170
133 return caps; 171 return caps;
134} 172}
135 173
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
6 6
7namespace Common { 7namespace Common {
8 8
9enum class Manufacturer : u32 {
10 Intel = 0,
11 AMD = 1,
12 Hygon = 2,
13 Unknown = 3,
14};
15
9/// x86/x64 CPU capabilities that may be detected by this module 16/// x86/x64 CPU capabilities that may be detected by this module
10struct CPUCaps { 17struct CPUCaps {
18 Manufacturer manufacturer;
11 char cpu_string[0x21]; 19 char cpu_string[0x21];
12 char brand_string[0x41]; 20 char brand_string[0x41];
13 bool sse; 21 bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
19 bool lzcnt; 27 bool lzcnt;
20 bool avx; 28 bool avx;
21 bool avx2; 29 bool avx2;
30 bool avx512;
22 bool bmi1; 31 bool bmi1;
23 bool bmi2; 32 bool bmi2;
24 bool fma; 33 bool fma;
25 bool fma4; 34 bool fma4;
26 bool aes; 35 bool aes;
36 bool invariant_tsc;
37 u32 base_frequency;
38 u32 max_frequency;
39 u32 bus_frequency;
27}; 40};
28 41
29/** 42/**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..424b39b1f
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,103 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <mutex>
7#include <thread>
8
9#ifdef _MSC_VER
10#include <intrin.h>
11#else
12#include <x86intrin.h>
13#endif
14
15#include "common/uint128.h"
16#include "common/x64/native_clock.h"
17
18namespace Common {
19
20u64 EstimateRDTSCFrequency() {
21 const auto milli_10 = std::chrono::milliseconds{10};
22 // get current time
23 _mm_mfence();
24 const u64 tscStart = __rdtsc();
25 const auto startTime = std::chrono::high_resolution_clock::now();
26 // wait roughly 3 seconds
27 while (true) {
28 auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
29 std::chrono::high_resolution_clock::now() - startTime);
30 if (milli.count() >= 3000)
31 break;
32 std::this_thread::sleep_for(milli_10);
33 }
34 const auto endTime = std::chrono::high_resolution_clock::now();
35 _mm_mfence();
36 const u64 tscEnd = __rdtsc();
37 // calculate difference
38 const u64 timer_diff =
39 std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
40 const u64 tsc_diff = tscEnd - tscStart;
41 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
42 return tsc_freq;
43}
44
45namespace X64 {
46NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
47 u64 rtsc_frequency)
48 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
49 rtsc_frequency} {
50 _mm_mfence();
51 last_measure = __rdtsc();
52 accumulated_ticks = 0U;
53}
54
55u64 NativeClock::GetRTSC() {
56 std::scoped_lock scope{rtsc_serialize};
57 _mm_mfence();
58 const u64 current_measure = __rdtsc();
59 u64 diff = current_measure - last_measure;
60 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
61 if (current_measure > last_measure) {
62 last_measure = current_measure;
63 }
64 accumulated_ticks += diff;
65 /// The clock cannot be more precise than the guest timer, remove the lower bits
66 return accumulated_ticks & inaccuracy_mask;
67}
68
69void NativeClock::Pause(bool is_paused) {
70 if (!is_paused) {
71 _mm_mfence();
72 last_measure = __rdtsc();
73 }
74}
75
76std::chrono::nanoseconds NativeClock::GetTimeNS() {
77 const u64 rtsc_value = GetRTSC();
78 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
79}
80
81std::chrono::microseconds NativeClock::GetTimeUS() {
82 const u64 rtsc_value = GetRTSC();
83 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
84}
85
86std::chrono::milliseconds NativeClock::GetTimeMS() {
87 const u64 rtsc_value = GetRTSC();
88 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
89}
90
91u64 NativeClock::GetClockCycles() {
92 const u64 rtsc_value = GetRTSC();
93 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
94}
95
96u64 NativeClock::GetCPUCycles() {
97 const u64 rtsc_value = GetRTSC();
98 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
99}
100
101} // namespace X64
102
103} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..97aab6ac9
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,48 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/spin_lock.h"
10#include "common/wall_clock.h"
11
12namespace Common {
13
14namespace X64 {
15class NativeClock final : public WallClock {
16public:
17 NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
18
19 std::chrono::nanoseconds GetTimeNS() override;
20
21 std::chrono::microseconds GetTimeUS() override;
22
23 std::chrono::milliseconds GetTimeMS() override;
24
25 u64 GetClockCycles() override;
26
27 u64 GetCPUCycles() override;
28
29 void Pause(bool is_paused) override;
30
31private:
32 u64 GetRTSC();
33
34 /// value used to reduce the native clocks accuracy as some apss rely on
35 /// undefined behavior where the level of accuracy in the clock shouldn't
36 /// be higher.
37 static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
38
39 SpinLock rtsc_serialize{};
40 u64 last_measure{};
41 u64 accumulated_ticks{};
42 u64 rtsc_frequency;
43};
44} // namespace X64
45
46u64 EstimateRDTSCFrequency();
47
48} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..26e4bfda5
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,229 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <initializer_list>
9#include <xbyak.h>
10#include "common/assert.h"
11
12namespace Common::X64 {
13
14constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers.");
18 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20}
21
22constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(static_cast<int>(reg_index));
25}
26
27constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(static_cast<int>(reg_index - 16));
30}
31
32constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) {
33 if (reg_index < 16) {
34 return IndexToReg64(reg_index);
35 } else {
36 return IndexToXmm(reg_index);
37 }
38}
39
40inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 std::bitset<32> bits;
42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true;
44 }
45 return bits;
46}
47
/// Register set containing every general purpose register (bits 0-15).
inline constexpr std::bitset<32> ABI_ALL_GPRS{0x0000FFFF};
/// Register set containing every XMM register (bits 16-31).
inline constexpr std::bitset<32> ABI_ALL_XMMS{0xFFFF0000};
50
51#ifdef _WIN32
52
53// Microsoft x64 ABI
54constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
55constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
56constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59
60const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs
62 Xbyak::util::rcx,
63 Xbyak::util::rdx,
64 Xbyak::util::r8,
65 Xbyak::util::r9,
66 Xbyak::util::r10,
67 Xbyak::util::r11,
68 // XMMs
69 Xbyak::util::xmm0,
70 Xbyak::util::xmm1,
71 Xbyak::util::xmm2,
72 Xbyak::util::xmm3,
73 Xbyak::util::xmm4,
74 Xbyak::util::xmm5,
75});
76
77const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs
79 Xbyak::util::rbx,
80 Xbyak::util::rsi,
81 Xbyak::util::rdi,
82 Xbyak::util::rbp,
83 Xbyak::util::r12,
84 Xbyak::util::r13,
85 Xbyak::util::r14,
86 Xbyak::util::r15,
87 // XMMs
88 Xbyak::util::xmm6,
89 Xbyak::util::xmm7,
90 Xbyak::util::xmm8,
91 Xbyak::util::xmm9,
92 Xbyak::util::xmm10,
93 Xbyak::util::xmm11,
94 Xbyak::util::xmm12,
95 Xbyak::util::xmm13,
96 Xbyak::util::xmm14,
97 Xbyak::util::xmm15,
98});
99
100constexpr size_t ABI_SHADOW_SPACE = 0x20;
101
102#else
103
104// System V x86-64 ABI
105constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
106constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
107constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110
111const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs
113 Xbyak::util::rcx,
114 Xbyak::util::rdx,
115 Xbyak::util::rdi,
116 Xbyak::util::rsi,
117 Xbyak::util::r8,
118 Xbyak::util::r9,
119 Xbyak::util::r10,
120 Xbyak::util::r11,
121 // XMMs
122 Xbyak::util::xmm0,
123 Xbyak::util::xmm1,
124 Xbyak::util::xmm2,
125 Xbyak::util::xmm3,
126 Xbyak::util::xmm4,
127 Xbyak::util::xmm5,
128 Xbyak::util::xmm6,
129 Xbyak::util::xmm7,
130 Xbyak::util::xmm8,
131 Xbyak::util::xmm9,
132 Xbyak::util::xmm10,
133 Xbyak::util::xmm11,
134 Xbyak::util::xmm12,
135 Xbyak::util::xmm13,
136 Xbyak::util::xmm14,
137 Xbyak::util::xmm15,
138});
139
140const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs
142 Xbyak::util::rbx,
143 Xbyak::util::rbp,
144 Xbyak::util::r12,
145 Xbyak::util::r13,
146 Xbyak::util::r14,
147 Xbyak::util::r15,
148});
149
150constexpr size_t ABI_SHADOW_SPACE = 0;
151
152#endif
153
154struct ABIFrameInfo {
155 s32 subtraction;
156 s32 xmm_offset;
157};
158
159inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
160 size_t needed_frame_size) {
161 const auto count = (regs & ABI_ALL_GPRS).count();
162 rsp_alignment -= count * 8;
163 size_t subtraction = 0;
164 const auto xmm_count = (regs & ABI_ALL_XMMS).count();
165 if (xmm_count) {
166 // If we have any XMMs to save, we must align the stack here.
167 subtraction = rsp_alignment & 0xF;
168 }
169 subtraction += 0x10 * xmm_count;
170 size_t xmm_base_subtraction = subtraction;
171 subtraction += needed_frame_size;
172 subtraction += ABI_SHADOW_SPACE;
173 // Final alignment.
174 rsp_alignment -= subtraction;
175 subtraction += rsp_alignment & 0xF;
176
177 return ABIFrameInfo{static_cast<s32>(subtraction),
178 static_cast<s32>(subtraction - xmm_base_subtraction)};
179}
180
181inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
182 size_t rsp_alignment, size_t needed_frame_size = 0) {
183 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
184
185 for (std::size_t i = 0; i < regs.size(); ++i) {
186 if (regs[i] && ABI_ALL_GPRS[i]) {
187 code.push(IndexToReg64(i));
188 }
189 }
190
191 if (frame_info.subtraction != 0) {
192 code.sub(code.rsp, frame_info.subtraction);
193 }
194
195 for (std::size_t i = 0; i < regs.size(); ++i) {
196 if (regs[i] && ABI_ALL_XMMS[i]) {
197 code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
198 frame_info.xmm_offset += 0x10;
199 }
200 }
201
202 return ABI_SHADOW_SPACE;
203}
204
205inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
206 size_t rsp_alignment, size_t needed_frame_size = 0) {
207 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
208
209 for (std::size_t i = 0; i < regs.size(); ++i) {
210 if (regs[i] && ABI_ALL_XMMS[i]) {
211 code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
212 frame_info.xmm_offset += 0x10;
213 }
214 }
215
216 if (frame_info.subtraction != 0) {
217 code.add(code.rsp, frame_info.subtraction);
218 }
219
220 // GPRs need to be popped in reverse order
221 for (std::size_t j = 0; j < regs.size(); ++j) {
222 const std::size_t i = regs.size() - j - 1;
223 if (regs[i] && ABI_ALL_GPRS[i]) {
224 code.pop(IndexToReg64(i));
225 }
226 }
227}
228
229} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common::X64 {
12
// Comparison predicates for use as the immediate operand of cmpps/cmpss
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
24
/// Checks whether `target` is reachable from `ref` with a signed 32-bit displacement.
///
/// The displacement is measured from `ref + 5`, i.e. from the end of a 5-byte instruction
/// placed at `ref`.
///
/// @return true when the displacement lies in [-2^31, 2^31 - 1].
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
    // Wrapped unsigned distance: small values are forward jumps, values near the top of
    // the 64-bit range are backward jumps.
    const unsigned long long distance = target - (ref + 5);
    const bool forward_ok = distance < 0x8000'0000ULL;
    const bool backward_ok = distance > ~0x8000'0000ULL;
    return forward_ok || backward_ok;
}
29
30inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
31 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
32}
33
34template <typename T>
35inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
36 static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
37 size_t addr = reinterpret_cast<size_t>(f);
38 if (IsWithin2G(code, addr)) {
39 code.call(f);
40 } else {
41 // ABI_RETURN is a safe temp register to use before a call
42 code.mov(ABI_RETURN, addr);
43 code.call(ABI_RETURN);
44 }
45}
46
47} // namespace Common::X64