summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/common/CMakeLists.txt | 2
-rw-r--r-- src/common/telemetry.cpp | 1
-rw-r--r-- src/common/x64/cpu_detect.cpp | 1
-rw-r--r-- src/common/x64/cpu_detect.h | 1
-rw-r--r-- src/common/x64/cpu_wait.cpp | 69
-rw-r--r-- src/common/x64/cpu_wait.h | 10
-rw-r--r-- src/common/x64/native_clock.cpp | 13
-rw-r--r-- src/core/core_timing.cpp | 8
8 files changed, 97 insertions, 8 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index c1d2b24a1..13ed68b3f 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -160,6 +160,8 @@ if(ARCHITECTURE_x86_64)
160 PRIVATE 160 PRIVATE
161 x64/cpu_detect.cpp 161 x64/cpu_detect.cpp
162 x64/cpu_detect.h 162 x64/cpu_detect.h
163 x64/cpu_wait.cpp
164 x64/cpu_wait.h
163 x64/native_clock.cpp 165 x64/native_clock.cpp
164 x64/native_clock.h 166 x64/native_clock.h
165 x64/xbyak_abi.h 167 x64/xbyak_abi.h
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index d26394359..91352912d 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -97,6 +97,7 @@ void AppendCPUInfo(FieldCollection& fc) {
97 add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); 97 add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
98 add_field("CPU_Extension_x64_POPCNT", caps.popcnt); 98 add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
99 add_field("CPU_Extension_x64_SHA", caps.sha); 99 add_field("CPU_Extension_x64_SHA", caps.sha);
100 add_field("CPU_Extension_x64_WAITPKG", caps.waitpkg);
100#else 101#else
101 fc.AddField(FieldType::UserSystem, "CPU_Model", "Other"); 102 fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
102#endif 103#endif
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index e54383a4a..72ed6e96c 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -144,6 +144,7 @@ static CPUCaps Detect() {
144 caps.bmi2 = Common::Bit<8>(cpu_id[1]); 144 caps.bmi2 = Common::Bit<8>(cpu_id[1]);
145 caps.sha = Common::Bit<29>(cpu_id[1]); 145 caps.sha = Common::Bit<29>(cpu_id[1]);
146 146
147 caps.waitpkg = Common::Bit<5>(cpu_id[2]);
147 caps.gfni = Common::Bit<8>(cpu_id[2]); 148 caps.gfni = Common::Bit<8>(cpu_id[2]);
148 149
149 __cpuidex(cpu_id, 0x00000007, 0x00000001); 150 __cpuidex(cpu_id, 0x00000007, 0x00000001);
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index ca8db19d6..8253944d6 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -67,6 +67,7 @@ struct CPUCaps {
67 bool pclmulqdq : 1; 67 bool pclmulqdq : 1;
68 bool popcnt : 1; 68 bool popcnt : 1;
69 bool sha : 1; 69 bool sha : 1;
70 bool waitpkg : 1;
70}; 71};
71 72
72/** 73/**
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
new file mode 100644
index 000000000..cfeef6a3d
--- /dev/null
+++ b/src/common/x64/cpu_wait.cpp
@@ -0,0 +1,69 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <thread>
5
6#ifdef _MSC_VER
7#include <intrin.h>
8#endif
9
10#include "common/x64/cpu_detect.h"
11#include "common/x64/cpu_wait.h"
12
13namespace Common::X64 {
14
15#ifdef _MSC_VER
16__forceinline static u64 FencedRDTSC() {
17 _mm_lfence();
18 _ReadWriteBarrier();
19 const u64 result = __rdtsc();
20 _mm_lfence();
21 _ReadWriteBarrier();
22 return result;
23}
24
25__forceinline static void TPAUSE() {
26 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
27 // For reference:
28 // At 1 GHz, 100K cycles is 100us
29 // At 2 GHz, 100K cycles is 50us
30 // At 4 GHz, 100K cycles is 25us
31 static constexpr auto PauseCycles = 100'000;
32 _tpause(0, FencedRDTSC() + PauseCycles);
33}
34#else
35static u64 FencedRDTSC() {
36 u64 eax;
37 u64 edx;
38 asm volatile("lfence\n\t"
39 "rdtsc\n\t"
40 "lfence\n\t"
41 : "=a"(eax), "=d"(edx));
42 return (edx << 32) | eax;
43}
44
45static void TPAUSE() {
46 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
47 // For reference:
48 // At 1 GHz, 100K cycles is 100us
49 // At 2 GHz, 100K cycles is 50us
50 // At 4 GHz, 100K cycles is 25us
51 static constexpr auto PauseCycles = 100'000;
52 const auto tsc = FencedRDTSC() + PauseCycles;
53 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
54 const auto edx = static_cast<u32>(tsc >> 32);
55 asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax));
56}
57#endif
58
59void MicroSleep() {
60 static const bool has_waitpkg = GetCPUCaps().waitpkg;
61
62 if (has_waitpkg) {
63 TPAUSE();
64 } else {
65 std::this_thread::yield();
66 }
67}
68
69} // namespace Common::X64
diff --git a/src/common/x64/cpu_wait.h b/src/common/x64/cpu_wait.h
new file mode 100644
index 000000000..99d3757a7
--- /dev/null
+++ b/src/common/x64/cpu_wait.h
@@ -0,0 +1,10 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6namespace Common::X64 {
7
/// Briefly idles the calling thread: waits via the TPAUSE instruction when the detected CPU
/// capabilities report `waitpkg`, and falls back to std::this_thread::yield() otherwise.
8void MicroSleep();
9
10} // namespace Common::X64
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 76c66e7ee..277b00662 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -27,16 +27,13 @@ __forceinline static u64 FencedRDTSC() {
27} 27}
28#else 28#else
29static u64 FencedRDTSC() { 29static u64 FencedRDTSC() {
30 u64 result; 30 u64 eax;
31 u64 edx;
31 asm volatile("lfence\n\t" 32 asm volatile("lfence\n\t"
32 "rdtsc\n\t" 33 "rdtsc\n\t"
33 "shl $32, %%rdx\n\t" 34 "lfence\n\t"
34 "or %%rdx, %0\n\t" 35 : "=a"(eax), "=d"(edx));
35 "lfence" 36 return (edx << 32) | eax;
36 : "=a"(result)
37 :
38 : "rdx", "memory", "cc");
39 return result;
40} 37}
41#endif 38#endif
42 39
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index cd4df4522..4f2692b05 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -10,6 +10,10 @@
10#include "common/windows/timer_resolution.h" 10#include "common/windows/timer_resolution.h"
11#endif 11#endif
12 12
13#ifdef ARCHITECTURE_x86_64
14#include "common/x64/cpu_wait.h"
15#endif
16
13#include "common/microprofile.h" 17#include "common/microprofile.h"
14#include "core/core_timing.h" 18#include "core/core_timing.h"
15#include "core/core_timing_util.h" 19#include "core/core_timing_util.h"
@@ -269,7 +273,11 @@ void CoreTiming::ThreadLoop() {
269 if (wait_time >= timer_resolution_ns) { 273 if (wait_time >= timer_resolution_ns) {
270 Common::Windows::SleepForOneTick(); 274 Common::Windows::SleepForOneTick();
271 } else { 275 } else {
276#ifdef ARCHITECTURE_x86_64
277 Common::X64::MicroSleep();
278#else
272 std::this_thread::yield(); 279 std::this_thread::yield();
280#endif
273 } 281 }
274 } 282 }
275 283