summary | refs | log | tree | commit | diff
path: root/src/common/x64/cpu_wait.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/common/x64/cpu_wait.cpp 52
1 files changed, 38 insertions, 14 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index c53dd4945..41d385f59 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -13,36 +13,60 @@
13 13
14namespace Common::X64 { 14namespace Common::X64 {
15 15
16namespace {
17
18// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
19// For reference:
20// At 1 GHz, 100K cycles is 100us
21// At 2 GHz, 100K cycles is 50us
22// At 4 GHz, 100K cycles is 25us
23constexpr auto PauseCycles = 100'000U;
24
25} // Anonymous namespace
26
16#ifdef _MSC_VER 27#ifdef _MSC_VER
17__forceinline static void TPAUSE() { 28__forceinline static void TPAUSE() {
18 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 29 static constexpr auto RequestC02State = 0U;
19 // For reference: 30 _tpause(RequestC02State, FencedRDTSC() + PauseCycles);
20 // At 1 GHz, 100K cycles is 100us 31}
21 // At 2 GHz, 100K cycles is 50us 32
22 // At 4 GHz, 100K cycles is 25us 33__forceinline static void MWAITX() {
23 static constexpr auto PauseCycles = 100'000; 34 static constexpr auto EnableWaitTimeFlag = 1U << 1;
24 _tpause(0, FencedRDTSC() + PauseCycles); 35 static constexpr auto RequestC1State = 0U;
36
37 // monitor_var should be aligned to a cache line.
38 alignas(64) u64 monitor_var{};
39 _mm_monitorx(&monitor_var, 0, 0);
40 _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
25} 41}
26#else 42#else
27static void TPAUSE() { 43static void TPAUSE() {
28 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 44 static constexpr auto RequestC02State = 0U;
29 // For reference:
30 // At 1 GHz, 100K cycles is 100us
31 // At 2 GHz, 100K cycles is 50us
32 // At 4 GHz, 100K cycles is 25us
33 static constexpr auto PauseCycles = 100'000;
34 const auto tsc = FencedRDTSC() + PauseCycles; 45 const auto tsc = FencedRDTSC() + PauseCycles;
35 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); 46 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
36 const auto edx = static_cast<u32>(tsc >> 32); 47 const auto edx = static_cast<u32>(tsc >> 32);
37 asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); 48 asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
49}
50
51static void MWAITX() {
52 static constexpr auto EnableWaitTimeFlag = 1U << 1;
53 static constexpr auto RequestC1State = 0U;
54
55 // monitor_var should be aligned to a cache line.
56 alignas(64) u64 monitor_var{};
57 asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
58 asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
38} 59}
39#endif 60#endif
40 61
41void MicroSleep() { 62void MicroSleep() {
42 static const bool has_waitpkg = GetCPUCaps().waitpkg; 63 static const bool has_waitpkg = GetCPUCaps().waitpkg;
64 static const bool has_monitorx = GetCPUCaps().monitorx;
43 65
44 if (has_waitpkg) { 66 if (has_waitpkg) {
45 TPAUSE(); 67 TPAUSE();
68 } else if (has_monitorx) {
69 MWAITX();
46 } else { 70 } else {
47 std::this_thread::yield(); 71 std::this_thread::yield();
48 } 72 }