summaryrefslogtreecommitdiff
path: root/src/common/x64/cpu_wait.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/common/x64/cpu_wait.cpp70
1 files changed, 38 insertions, 32 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index cfeef6a3d..41d385f59 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
@@ -9,58 +9,64 @@
9 9
10#include "common/x64/cpu_detect.h" 10#include "common/x64/cpu_detect.h"
11#include "common/x64/cpu_wait.h" 11#include "common/x64/cpu_wait.h"
12#include "common/x64/rdtsc.h"
12 13
13namespace Common::X64 { 14namespace Common::X64 {
14 15
16namespace {
17
18// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
19// For reference:
20// At 1 GHz, 100K cycles is 100us
21// At 2 GHz, 100K cycles is 50us
22// At 4 GHz, 100K cycles is 25us
23constexpr auto PauseCycles = 100'000U;
24
25} // Anonymous namespace
26
15#ifdef _MSC_VER 27#ifdef _MSC_VER
16__forceinline static u64 FencedRDTSC() { 28__forceinline static void TPAUSE() {
17 _mm_lfence(); 29 static constexpr auto RequestC02State = 0U;
18 _ReadWriteBarrier(); 30 _tpause(RequestC02State, FencedRDTSC() + PauseCycles);
19 const u64 result = __rdtsc();
20 _mm_lfence();
21 _ReadWriteBarrier();
22 return result;
23} 31}
24 32
25__forceinline static void TPAUSE() { 33__forceinline static void MWAITX() {
26 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 34 static constexpr auto EnableWaitTimeFlag = 1U << 1;
27 // For reference: 35 static constexpr auto RequestC1State = 0U;
28 // At 1 GHz, 100K cycles is 100us 36
29 // At 2 GHz, 100K cycles is 50us 37 // monitor_var should be aligned to a cache line.
30 // At 4 GHz, 100K cycles is 25us 38 alignas(64) u64 monitor_var{};
31 static constexpr auto PauseCycles = 100'000; 39 _mm_monitorx(&monitor_var, 0, 0);
32 _tpause(0, FencedRDTSC() + PauseCycles); 40 _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
33} 41}
34#else 42#else
35static u64 FencedRDTSC() {
36 u64 eax;
37 u64 edx;
38 asm volatile("lfence\n\t"
39 "rdtsc\n\t"
40 "lfence\n\t"
41 : "=a"(eax), "=d"(edx));
42 return (edx << 32) | eax;
43}
44
45static void TPAUSE() { 43static void TPAUSE() {
46 // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. 44 static constexpr auto RequestC02State = 0U;
47 // For reference:
48 // At 1 GHz, 100K cycles is 100us
49 // At 2 GHz, 100K cycles is 50us
50 // At 4 GHz, 100K cycles is 25us
51 static constexpr auto PauseCycles = 100'000;
52 const auto tsc = FencedRDTSC() + PauseCycles; 45 const auto tsc = FencedRDTSC() + PauseCycles;
53 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); 46 const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
54 const auto edx = static_cast<u32>(tsc >> 32); 47 const auto edx = static_cast<u32>(tsc >> 32);
55 asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); 48 asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
49}
50
51static void MWAITX() {
52 static constexpr auto EnableWaitTimeFlag = 1U << 1;
53 static constexpr auto RequestC1State = 0U;
54
55 // monitor_var should be aligned to a cache line.
56 alignas(64) u64 monitor_var{};
57 asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
58 asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
56} 59}
57#endif 60#endif
58 61
59void MicroSleep() { 62void MicroSleep() {
60 static const bool has_waitpkg = GetCPUCaps().waitpkg; 63 static const bool has_waitpkg = GetCPUCaps().waitpkg;
64 static const bool has_monitorx = GetCPUCaps().monitorx;
61 65
62 if (has_waitpkg) { 66 if (has_waitpkg) {
63 TPAUSE(); 67 TPAUSE();
68 } else if (has_monitorx) {
69 MWAITX();
64 } else { 70 } else {
65 std::this_thread::yield(); 71 std::this_thread::yield();
66 } 72 }