diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 1 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 52 |
4 files changed, 41 insertions, 14 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 91352912d..929ed67e4 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
| @@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { | |||
| 93 | add_field("CPU_Extension_x64_GFNI", caps.gfni); | 93 | add_field("CPU_Extension_x64_GFNI", caps.gfni); |
| 94 | add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); | 94 | add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); |
| 95 | add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); | 95 | add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); |
| 96 | add_field("CPU_Extension_x64_MONITORX", caps.monitorx); | ||
| 96 | add_field("CPU_Extension_x64_MOVBE", caps.movbe); | 97 | add_field("CPU_Extension_x64_MOVBE", caps.movbe); |
| 97 | add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); | 98 | add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); |
| 98 | add_field("CPU_Extension_x64_POPCNT", caps.popcnt); | 99 | add_field("CPU_Extension_x64_POPCNT", caps.popcnt); |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c998b1197..780120a5b 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
| @@ -168,6 +168,7 @@ static CPUCaps Detect() { | |||
| 168 | __cpuid(cpu_id, 0x80000001); | 168 | __cpuid(cpu_id, 0x80000001); |
| 169 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); | 169 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); |
| 170 | caps.fma4 = Common::Bit<16>(cpu_id[2]); | 170 | caps.fma4 = Common::Bit<16>(cpu_id[2]); |
| 171 | caps.monitorx = Common::Bit<29>(cpu_id[2]); | ||
| 171 | } | 172 | } |
| 172 | 173 | ||
| 173 | if (max_ex_fn >= 0x80000007) { | 174 | if (max_ex_fn >= 0x80000007) { |
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 8253944d6..756459417 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
| @@ -63,6 +63,7 @@ struct CPUCaps { | |||
| 63 | bool gfni : 1; | 63 | bool gfni : 1; |
| 64 | bool invariant_tsc : 1; | 64 | bool invariant_tsc : 1; |
| 65 | bool lzcnt : 1; | 65 | bool lzcnt : 1; |
| 66 | bool monitorx : 1; | ||
| 66 | bool movbe : 1; | 67 | bool movbe : 1; |
| 67 | bool pclmulqdq : 1; | 68 | bool pclmulqdq : 1; |
| 68 | bool popcnt : 1; | 69 | bool popcnt : 1; |
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp
index c53dd4945..41d385f59 100644
--- a/src/common/x64/cpu_wait.cpp
+++ b/src/common/x64/cpu_wait.cpp
| @@ -13,36 +13,60 @@ | |||
| 13 | 13 | ||
| 14 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 15 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | constexpr auto PauseCycles = 100'000U; | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 16 | #ifdef _MSC_VER | 27 | #ifdef _MSC_VER |
| 17 | __forceinline static void TPAUSE() { | 28 | __forceinline static void TPAUSE() { |
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 29 | static constexpr auto RequestC02State = 0U; |
| 19 | // For reference: | 30 | _tpause(RequestC02State, FencedRDTSC() + PauseCycles); |
| 20 | // At 1 GHz, 100K cycles is 100us | 31 | } |
| 21 | // At 2 GHz, 100K cycles is 50us | 32 | |
| 22 | // At 4 GHz, 100K cycles is 25us | 33 | __forceinline static void MWAITX() { |
| 23 | static constexpr auto PauseCycles = 100'000; | 34 | static constexpr auto EnableWaitTimeFlag = 1U << 1; |
| 24 | _tpause(0, FencedRDTSC() + PauseCycles); | 35 | static constexpr auto RequestC1State = 0U; |
| 36 | |||
| 37 | // monitor_var should be aligned to a cache line. | ||
| 38 | alignas(64) u64 monitor_var{}; | ||
| 39 | _mm_monitorx(&monitor_var, 0, 0); | ||
| 40 | _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); | ||
| 25 | } | 41 | } |
| 26 | #else | 42 | #else |
| 27 | static void TPAUSE() { | 43 | static void TPAUSE() { |
| 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 44 | static constexpr auto RequestC02State = 0U; |
| 29 | // For reference: | ||
| 30 | // At 1 GHz, 100K cycles is 100us | ||
| 31 | // At 2 GHz, 100K cycles is 50us | ||
| 32 | // At 4 GHz, 100K cycles is 25us | ||
| 33 | static constexpr auto PauseCycles = 100'000; | ||
| 34 | const auto tsc = FencedRDTSC() + PauseCycles; | 45 | const auto tsc = FencedRDTSC() + PauseCycles; |
| 35 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | 46 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
| 36 | const auto edx = static_cast<u32>(tsc >> 32); | 47 | const auto edx = static_cast<u32>(tsc >> 32); |
| 37 | asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); | 48 | asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
| 49 | } | ||
| 50 | |||
| 51 | static void MWAITX() { | ||
| 52 | static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||
| 53 | static constexpr auto RequestC1State = 0U; | ||
| 54 | |||
| 55 | // monitor_var should be aligned to a cache line. | ||
| 56 | alignas(64) u64 monitor_var{}; | ||
| 57 | asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||
| 58 | asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||
| 38 | } | 59 | } |
| 39 | #endif | 60 | #endif |
| 40 | 61 | ||
| 41 | void MicroSleep() { | 62 | void MicroSleep() { |
| 42 | static const bool has_waitpkg = GetCPUCaps().waitpkg; | 63 | static const bool has_waitpkg = GetCPUCaps().waitpkg; |
| 64 | static const bool has_monitorx = GetCPUCaps().monitorx; | ||
| 43 | 65 | ||
| 44 | if (has_waitpkg) { | 66 | if (has_waitpkg) { |
| 45 | TPAUSE(); | 67 | TPAUSE(); |
| 68 | } else if (has_monitorx) { | ||
| 69 | MWAITX(); | ||
| 46 | } else { | 70 | } else { |
| 47 | std::this_thread::yield(); | 71 | std::this_thread::yield(); |
| 48 | } | 72 | } |