diff options
Diffstat (limited to 'src/common/x64/cpu_wait.cpp')
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 70 |
1 files changed, 38 insertions, 32 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -9,58 +9,64 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 11 | #include "common/x64/cpu_wait.h" | 11 | #include "common/x64/cpu_wait.h" |
| 12 | #include "common/x64/rdtsc.h" | ||
| 12 | 13 | ||
| 13 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 14 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | constexpr auto PauseCycles = 100'000U; | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 15 | #ifdef _MSC_VER | 27 | #ifdef _MSC_VER |
| 16 | __forceinline static u64 FencedRDTSC() { | 28 | __forceinline static void TPAUSE() { |
| 17 | _mm_lfence(); | 29 | static constexpr auto RequestC02State = 0U; |
| 18 | _ReadWriteBarrier(); | 30 | _tpause(RequestC02State, FencedRDTSC() + PauseCycles); |
| 19 | const u64 result = __rdtsc(); | ||
| 20 | _mm_lfence(); | ||
| 21 | _ReadWriteBarrier(); | ||
| 22 | return result; | ||
| 23 | } | 31 | } |
| 24 | 32 | ||
| 25 | __forceinline static void TPAUSE() { | 33 | __forceinline static void MWAITX() { |
| 26 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 34 | static constexpr auto EnableWaitTimeFlag = 1U << 1; |
| 27 | // For reference: | 35 | static constexpr auto RequestC1State = 0U; |
| 28 | // At 1 GHz, 100K cycles is 100us | 36 | |
| 29 | // At 2 GHz, 100K cycles is 50us | 37 | // monitor_var should be aligned to a cache line. |
| 30 | // At 4 GHz, 100K cycles is 25us | 38 | alignas(64) u64 monitor_var{}; |
| 31 | static constexpr auto PauseCycles = 100'000; | 39 | _mm_monitorx(&monitor_var, 0, 0); |
| 32 | _tpause(0, FencedRDTSC() + PauseCycles); | 40 | _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); |
| 33 | } | 41 | } |
| 34 | #else | 42 | #else |
| 35 | static u64 FencedRDTSC() { | ||
| 36 | u64 eax; | ||
| 37 | u64 edx; | ||
| 38 | asm volatile("lfence\n\t" | ||
| 39 | "rdtsc\n\t" | ||
| 40 | "lfence\n\t" | ||
| 41 | : "=a"(eax), "=d"(edx)); | ||
| 42 | return (edx << 32) | eax; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void TPAUSE() { | 43 | static void TPAUSE() { |
| 46 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 44 | static constexpr auto RequestC02State = 0U; |
| 47 | // For reference: | ||
| 48 | // At 1 GHz, 100K cycles is 100us | ||
| 49 | // At 2 GHz, 100K cycles is 50us | ||
| 50 | // At 4 GHz, 100K cycles is 25us | ||
| 51 | static constexpr auto PauseCycles = 100'000; | ||
| 52 | const auto tsc = FencedRDTSC() + PauseCycles; | 45 | const auto tsc = FencedRDTSC() + PauseCycles; |
| 53 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | 46 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
| 54 | const auto edx = static_cast<u32>(tsc >> 32); | 47 | const auto edx = static_cast<u32>(tsc >> 32); |
| 55 | asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); | 48 | asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
| 49 | } | ||
| 50 | |||
| 51 | static void MWAITX() { | ||
| 52 | static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||
| 53 | static constexpr auto RequestC1State = 0U; | ||
| 54 | |||
| 55 | // monitor_var should be aligned to a cache line. | ||
| 56 | alignas(64) u64 monitor_var{}; | ||
| 57 | asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||
| 58 | asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||
| 56 | } | 59 | } |
| 57 | #endif | 60 | #endif |
| 58 | 61 | ||
| 59 | void MicroSleep() { | 62 | void MicroSleep() { |
| 60 | static const bool has_waitpkg = GetCPUCaps().waitpkg; | 63 | static const bool has_waitpkg = GetCPUCaps().waitpkg; |
| 64 | static const bool has_monitorx = GetCPUCaps().monitorx; | ||
| 61 | 65 | ||
| 62 | if (has_waitpkg) { | 66 | if (has_waitpkg) { |
| 63 | TPAUSE(); | 67 | TPAUSE(); |
| 68 | } else if (has_monitorx) { | ||
| 69 | MWAITX(); | ||
| 64 | } else { | 70 | } else { |
| 65 | std::this_thread::yield(); | 71 | std::this_thread::yield(); |
| 66 | } | 72 | } |