diff options
Diffstat (limited to 'src/common/x64/cpu_wait.cpp')
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index c53dd4945..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -13,36 +13,60 @@ | |||
| 13 | 13 | ||
| 14 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 15 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | constexpr auto PauseCycles = 100'000U; | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 16 | #ifdef _MSC_VER | 27 | #ifdef _MSC_VER |
| 17 | __forceinline static void TPAUSE() { | 28 | __forceinline static void TPAUSE() { |
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 29 | static constexpr auto RequestC02State = 0U; |
| 19 | // For reference: | 30 | _tpause(RequestC02State, FencedRDTSC() + PauseCycles); |
| 20 | // At 1 GHz, 100K cycles is 100us | 31 | } |
| 21 | // At 2 GHz, 100K cycles is 50us | 32 | |
| 22 | // At 4 GHz, 100K cycles is 25us | 33 | __forceinline static void MWAITX() { |
| 23 | static constexpr auto PauseCycles = 100'000; | 34 | static constexpr auto EnableWaitTimeFlag = 1U << 1; |
| 24 | _tpause(0, FencedRDTSC() + PauseCycles); | 35 | static constexpr auto RequestC1State = 0U; |
| 36 | |||
| 37 | // monitor_var should be aligned to a cache line. | ||
| 38 | alignas(64) u64 monitor_var{}; | ||
| 39 | _mm_monitorx(&monitor_var, 0, 0); | ||
| 40 | _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); | ||
| 25 | } | 41 | } |
| 26 | #else | 42 | #else |
| 27 | static void TPAUSE() { | 43 | static void TPAUSE() { |
| 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 44 | static constexpr auto RequestC02State = 0U; |
| 29 | // For reference: | ||
| 30 | // At 1 GHz, 100K cycles is 100us | ||
| 31 | // At 2 GHz, 100K cycles is 50us | ||
| 32 | // At 4 GHz, 100K cycles is 25us | ||
| 33 | static constexpr auto PauseCycles = 100'000; | ||
| 34 | const auto tsc = FencedRDTSC() + PauseCycles; | 45 | const auto tsc = FencedRDTSC() + PauseCycles; |
| 35 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | 46 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
| 36 | const auto edx = static_cast<u32>(tsc >> 32); | 47 | const auto edx = static_cast<u32>(tsc >> 32); |
| 37 | asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); | 48 | asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
| 49 | } | ||
| 50 | |||
| 51 | static void MWAITX() { | ||
| 52 | static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||
| 53 | static constexpr auto RequestC1State = 0U; | ||
| 54 | |||
| 55 | // monitor_var should be aligned to a cache line. | ||
| 56 | alignas(64) u64 monitor_var{}; | ||
| 57 | asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||
| 58 | asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||
| 38 | } | 59 | } |
| 39 | #endif | 60 | #endif |
| 40 | 61 | ||
| 41 | void MicroSleep() { | 62 | void MicroSleep() { |
| 42 | static const bool has_waitpkg = GetCPUCaps().waitpkg; | 63 | static const bool has_waitpkg = GetCPUCaps().waitpkg; |
| 64 | static const bool has_monitorx = GetCPUCaps().monitorx; | ||
| 43 | 65 | ||
| 44 | if (has_waitpkg) { | 66 | if (has_waitpkg) { |
| 45 | TPAUSE(); | 67 | TPAUSE(); |
| 68 | } else if (has_monitorx) { | ||
| 69 | MWAITX(); | ||
| 46 | } else { | 70 | } else { |
| 47 | std::this_thread::yield(); | 71 | std::this_thread::yield(); |
| 48 | } | 72 | } |