diff options
Diffstat (limited to 'src/common/x64/cpu_wait.cpp')
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 33 |
1 file changed, 21 insertions, 12 deletions
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index c53dd4945..11b9c4d83 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -13,24 +13,30 @@ | |||
| 13 | 13 | ||
| 14 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 15 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | // Wait length, in cycles, shared by TPAUSE() and MWAITX(). 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference, the equivalent wall-clock time at common clock rates: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | constexpr auto PauseCycles = 100'000U; | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 16 | #ifdef _MSC_VER | 27 | #ifdef _MSC_VER |
| 17 | __forceinline static void TPAUSE() { | 28 | __forceinline static void TPAUSE() { |
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | static constexpr auto PauseCycles = 100'000; | ||
| 24 | _tpause(0, FencedRDTSC() + PauseCycles); | 29 | _tpause(0, FencedRDTSC() + PauseCycles); |
| 25 | } | 30 | } |
| 31 | |||
| 32 | __forceinline static void MWAITX() { | ||
| 33 | // monitor_var is the local whose address is passed to _mm_monitorx below; it should be aligned to a cache line, hence alignas(64). | ||
| 34 | alignas(64) u64 monitor_var{}; | ||
| 35 | _mm_monitorx(&monitor_var, 0, 0); | ||
| 36 | _mm_mwaitx(/* extensions: 2 presumably enables the timeout given in cycles - confirm against the AMD manual */ 2, /* hints */ 0, /* cycles */ PauseCycles); | ||
| 37 | } | ||
| 26 | #else | 38 | #else |
| 27 | static void TPAUSE() { | 39 | static void TPAUSE() { |
| 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 29 | // For reference: | ||
| 30 | // At 1 GHz, 100K cycles is 100us | ||
| 31 | // At 2 GHz, 100K cycles is 50us | ||
| 32 | // At 4 GHz, 100K cycles is 25us | ||
| 33 | static constexpr auto PauseCycles = 100'000; | ||
| 34 | const auto tsc = FencedRDTSC() + PauseCycles; | 40 | const auto tsc = FencedRDTSC() + PauseCycles; |
| 35 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | 41 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
| 36 | const auto edx = static_cast<u32>(tsc >> 32); | 42 | const auto edx = static_cast<u32>(tsc >> 32); |
| @@ -40,9 +46,12 @@ static void TPAUSE() { | |||
| 40 | 46 | ||
| 41 | void MicroSleep() { | 47 | void MicroSleep() { |
| 42 | static const bool has_waitpkg = GetCPUCaps().waitpkg; | 48 | static const bool has_waitpkg = GetCPUCaps().waitpkg; |
| 49 | static const bool has_monitorx = GetCPUCaps().monitorx; | ||
| 43 | 50 | ||
| 44 | if (has_waitpkg) { | 51 | if (has_waitpkg) { |
| 45 | TPAUSE(); | 52 | TPAUSE(); |
| 53 | } else if (has_monitorx) { | ||
| 54 | MWAITX(); | ||
| 46 | } else { | 55 | } else { |
| 47 | std::this_thread::yield(); | 56 | std::this_thread::yield(); |
| 48 | } | 57 | } |