diff options
Diffstat (limited to 'src/common/x64/native_clock.cpp')
| -rw-r--r-- | src/common/x64/native_clock.cpp | 110 |
1 files changed, 91 insertions, 19 deletions
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index eb8a7782f..a65f6b832 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -2,19 +2,74 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <limits> | ||
| 6 | #include <mutex> | 8 | #include <mutex> |
| 7 | #include <thread> | 9 | #include <thread> |
| 8 | 10 | ||
| 9 | #ifdef _MSC_VER | 11 | #ifdef _MSC_VER |
| 10 | #include <intrin.h> | 12 | #include <intrin.h> |
| 13 | |||
| 14 | #pragma intrinsic(__umulh) | ||
| 15 | #pragma intrinsic(_udiv128) | ||
| 11 | #else | 16 | #else |
| 12 | #include <x86intrin.h> | 17 | #include <x86intrin.h> |
| 13 | #endif | 18 | #endif |
| 14 | 19 | ||
| 20 | #include "common/atomic_ops.h" | ||
| 15 | #include "common/uint128.h" | 21 | #include "common/uint128.h" |
| 16 | #include "common/x64/native_clock.h" | 22 | #include "common/x64/native_clock.h" |
| 17 | 23 | ||
| 24 | namespace { | ||
| 25 | |||
| 26 | [[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) { | ||
| 27 | #ifdef __SIZEOF_INT128__ | ||
| 28 | const auto base = static_cast<unsigned __int128>(numerator) << 64ULL; | ||
| 29 | return static_cast<u64>(base / divisor); | ||
| 30 | #elif defined(_M_X64) || defined(_M_ARM64) | ||
| 31 | std::array<u64, 2> r = {0, numerator}; | ||
| 32 | u64 remainder; | ||
| 33 | #if _MSC_VER < 1923 | ||
| 34 | return udiv128(r[1], r[0], divisor, &remainder); | ||
| 35 | #else | ||
| 36 | return _udiv128(r[1], r[0], divisor, &remainder); | ||
| 37 | #endif | ||
| 38 | #else | ||
| 39 | // This one is bit more inaccurate. | ||
| 40 | return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor); | ||
| 41 | #endif | ||
| 42 | } | ||
| 43 | |||
| 44 | [[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) { | ||
| 45 | #ifdef __SIZEOF_INT128__ | ||
| 46 | return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64; | ||
| 47 | #elif defined(_M_X64) || defined(_M_ARM64) | ||
| 48 | return __umulh(a, b); // MSVC | ||
| 49 | #else | ||
| 50 | // Generic fallback | ||
| 51 | const u64 a_lo = u32(a); | ||
| 52 | const u64 a_hi = a >> 32; | ||
| 53 | const u64 b_lo = u32(b); | ||
| 54 | const u64 b_hi = b >> 32; | ||
| 55 | |||
| 56 | const u64 a_x_b_hi = a_hi * b_hi; | ||
| 57 | const u64 a_x_b_mid = a_hi * b_lo; | ||
| 58 | const u64 b_x_a_mid = b_hi * a_lo; | ||
| 59 | const u64 a_x_b_lo = a_lo * b_lo; | ||
| 60 | |||
| 61 | const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) + | ||
| 62 | static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >> | ||
| 63 | 32; | ||
| 64 | |||
| 65 | const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; | ||
| 66 | |||
| 67 | return multhi; | ||
| 68 | #endif | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace | ||
| 72 | |||
| 18 | namespace Common { | 73 | namespace Common { |
| 19 | 74 | ||
| 20 | u64 EstimateRDTSCFrequency() { | 75 | u64 EstimateRDTSCFrequency() { |
| @@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen | |||
| 48 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ | 103 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ |
| 49 | rtsc_frequency_} { | 104 | rtsc_frequency_} { |
| 50 | _mm_mfence(); | 105 | _mm_mfence(); |
| 51 | last_measure = __rdtsc(); | 106 | time_point.inner.last_measure = __rdtsc(); |
| 52 | accumulated_ticks = 0U; | 107 | time_point.inner.accumulated_ticks = 0U; |
| 108 | ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); | ||
| 109 | us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); | ||
| 110 | ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); | ||
| 111 | clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); | ||
| 112 | cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); | ||
| 53 | } | 113 | } |
| 54 | 114 | ||
| 55 | u64 NativeClock::GetRTSC() { | 115 | u64 NativeClock::GetRTSC() { |
| 56 | std::scoped_lock scope{rtsc_serialize}; | 116 | TimePoint new_time_point{}; |
| 57 | _mm_mfence(); | 117 | TimePoint current_time_point{}; |
| 58 | const u64 current_measure = __rdtsc(); | 118 | do { |
| 59 | u64 diff = current_measure - last_measure; | 119 | current_time_point.pack = time_point.pack; |
| 60 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) | 120 | _mm_mfence(); |
| 61 | if (current_measure > last_measure) { | 121 | const u64 current_measure = __rdtsc(); |
| 62 | last_measure = current_measure; | 122 | u64 diff = current_measure - current_time_point.inner.last_measure; |
| 63 | } | 123 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) |
| 64 | accumulated_ticks += diff; | 124 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure |
| 125 | ? current_measure | ||
| 126 | : current_time_point.inner.last_measure; | ||
| 127 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; | ||
| 128 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 129 | current_time_point.pack)); | ||
| 65 | /// The clock cannot be more precise than the guest timer, remove the lower bits | 130 | /// The clock cannot be more precise than the guest timer, remove the lower bits |
| 66 | return accumulated_ticks & inaccuracy_mask; | 131 | return new_time_point.inner.accumulated_ticks & inaccuracy_mask; |
| 67 | } | 132 | } |
| 68 | 133 | ||
| 69 | void NativeClock::Pause(bool is_paused) { | 134 | void NativeClock::Pause(bool is_paused) { |
| 70 | if (!is_paused) { | 135 | if (!is_paused) { |
| 71 | _mm_mfence(); | 136 | TimePoint current_time_point{}; |
| 72 | last_measure = __rdtsc(); | 137 | TimePoint new_time_point{}; |
| 138 | do { | ||
| 139 | current_time_point.pack = time_point.pack; | ||
| 140 | new_time_point.pack = current_time_point.pack; | ||
| 141 | _mm_mfence(); | ||
| 142 | new_time_point.inner.last_measure = __rdtsc(); | ||
| 143 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 144 | current_time_point.pack)); | ||
| 73 | } | 145 | } |
| 74 | } | 146 | } |
| 75 | 147 | ||
| 76 | std::chrono::nanoseconds NativeClock::GetTimeNS() { | 148 | std::chrono::nanoseconds NativeClock::GetTimeNS() { |
| 77 | const u64 rtsc_value = GetRTSC(); | 149 | const u64 rtsc_value = GetRTSC(); |
| 78 | return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; | 150 | return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; |
| 79 | } | 151 | } |
| 80 | 152 | ||
| 81 | std::chrono::microseconds NativeClock::GetTimeUS() { | 153 | std::chrono::microseconds NativeClock::GetTimeUS() { |
| 82 | const u64 rtsc_value = GetRTSC(); | 154 | const u64 rtsc_value = GetRTSC(); |
| 83 | return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; | 155 | return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; |
| 84 | } | 156 | } |
| 85 | 157 | ||
| 86 | std::chrono::milliseconds NativeClock::GetTimeMS() { | 158 | std::chrono::milliseconds NativeClock::GetTimeMS() { |
| 87 | const u64 rtsc_value = GetRTSC(); | 159 | const u64 rtsc_value = GetRTSC(); |
| 88 | return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; | 160 | return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; |
| 89 | } | 161 | } |
| 90 | 162 | ||
| 91 | u64 NativeClock::GetClockCycles() { | 163 | u64 NativeClock::GetClockCycles() { |
| 92 | const u64 rtsc_value = GetRTSC(); | 164 | const u64 rtsc_value = GetRTSC(); |
| 93 | return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); | 165 | return MultiplyHigh(rtsc_value, clock_rtsc_factor); |
| 94 | } | 166 | } |
| 95 | 167 | ||
| 96 | u64 NativeClock::GetCPUCycles() { | 168 | u64 NativeClock::GetCPUCycles() { |
| 97 | const u64 rtsc_value = GetRTSC(); | 169 | const u64 rtsc_value = GetRTSC(); |
| 98 | return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); | 170 | return MultiplyHigh(rtsc_value, cpu_rtsc_factor); |
| 99 | } | 171 | } |
| 100 | 172 | ||
| 101 | } // namespace X64 | 173 | } // namespace X64 |