diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/x64/native_clock.cpp | 69 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 7 |
2 files changed, 71 insertions, 5 deletions
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index eb8a7782f..e246432d0 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -2,12 +2,17 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <limits> | ||
| 6 | #include <mutex> | 8 | #include <mutex> |
| 7 | #include <thread> | 9 | #include <thread> |
| 8 | 10 | ||
| 9 | #ifdef _MSC_VER | 11 | #ifdef _MSC_VER |
| 10 | #include <intrin.h> | 12 | #include <intrin.h> |
| 13 | |||
| 14 | #pragma intrinsic(__umulh) | ||
| 15 | #pragma intrinsic(_udiv128) | ||
| 11 | #else | 16 | #else |
| 12 | #include <x86intrin.h> | 17 | #include <x86intrin.h> |
| 13 | #endif | 18 | #endif |
| @@ -15,6 +20,55 @@ | |||
| 15 | #include "common/uint128.h" | 20 | #include "common/uint128.h" |
| 16 | #include "common/x64/native_clock.h" | 21 | #include "common/x64/native_clock.h" |
| 17 | 22 | ||
| 23 | namespace { | ||
| 24 | |||
/// Computes floor((numerator * 2^64) / divisor), i.e. the ratio numerator / divisor
/// expressed as a fixed-point fraction with 64 fractional bits.
///
/// The result is meant to be combined with MultiplyHigh(): taking the upper 64 bits of
/// (value * factor) approximates (value * numerator / divisor) without a division.
///
/// @param numerator Dividend of the ratio.
/// @param divisor   Divisor of the ratio; it is divided by directly, so it must be non-zero.
/// @return The low 64 bits of the quotient.
///
/// NOTE(review): only the low 64 bits of the quotient are returned, so the value is
/// meaningful only when numerator < divisor - confirm that every caller guarantees this.
[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
#ifdef __SIZEOF_INT128__
    using Wide = unsigned __int128;
    // Place the numerator in the upper half of a 128-bit dividend.
    const Wide scaled_numerator = static_cast<Wide>(numerator) << 64ULL;
    const Wide quotient = scaled_numerator / divisor;
    return static_cast<u64>(quotient);
#elif defined(_M_X64) || defined(_M_ARM64)
    // dividend[1] is the high half and dividend[0] the low half of the 128-bit dividend.
    const std::array<u64, 2> dividend = {0, numerator};
    u64 remainder;
#if _MSC_VER < 1923
    return udiv128(dividend[1], dividend[0], divisor, &remainder);
#else
    return _udiv128(dividend[1], dividend[0], divisor, &remainder);
#endif
#else
    // This one is a bit more inaccurate.
    return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
#endif
}
| 42 | |||
/// Returns the upper 64 bits of the full 128-bit product a * b.
///
/// @param a First 64-bit factor.
/// @param b Second 64-bit factor.
/// @return (a * b) >> 64, computed without losing the high half of the product.
[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
#ifdef __SIZEOF_INT128__
    using Wide = unsigned __int128;
    const Wide product = static_cast<Wide>(a) * static_cast<Wide>(b);
    return static_cast<u64>(product >> 64);
#elif defined(_M_X64) || defined(_M_ARM64)
    return __umulh(a, b); // MSVC
#else
    // Generic fallback: schoolbook multiplication on the 32-bit halves of each operand.
    constexpr u64 low_mask = 0xFFFFFFFFULL;

    const u64 a_low = a & low_mask;
    const u64 a_high = a >> 32;
    const u64 b_low = b & low_mask;
    const u64 b_high = b >> 32;

    const u64 high_high = a_high * b_high;
    const u64 high_low = a_high * b_low;
    const u64 low_high = b_high * a_low;
    const u64 low_low = a_low * b_low;

    // Whatever the low halves of the cross terms plus the top of the low product
    // push past bit 32 carries over into the upper 64 bits of the result.
    const u64 middle_sum = (high_low & low_mask) + (low_high & low_mask) + (low_low >> 32);
    const u64 carry = middle_sum >> 32;

    return high_high + (high_low >> 32) + (low_high >> 32) + carry;
#endif
}
| 69 | |||
| 70 | } // namespace | ||
| 71 | |||
| 18 | namespace Common { | 72 | namespace Common { |
| 19 | 73 | ||
| 20 | u64 EstimateRDTSCFrequency() { | 74 | u64 EstimateRDTSCFrequency() { |
| @@ -50,6 +104,11 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen | |||
| 50 | _mm_mfence(); | 104 | _mm_mfence(); |
| 51 | last_measure = __rdtsc(); | 105 | last_measure = __rdtsc(); |
| 52 | accumulated_ticks = 0U; | 106 | accumulated_ticks = 0U; |
| 107 | ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); | ||
| 108 | us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); | ||
| 109 | ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); | ||
| 110 | clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); | ||
| 111 | cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); | ||
| 53 | } | 112 | } |
| 54 | 113 | ||
| 55 | u64 NativeClock::GetRTSC() { | 114 | u64 NativeClock::GetRTSC() { |
| @@ -75,27 +134,27 @@ void NativeClock::Pause(bool is_paused) { | |||
| 75 | 134 | ||
| 76 | std::chrono::nanoseconds NativeClock::GetTimeNS() { | 135 | std::chrono::nanoseconds NativeClock::GetTimeNS() { |
| 77 | const u64 rtsc_value = GetRTSC(); | 136 | const u64 rtsc_value = GetRTSC(); |
| 78 | return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; | 137 | return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; |
| 79 | } | 138 | } |
| 80 | 139 | ||
| 81 | std::chrono::microseconds NativeClock::GetTimeUS() { | 140 | std::chrono::microseconds NativeClock::GetTimeUS() { |
| 82 | const u64 rtsc_value = GetRTSC(); | 141 | const u64 rtsc_value = GetRTSC(); |
| 83 | return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; | 142 | return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; |
| 84 | } | 143 | } |
| 85 | 144 | ||
| 86 | std::chrono::milliseconds NativeClock::GetTimeMS() { | 145 | std::chrono::milliseconds NativeClock::GetTimeMS() { |
| 87 | const u64 rtsc_value = GetRTSC(); | 146 | const u64 rtsc_value = GetRTSC(); |
| 88 | return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; | 147 | return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; |
| 89 | } | 148 | } |
| 90 | 149 | ||
| 91 | u64 NativeClock::GetClockCycles() { | 150 | u64 NativeClock::GetClockCycles() { |
| 92 | const u64 rtsc_value = GetRTSC(); | 151 | const u64 rtsc_value = GetRTSC(); |
| 93 | return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); | 152 | return MultiplyHigh(rtsc_value, clock_rtsc_factor); |
| 94 | } | 153 | } |
| 95 | 154 | ||
| 96 | u64 NativeClock::GetCPUCycles() { | 155 | u64 NativeClock::GetCPUCycles() { |
| 97 | const u64 rtsc_value = GetRTSC(); | 156 | const u64 rtsc_value = GetRTSC(); |
| 98 | return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); | 157 | return MultiplyHigh(rtsc_value, cpu_rtsc_factor); |
| 99 | } | 158 | } |
| 100 | 159 | ||
| 101 | } // namespace X64 | 160 | } // namespace X64 |
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 6d1e32ac8..a7b1ee9e0 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h | |||
| @@ -41,6 +41,13 @@ private: | |||
| 41 | u64 last_measure{}; | 41 | u64 last_measure{}; |
| 42 | u64 accumulated_ticks{}; | 42 | u64 accumulated_ticks{}; |
| 43 | u64 rtsc_frequency; | 43 | u64 rtsc_frequency; |
| 44 | |||
| 45 | // factors | ||
| 46 | u64 ns_rtsc_factor{}; | ||
| 47 | u64 us_rtsc_factor{}; | ||
| 48 | u64 ms_rtsc_factor{}; | ||
| 49 | u64 clock_rtsc_factor{}; | ||
| 50 | u64 cpu_rtsc_factor{}; | ||
| 44 | }; | 51 | }; |
| 45 | } // namespace X64 | 52 | } // namespace X64 |
| 46 | 53 | ||