summaryrefslogtreecommitdiff
path: root/src/common/x64/native_clock.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/x64/native_clock.cpp')
-rw-r--r--src/common/x64/native_clock.cpp110
1 files changed, 91 insertions, 19 deletions
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index eb8a7782f..a65f6b832 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -2,19 +2,74 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <chrono> 6#include <chrono>
7#include <limits>
6#include <mutex> 8#include <mutex>
7#include <thread> 9#include <thread>
8 10
9#ifdef _MSC_VER 11#ifdef _MSC_VER
10#include <intrin.h> 12#include <intrin.h>
13
14#pragma intrinsic(__umulh)
15#pragma intrinsic(_udiv128)
11#else 16#else
12#include <x86intrin.h> 17#include <x86intrin.h>
13#endif 18#endif
14 19
20#include "common/atomic_ops.h"
15#include "common/uint128.h" 21#include "common/uint128.h"
16#include "common/x64/native_clock.h" 22#include "common/x64/native_clock.h"
17 23
24namespace {
25
26[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
27#ifdef __SIZEOF_INT128__
28 const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
29 return static_cast<u64>(base / divisor);
30#elif defined(_M_X64) || defined(_M_ARM64)
31 std::array<u64, 2> r = {0, numerator};
32 u64 remainder;
33#if _MSC_VER < 1923
34 return udiv128(r[1], r[0], divisor, &remainder);
35#else
36 return _udiv128(r[1], r[0], divisor, &remainder);
37#endif
38#else
39 // This one is bit more inaccurate.
40 return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
41#endif
42}
43
44[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
45#ifdef __SIZEOF_INT128__
46 return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
47#elif defined(_M_X64) || defined(_M_ARM64)
48 return __umulh(a, b); // MSVC
49#else
50 // Generic fallback
51 const u64 a_lo = u32(a);
52 const u64 a_hi = a >> 32;
53 const u64 b_lo = u32(b);
54 const u64 b_hi = b >> 32;
55
56 const u64 a_x_b_hi = a_hi * b_hi;
57 const u64 a_x_b_mid = a_hi * b_lo;
58 const u64 b_x_a_mid = b_hi * a_lo;
59 const u64 a_x_b_lo = a_lo * b_lo;
60
61 const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
62 static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
63 32;
64
65 const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
66
67 return multhi;
68#endif
69}
70
71} // namespace
72
18namespace Common { 73namespace Common {
19 74
20u64 EstimateRDTSCFrequency() { 75u64 EstimateRDTSCFrequency() {
@@ -48,54 +103,71 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
48 : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ 103 : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
49 rtsc_frequency_} { 104 rtsc_frequency_} {
50 _mm_mfence(); 105 _mm_mfence();
51 last_measure = __rdtsc(); 106 time_point.inner.last_measure = __rdtsc();
52 accumulated_ticks = 0U; 107 time_point.inner.accumulated_ticks = 0U;
108 ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
109 us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
110 ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
111 clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
112 cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
53} 113}
54 114
55u64 NativeClock::GetRTSC() { 115u64 NativeClock::GetRTSC() {
56 std::scoped_lock scope{rtsc_serialize}; 116 TimePoint new_time_point{};
57 _mm_mfence(); 117 TimePoint current_time_point{};
58 const u64 current_measure = __rdtsc(); 118 do {
59 u64 diff = current_measure - last_measure; 119 current_time_point.pack = time_point.pack;
60 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) 120 _mm_mfence();
61 if (current_measure > last_measure) { 121 const u64 current_measure = __rdtsc();
62 last_measure = current_measure; 122 u64 diff = current_measure - current_time_point.inner.last_measure;
63 } 123 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
64 accumulated_ticks += diff; 124 new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
125 ? current_measure
126 : current_time_point.inner.last_measure;
127 new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
128 } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
129 current_time_point.pack));
65 /// The clock cannot be more precise than the guest timer, remove the lower bits 130 /// The clock cannot be more precise than the guest timer, remove the lower bits
66 return accumulated_ticks & inaccuracy_mask; 131 return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
67} 132}
68 133
69void NativeClock::Pause(bool is_paused) { 134void NativeClock::Pause(bool is_paused) {
70 if (!is_paused) { 135 if (!is_paused) {
71 _mm_mfence(); 136 TimePoint current_time_point{};
72 last_measure = __rdtsc(); 137 TimePoint new_time_point{};
138 do {
139 current_time_point.pack = time_point.pack;
140 new_time_point.pack = current_time_point.pack;
141 _mm_mfence();
142 new_time_point.inner.last_measure = __rdtsc();
143 } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
144 current_time_point.pack));
73 } 145 }
74} 146}
75 147
76std::chrono::nanoseconds NativeClock::GetTimeNS() { 148std::chrono::nanoseconds NativeClock::GetTimeNS() {
77 const u64 rtsc_value = GetRTSC(); 149 const u64 rtsc_value = GetRTSC();
78 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; 150 return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
79} 151}
80 152
81std::chrono::microseconds NativeClock::GetTimeUS() { 153std::chrono::microseconds NativeClock::GetTimeUS() {
82 const u64 rtsc_value = GetRTSC(); 154 const u64 rtsc_value = GetRTSC();
83 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; 155 return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
84} 156}
85 157
86std::chrono::milliseconds NativeClock::GetTimeMS() { 158std::chrono::milliseconds NativeClock::GetTimeMS() {
87 const u64 rtsc_value = GetRTSC(); 159 const u64 rtsc_value = GetRTSC();
88 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; 160 return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
89} 161}
90 162
91u64 NativeClock::GetClockCycles() { 163u64 NativeClock::GetClockCycles() {
92 const u64 rtsc_value = GetRTSC(); 164 const u64 rtsc_value = GetRTSC();
93 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); 165 return MultiplyHigh(rtsc_value, clock_rtsc_factor);
94} 166}
95 167
96u64 NativeClock::GetCPUCycles() { 168u64 NativeClock::GetCPUCycles() {
97 const u64 rtsc_value = GetRTSC(); 169 const u64 rtsc_value = GetRTSC();
98 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); 170 return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
99} 171}
100 172
101} // namespace X64 173} // namespace X64