summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/x64/native_clock.cpp69
-rw-r--r--src/common/x64/native_clock.h7
2 files changed, 71 insertions, 5 deletions
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index eb8a7782f..e246432d0 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -2,12 +2,17 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array>
5#include <chrono> 6#include <chrono>
7#include <limits>
6#include <mutex> 8#include <mutex>
7#include <thread> 9#include <thread>
8 10
9#ifdef _MSC_VER 11#ifdef _MSC_VER
10#include <intrin.h> 12#include <intrin.h>
13
14#pragma intrinsic(__umulh)
15#pragma intrinsic(_udiv128)
11#else 16#else
12#include <x86intrin.h> 17#include <x86intrin.h>
13#endif 18#endif
@@ -15,6 +20,55 @@
15#include "common/uint128.h" 20#include "common/uint128.h"
16#include "common/x64/native_clock.h" 21#include "common/x64/native_clock.h"
17 22
23namespace {
24
25[[nodiscard]] u64 GetFixedPoint64Factor(u64 numerator, u64 divisor) {
26#ifdef __SIZEOF_INT128__
27 const auto base = static_cast<unsigned __int128>(numerator) << 64ULL;
28 return static_cast<u64>(base / divisor);
29#elif defined(_M_X64) || defined(_M_ARM64)
30 std::array<u64, 2> r = {0, numerator};
31 u64 remainder;
32#if _MSC_VER < 1923
33 return udiv128(r[1], r[0], divisor, &remainder);
34#else
35 return _udiv128(r[1], r[0], divisor, &remainder);
36#endif
37#else
38 // This one is bit more inaccurate.
39 return MultiplyAndDivide64(std::numeric_limits<u64>::max(), numerator, divisor);
40#endif
41}
42
43[[nodiscard]] u64 MultiplyHigh(u64 a, u64 b) {
44#ifdef __SIZEOF_INT128__
45 return (static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b)) >> 64;
46#elif defined(_M_X64) || defined(_M_ARM64)
47 return __umulh(a, b); // MSVC
48#else
49 // Generic fallback
50 const u64 a_lo = u32(a);
51 const u64 a_hi = a >> 32;
52 const u64 b_lo = u32(b);
53 const u64 b_hi = b >> 32;
54
55 const u64 a_x_b_hi = a_hi * b_hi;
56 const u64 a_x_b_mid = a_hi * b_lo;
57 const u64 b_x_a_mid = b_hi * a_lo;
58 const u64 a_x_b_lo = a_lo * b_lo;
59
60 const u64 carry_bit = (static_cast<u64>(static_cast<u32>(a_x_b_mid)) +
61 static_cast<u64>(static_cast<u32>(b_x_a_mid)) + (a_x_b_lo >> 32)) >>
62 32;
63
64 const u64 multhi = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
65
66 return multhi;
67#endif
68}
69
70} // namespace
71
18namespace Common { 72namespace Common {
19 73
20u64 EstimateRDTSCFrequency() { 74u64 EstimateRDTSCFrequency() {
@@ -50,6 +104,11 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
50 _mm_mfence(); 104 _mm_mfence();
51 last_measure = __rdtsc(); 105 last_measure = __rdtsc();
52 accumulated_ticks = 0U; 106 accumulated_ticks = 0U;
107 ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
108 us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
109 ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
110 clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency);
111 cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency);
53} 112}
54 113
55u64 NativeClock::GetRTSC() { 114u64 NativeClock::GetRTSC() {
@@ -75,27 +134,27 @@ void NativeClock::Pause(bool is_paused) {
75 134
76std::chrono::nanoseconds NativeClock::GetTimeNS() { 135std::chrono::nanoseconds NativeClock::GetTimeNS() {
77 const u64 rtsc_value = GetRTSC(); 136 const u64 rtsc_value = GetRTSC();
78 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)}; 137 return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)};
79} 138}
80 139
81std::chrono::microseconds NativeClock::GetTimeUS() { 140std::chrono::microseconds NativeClock::GetTimeUS() {
82 const u64 rtsc_value = GetRTSC(); 141 const u64 rtsc_value = GetRTSC();
83 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)}; 142 return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)};
84} 143}
85 144
86std::chrono::milliseconds NativeClock::GetTimeMS() { 145std::chrono::milliseconds NativeClock::GetTimeMS() {
87 const u64 rtsc_value = GetRTSC(); 146 const u64 rtsc_value = GetRTSC();
88 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)}; 147 return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)};
89} 148}
90 149
91u64 NativeClock::GetClockCycles() { 150u64 NativeClock::GetClockCycles() {
92 const u64 rtsc_value = GetRTSC(); 151 const u64 rtsc_value = GetRTSC();
93 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency); 152 return MultiplyHigh(rtsc_value, clock_rtsc_factor);
94} 153}
95 154
96u64 NativeClock::GetCPUCycles() { 155u64 NativeClock::GetCPUCycles() {
97 const u64 rtsc_value = GetRTSC(); 156 const u64 rtsc_value = GetRTSC();
98 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency); 157 return MultiplyHigh(rtsc_value, cpu_rtsc_factor);
99} 158}
100 159
101} // namespace X64 160} // namespace X64
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
index 6d1e32ac8..a7b1ee9e0 100644
--- a/src/common/x64/native_clock.h
+++ b/src/common/x64/native_clock.h
@@ -41,6 +41,13 @@ private:
41 u64 last_measure{}; 41 u64 last_measure{};
42 u64 accumulated_ticks{}; 42 u64 accumulated_ticks{};
43 u64 rtsc_frequency; 43 u64 rtsc_frequency;
44
45 // factors
46 u64 ns_rtsc_factor{};
47 u64 us_rtsc_factor{};
48 u64 ms_rtsc_factor{};
49 u64 clock_rtsc_factor{};
50 u64 cpu_rtsc_factor{};
44}; 51};
45} // namespace X64 52} // namespace X64
46 53