diff options
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/settings.h | 1 | ||||
| -rw-r--r-- | src/common/steady_clock.cpp | 5 | ||||
| -rw-r--r-- | src/common/wall_clock.cpp | 77 | ||||
| -rw-r--r-- | src/common/wall_clock.h | 89 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 3 | ||||
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 20 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 166 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 59 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.cpp | 39 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.h | 37 |
11 files changed, 224 insertions, 274 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) | |||
| 172 | x64/cpu_wait.h | 172 | x64/cpu_wait.h |
| 173 | x64/native_clock.cpp | 173 | x64/native_clock.cpp |
| 174 | x64/native_clock.h | 174 | x64/native_clock.h |
| 175 | x64/rdtsc.cpp | ||
| 176 | x64/rdtsc.h | ||
| 175 | x64/xbyak_abi.h | 177 | x64/xbyak_abi.h |
| 176 | x64/xbyak_util.h | 178 | x64/xbyak_util.h |
| 177 | ) | 179 | ) |
diff --git a/src/common/settings.h b/src/common/settings.h index 9682281b0..3aedf3850 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -483,6 +483,7 @@ struct Values { | |||
| 483 | AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, | 483 | AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, |
| 484 | "astc_recompression"}; | 484 | "astc_recompression"}; |
| 485 | SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; | 485 | SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; |
| 486 | SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"}; | ||
| 486 | 487 | ||
| 487 | SwitchableSetting<u8> bg_red{0, "bg_red"}; | 488 | SwitchableSetting<u8> bg_red{0, "bg_red"}; |
| 488 | SwitchableSetting<u8> bg_green{0, "bg_green"}; | 489 | SwitchableSetting<u8> bg_green{0, "bg_green"}; |
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp | |||
| @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { | |||
| 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. | 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. |
| 29 | static constexpr s64 Multiplier = 100; | 29 | static constexpr s64 Multiplier = 100; |
| 30 | // Convert Windows epoch to Unix epoch. | 30 | // Convert Windows epoch to Unix epoch. |
| 31 | static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; | 31 | static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; |
| 32 | 32 | ||
| 33 | FILETIME filetime; | 33 | FILETIME filetime; |
| 34 | GetSystemTimePreciseAsFileTime(&filetime); | 34 | GetSystemTimePreciseAsFileTime(&filetime); |
| 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + | 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + |
| 36 | static_cast<s64>(filetime.dwLowDateTime)) - | 36 | static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); |
| 37 | WindowsEpochToUnixEpochNS; | ||
| 38 | } | 37 | } |
| 39 | #endif | 38 | #endif |
| 40 | 39 | ||
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp | |||
| @@ -2,88 +2,75 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/steady_clock.h" | 4 | #include "common/steady_clock.h" |
| 5 | #include "common/uint128.h" | ||
| 6 | #include "common/wall_clock.h" | 5 | #include "common/wall_clock.h" |
| 7 | 6 | ||
| 8 | #ifdef ARCHITECTURE_x86_64 | 7 | #ifdef ARCHITECTURE_x86_64 |
| 9 | #include "common/x64/cpu_detect.h" | 8 | #include "common/x64/cpu_detect.h" |
| 10 | #include "common/x64/native_clock.h" | 9 | #include "common/x64/native_clock.h" |
| 10 | #include "common/x64/rdtsc.h" | ||
| 11 | #endif | 11 | #endif |
| 12 | 12 | ||
| 13 | namespace Common { | 13 | namespace Common { |
| 14 | 14 | ||
| 15 | class StandardWallClock final : public WallClock { | 15 | class StandardWallClock final : public WallClock { |
| 16 | public: | 16 | public: |
| 17 | explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) | 17 | explicit StandardWallClock() : start_time{SteadyClock::Now()} {} |
| 18 | : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, | ||
| 19 | start_time{SteadyClock::Now()} {} | ||
| 20 | 18 | ||
| 21 | std::chrono::nanoseconds GetTimeNS() override { | 19 | std::chrono::nanoseconds GetTimeNS() const override { |
| 22 | return SteadyClock::Now() - start_time; | 20 | return SteadyClock::Now() - start_time; |
| 23 | } | 21 | } |
| 24 | 22 | ||
| 25 | std::chrono::microseconds GetTimeUS() override { | 23 | std::chrono::microseconds GetTimeUS() const override { |
| 26 | return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); | 24 | return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); |
| 27 | } | 25 | } |
| 28 | 26 | ||
| 29 | std::chrono::milliseconds GetTimeMS() override { | 27 | std::chrono::milliseconds GetTimeMS() const override { |
| 30 | return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); | 28 | return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); |
| 31 | } | 29 | } |
| 32 | 30 | ||
| 33 | u64 GetClockCycles() override { | 31 | u64 GetCNTPCT() const override { |
| 34 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); | 32 | return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; |
| 35 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 36 | } | 33 | } |
| 37 | 34 | ||
| 38 | u64 GetCPUCycles() override { | 35 | u64 GetGPUTick() const override { |
| 39 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); | 36 | return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; |
| 40 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 41 | } | 37 | } |
| 42 | 38 | ||
| 43 | void Pause([[maybe_unused]] bool is_paused) override { | 39 | u64 GetHostTicksNow() const override { |
| 44 | // Do nothing in this clock type. | 40 | return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); |
| 41 | } | ||
| 42 | |||
| 43 | u64 GetHostTicksElapsed() const override { | ||
| 44 | return static_cast<u64>(GetTimeNS().count()); | ||
| 45 | } | ||
| 46 | |||
| 47 | bool IsNative() const override { | ||
| 48 | return false; | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | private: | 51 | private: |
| 48 | SteadyClock::time_point start_time; | 52 | SteadyClock::time_point start_time; |
| 49 | }; | 53 | }; |
| 50 | 54 | ||
| 55 | std::unique_ptr<WallClock> CreateOptimalClock() { | ||
| 51 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 52 | |||
| 53 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 54 | u64 emulated_clock_frequency) { | ||
| 55 | const auto& caps = GetCPUCaps(); | 57 | const auto& caps = GetCPUCaps(); |
| 56 | u64 rtsc_frequency = 0; | ||
| 57 | if (caps.invariant_tsc) { | ||
| 58 | rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); | ||
| 59 | } | ||
| 60 | 58 | ||
| 61 | // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: | 59 | if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { |
| 62 | // - A nanosecond | 60 | return std::make_unique<X64::NativeClock>(caps.tsc_frequency); |
| 63 | // - The emulated CPU frequency | ||
| 64 | // - The emulated clock counter frequency (CNTFRQ) | ||
| 65 | if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || | ||
| 66 | rtsc_frequency <= emulated_clock_frequency) { | ||
| 67 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, | ||
| 68 | emulated_clock_frequency); | ||
| 69 | } else { | 61 | } else { |
| 70 | return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, | 62 | // Fallback to StandardWallClock if the hardware TSC |
| 71 | rtsc_frequency); | 63 | // - Is not invariant |
| 64 | // - Is not more precise than GPUTickFreq | ||
| 65 | return std::make_unique<StandardWallClock>(); | ||
| 72 | } | 66 | } |
| 73 | } | ||
| 74 | |||
| 75 | #else | 67 | #else |
| 76 | 68 | return std::make_unique<StandardWallClock>(); | |
| 77 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 78 | u64 emulated_clock_frequency) { | ||
| 79 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 80 | } | ||
| 81 | |||
| 82 | #endif | 69 | #endif |
| 70 | } | ||
| 83 | 71 | ||
| 84 | std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 72 | std::unique_ptr<WallClock> CreateStandardWallClock() { |
| 85 | u64 emulated_clock_frequency) { | 73 | return std::make_unique<StandardWallClock>(); |
| 86 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 87 | } | 74 | } |
| 88 | 75 | ||
| 89 | } // namespace Common | 76 | } // namespace Common |
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <ratio> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | 11 | ||
| @@ -12,50 +13,82 @@ namespace Common { | |||
| 12 | 13 | ||
| 13 | class WallClock { | 14 | class WallClock { |
| 14 | public: | 15 | public: |
| 15 | static constexpr u64 NS_RATIO = 1'000'000'000; | 16 | static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz |
| 16 | static constexpr u64 US_RATIO = 1'000'000; | 17 | static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz |
| 17 | static constexpr u64 MS_RATIO = 1'000; | 18 | static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz |
| 18 | 19 | ||
| 19 | virtual ~WallClock() = default; | 20 | virtual ~WallClock() = default; |
| 20 | 21 | ||
| 21 | /// Returns current wall time in nanoseconds | 22 | /// @returns The time in nanoseconds since the construction of this clock. |
| 22 | [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; | 23 | virtual std::chrono::nanoseconds GetTimeNS() const = 0; |
| 23 | 24 | ||
| 24 | /// Returns current wall time in microseconds | 25 | /// @returns The time in microseconds since the construction of this clock. |
| 25 | [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; | 26 | virtual std::chrono::microseconds GetTimeUS() const = 0; |
| 26 | 27 | ||
| 27 | /// Returns current wall time in milliseconds | 28 | /// @returns The time in milliseconds since the construction of this clock. |
| 28 | [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; | 29 | virtual std::chrono::milliseconds GetTimeMS() const = 0; |
| 29 | 30 | ||
| 30 | /// Returns current wall time in emulated clock cycles | 31 | /// @returns The guest CNTPCT ticks since the construction of this clock. |
| 31 | [[nodiscard]] virtual u64 GetClockCycles() = 0; | 32 | virtual u64 GetCNTPCT() const = 0; |
| 32 | 33 | ||
| 33 | /// Returns current wall time in emulated cpu cycles | 34 | /// @returns The guest GPU ticks since the construction of this clock. |
| 34 | [[nodiscard]] virtual u64 GetCPUCycles() = 0; | 35 | virtual u64 GetGPUTick() const = 0; |
| 35 | 36 | ||
| 36 | virtual void Pause(bool is_paused) = 0; | 37 | /// @returns The raw host timer ticks since an indeterminate epoch. |
| 38 | virtual u64 GetHostTicksNow() const = 0; | ||
| 37 | 39 | ||
| 38 | /// Tells if the wall clock, uses the host CPU's hardware clock | 40 | /// @returns The raw host timer ticks since the construction of this clock. |
| 39 | [[nodiscard]] bool IsNative() const { | 41 | virtual u64 GetHostTicksElapsed() const = 0; |
| 40 | return is_native; | 42 | |
| 43 | /// @returns Whether the clock directly uses the host's hardware clock. | ||
| 44 | virtual bool IsNative() const = 0; | ||
| 45 | |||
| 46 | static inline u64 NSToCNTPCT(u64 ns) { | ||
| 47 | return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline u64 NSToGPUTick(u64 ns) { | ||
| 51 | return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; | ||
| 52 | } | ||
| 53 | |||
| 54 | // Cycle Timing | ||
| 55 | |||
| 56 | static inline u64 CPUTickToNS(u64 cpu_tick) { | ||
| 57 | return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline u64 CPUTickToUS(u64 cpu_tick) { | ||
| 61 | return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; | ||
| 62 | } | ||
| 63 | |||
| 64 | static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { | ||
| 65 | return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline u64 CPUTickToGPUTick(u64 cpu_tick) { | ||
| 69 | return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; | ||
| 41 | } | 70 | } |
| 42 | 71 | ||
| 43 | protected: | 72 | protected: |
| 44 | explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) | 73 | using NsRatio = std::nano; |
| 45 | : emulated_cpu_frequency{emulated_cpu_frequency_}, | 74 | using UsRatio = std::micro; |
| 46 | emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} | 75 | using MsRatio = std::milli; |
| 76 | |||
| 77 | using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; | ||
| 78 | using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; | ||
| 79 | using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; | ||
| 80 | using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>; | ||
| 47 | 81 | ||
| 48 | u64 emulated_cpu_frequency; | 82 | // Cycle Timing |
| 49 | u64 emulated_clock_frequency; | ||
| 50 | 83 | ||
| 51 | private: | 84 | using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>; |
| 52 | bool is_native; | 85 | using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>; |
| 86 | using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>; | ||
| 87 | using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; | ||
| 53 | }; | 88 | }; |
| 54 | 89 | ||
| 55 | [[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | 90 | std::unique_ptr<WallClock> CreateOptimalClock(); |
| 56 | u64 emulated_clock_frequency); | ||
| 57 | 91 | ||
| 58 | [[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 92 | std::unique_ptr<WallClock> CreateStandardWallClock(); |
| 59 | u64 emulated_clock_frequency); | ||
| 60 | 93 | ||
| 61 | } // namespace Common | 94 | } // namespace Common |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/x64/cpu_detect.h" | 16 | #include "common/x64/cpu_detect.h" |
| 17 | #include "common/x64/rdtsc.h" | ||
| 17 | 18 | ||
| 18 | #ifdef _WIN32 | 19 | #ifdef _WIN32 |
| 19 | #include <windows.h> | 20 | #include <windows.h> |
| @@ -187,6 +188,8 @@ static CPUCaps Detect() { | |||
| 187 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * | 188 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * |
| 188 | caps.tsc_crystal_ratio_numerator / | 189 | caps.tsc_crystal_ratio_numerator / |
| 189 | caps.tsc_crystal_ratio_denominator; | 190 | caps.tsc_crystal_ratio_denominator; |
| 191 | } else { | ||
| 192 | caps.tsc_frequency = X64::EstimateRDTSCFrequency(); | ||
| 190 | } | 193 | } |
| 191 | } | 194 | } |
| 192 | 195 | ||
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -9,19 +9,11 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 11 | #include "common/x64/cpu_wait.h" | 11 | #include "common/x64/cpu_wait.h" |
| 12 | #include "common/x64/rdtsc.h" | ||
| 12 | 13 | ||
| 13 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 14 | 15 | ||
| 15 | #ifdef _MSC_VER | 16 | #ifdef _MSC_VER |
| 16 | __forceinline static u64 FencedRDTSC() { | ||
| 17 | _mm_lfence(); | ||
| 18 | _ReadWriteBarrier(); | ||
| 19 | const u64 result = __rdtsc(); | ||
| 20 | _mm_lfence(); | ||
| 21 | _ReadWriteBarrier(); | ||
| 22 | return result; | ||
| 23 | } | ||
| 24 | |||
| 25 | __forceinline static void TPAUSE() { | 17 | __forceinline static void TPAUSE() { |
| 26 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 27 | // For reference: | 19 | // For reference: |
| @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { | |||
| 32 | _tpause(0, FencedRDTSC() + PauseCycles); | 24 | _tpause(0, FencedRDTSC() + PauseCycles); |
| 33 | } | 25 | } |
| 34 | #else | 26 | #else |
| 35 | static u64 FencedRDTSC() { | ||
| 36 | u64 eax; | ||
| 37 | u64 edx; | ||
| 38 | asm volatile("lfence\n\t" | ||
| 39 | "rdtsc\n\t" | ||
| 40 | "lfence\n\t" | ||
| 41 | : "=a"(eax), "=d"(edx)); | ||
| 42 | return (edx << 32) | eax; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void TPAUSE() { | 27 | static void TPAUSE() { |
| 46 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 47 | // For reference: | 29 | // For reference: |
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -1,164 +1,50 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <array> | ||
| 5 | #include <chrono> | ||
| 6 | #include <thread> | ||
| 7 | |||
| 8 | #include "common/atomic_ops.h" | ||
| 9 | #include "common/steady_clock.h" | ||
| 10 | #include "common/uint128.h" | 4 | #include "common/uint128.h" |
| 11 | #include "common/x64/native_clock.h" | 5 | #include "common/x64/native_clock.h" |
| 6 | #include "common/x64/rdtsc.h" | ||
| 12 | 7 | ||
| 13 | #ifdef _MSC_VER | 8 | namespace Common::X64 { |
| 14 | #include <intrin.h> | ||
| 15 | #endif | ||
| 16 | |||
| 17 | namespace Common { | ||
| 18 | 9 | ||
| 19 | #ifdef _MSC_VER | 10 | NativeClock::NativeClock(u64 rdtsc_frequency_) |
| 20 | __forceinline static u64 FencedRDTSC() { | 11 | : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, |
| 21 | _mm_lfence(); | 12 | ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, |
| 22 | _ReadWriteBarrier(); | 13 | us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, |
| 23 | const u64 result = __rdtsc(); | 14 | ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, |
| 24 | _mm_lfence(); | 15 | cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, |
| 25 | _ReadWriteBarrier(); | 16 | gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} |
| 26 | return result; | ||
| 27 | } | ||
| 28 | #else | ||
| 29 | static u64 FencedRDTSC() { | ||
| 30 | u64 eax; | ||
| 31 | u64 edx; | ||
| 32 | asm volatile("lfence\n\t" | ||
| 33 | "rdtsc\n\t" | ||
| 34 | "lfence\n\t" | ||
| 35 | : "=a"(eax), "=d"(edx)); | ||
| 36 | return (edx << 32) | eax; | ||
| 37 | } | ||
| 38 | #endif | ||
| 39 | 17 | ||
| 40 | template <u64 Nearest> | 18 | std::chrono::nanoseconds NativeClock::GetTimeNS() const { |
| 41 | static u64 RoundToNearest(u64 value) { | 19 | return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; |
| 42 | const auto mod = value % Nearest; | ||
| 43 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 44 | } | 20 | } |
| 45 | 21 | ||
| 46 | u64 EstimateRDTSCFrequency() { | 22 | std::chrono::microseconds NativeClock::GetTimeUS() const { |
| 47 | // Discard the first result measuring the rdtsc. | 23 | return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; |
| 48 | FencedRDTSC(); | ||
| 49 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 50 | FencedRDTSC(); | ||
| 51 | |||
| 52 | // Get the current time. | ||
| 53 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 54 | const u64 tsc_start = FencedRDTSC(); | ||
| 55 | // Wait for 250 milliseconds. | ||
| 56 | std::this_thread::sleep_for(std::chrono::milliseconds{250}); | ||
| 57 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 58 | const u64 tsc_end = FencedRDTSC(); | ||
| 59 | // Calculate differences. | ||
| 60 | const u64 timer_diff = static_cast<u64>( | ||
| 61 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 62 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 63 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 64 | return RoundToNearest<1000>(tsc_freq); | ||
| 65 | } | 24 | } |
| 66 | 25 | ||
| 67 | namespace X64 { | 26 | std::chrono::milliseconds NativeClock::GetTimeMS() const { |
| 68 | NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 27 | return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; |
| 69 | u64 rtsc_frequency_) | ||
| 70 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ | ||
| 71 | rtsc_frequency_} { | ||
| 72 | // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. | ||
| 73 | time_sync_thread = std::jthread{[this](std::stop_token token) { | ||
| 74 | // Get the current time. | ||
| 75 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 76 | const u64 tsc_start = FencedRDTSC(); | ||
| 77 | // Wait for 10 seconds. | ||
| 78 | if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { | ||
| 79 | return; | ||
| 80 | } | ||
| 81 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 82 | const u64 tsc_end = FencedRDTSC(); | ||
| 83 | // Calculate differences. | ||
| 84 | const u64 timer_diff = static_cast<u64>( | ||
| 85 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 86 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 87 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 88 | rtsc_frequency = tsc_freq; | ||
| 89 | CalculateAndSetFactors(); | ||
| 90 | }}; | ||
| 91 | |||
| 92 | time_point.inner.last_measure = FencedRDTSC(); | ||
| 93 | time_point.inner.accumulated_ticks = 0U; | ||
| 94 | CalculateAndSetFactors(); | ||
| 95 | } | 28 | } |
| 96 | 29 | ||
| 97 | u64 NativeClock::GetRTSC() { | 30 | u64 NativeClock::GetCNTPCT() const { |
| 98 | TimePoint new_time_point{}; | 31 | return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); |
| 99 | TimePoint current_time_point{}; | ||
| 100 | |||
| 101 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 102 | do { | ||
| 103 | const u64 current_measure = FencedRDTSC(); | ||
| 104 | u64 diff = current_measure - current_time_point.inner.last_measure; | ||
| 105 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) | ||
| 106 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure | ||
| 107 | ? current_measure | ||
| 108 | : current_time_point.inner.last_measure; | ||
| 109 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; | ||
| 110 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 111 | current_time_point.pack, current_time_point.pack)); | ||
| 112 | return new_time_point.inner.accumulated_ticks; | ||
| 113 | } | 32 | } |
| 114 | 33 | ||
| 115 | void NativeClock::Pause(bool is_paused) { | 34 | u64 NativeClock::GetGPUTick() const { |
| 116 | if (!is_paused) { | 35 | return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); |
| 117 | TimePoint current_time_point{}; | ||
| 118 | TimePoint new_time_point{}; | ||
| 119 | |||
| 120 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 121 | do { | ||
| 122 | new_time_point.pack = current_time_point.pack; | ||
| 123 | new_time_point.inner.last_measure = FencedRDTSC(); | ||
| 124 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 125 | current_time_point.pack, current_time_point.pack)); | ||
| 126 | } | ||
| 127 | } | 36 | } |
| 128 | 37 | ||
| 129 | std::chrono::nanoseconds NativeClock::GetTimeNS() { | 38 | u64 NativeClock::GetHostTicksNow() const { |
| 130 | const u64 rtsc_value = GetRTSC(); | 39 | return FencedRDTSC(); |
| 131 | return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; | ||
| 132 | } | 40 | } |
| 133 | 41 | ||
| 134 | std::chrono::microseconds NativeClock::GetTimeUS() { | 42 | u64 NativeClock::GetHostTicksElapsed() const { |
| 135 | const u64 rtsc_value = GetRTSC(); | 43 | return FencedRDTSC() - start_ticks; |
| 136 | return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; | ||
| 137 | } | 44 | } |
| 138 | 45 | ||
| 139 | std::chrono::milliseconds NativeClock::GetTimeMS() { | 46 | bool NativeClock::IsNative() const { |
| 140 | const u64 rtsc_value = GetRTSC(); | 47 | return true; |
| 141 | return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; | ||
| 142 | } | 48 | } |
| 143 | 49 | ||
| 144 | u64 NativeClock::GetClockCycles() { | 50 | } // namespace Common::X64 |
| 145 | const u64 rtsc_value = GetRTSC(); | ||
| 146 | return MultiplyHigh(rtsc_value, clock_rtsc_factor); | ||
| 147 | } | ||
| 148 | |||
| 149 | u64 NativeClock::GetCPUCycles() { | ||
| 150 | const u64 rtsc_value = GetRTSC(); | ||
| 151 | return MultiplyHigh(rtsc_value, cpu_rtsc_factor); | ||
| 152 | } | ||
| 153 | |||
| 154 | void NativeClock::CalculateAndSetFactors() { | ||
| 155 | ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); | ||
| 156 | us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); | ||
| 157 | ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); | ||
| 158 | clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); | ||
| 159 | cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); | ||
| 160 | } | ||
| 161 | |||
| 162 | } // namespace X64 | ||
| 163 | |||
| 164 | } // namespace Common | ||
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h | |||
| @@ -3,58 +3,39 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "common/polyfill_thread.h" | ||
| 7 | #include "common/wall_clock.h" | 6 | #include "common/wall_clock.h" |
| 8 | 7 | ||
| 9 | namespace Common { | 8 | namespace Common::X64 { |
| 10 | 9 | ||
| 11 | namespace X64 { | ||
| 12 | class NativeClock final : public WallClock { | 10 | class NativeClock final : public WallClock { |
| 13 | public: | 11 | public: |
| 14 | explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 12 | explicit NativeClock(u64 rdtsc_frequency_); |
| 15 | u64 rtsc_frequency_); | ||
| 16 | 13 | ||
| 17 | std::chrono::nanoseconds GetTimeNS() override; | 14 | std::chrono::nanoseconds GetTimeNS() const override; |
| 18 | 15 | ||
| 19 | std::chrono::microseconds GetTimeUS() override; | 16 | std::chrono::microseconds GetTimeUS() const override; |
| 20 | 17 | ||
| 21 | std::chrono::milliseconds GetTimeMS() override; | 18 | std::chrono::milliseconds GetTimeMS() const override; |
| 22 | 19 | ||
| 23 | u64 GetClockCycles() override; | 20 | u64 GetCNTPCT() const override; |
| 24 | 21 | ||
| 25 | u64 GetCPUCycles() override; | 22 | u64 GetGPUTick() const override; |
| 26 | 23 | ||
| 27 | void Pause(bool is_paused) override; | 24 | u64 GetHostTicksNow() const override; |
| 28 | 25 | ||
| 29 | private: | 26 | u64 GetHostTicksElapsed() const override; |
| 30 | u64 GetRTSC(); | ||
| 31 | |||
| 32 | void CalculateAndSetFactors(); | ||
| 33 | |||
| 34 | union alignas(16) TimePoint { | ||
| 35 | TimePoint() : pack{} {} | ||
| 36 | u128 pack{}; | ||
| 37 | struct Inner { | ||
| 38 | u64 last_measure{}; | ||
| 39 | u64 accumulated_ticks{}; | ||
| 40 | } inner; | ||
| 41 | }; | ||
| 42 | |||
| 43 | TimePoint time_point; | ||
| 44 | 27 | ||
| 45 | // factors | 28 | bool IsNative() const override; |
| 46 | u64 clock_rtsc_factor{}; | ||
| 47 | u64 cpu_rtsc_factor{}; | ||
| 48 | u64 ns_rtsc_factor{}; | ||
| 49 | u64 us_rtsc_factor{}; | ||
| 50 | u64 ms_rtsc_factor{}; | ||
| 51 | 29 | ||
| 52 | u64 rtsc_frequency; | 30 | private: |
| 53 | 31 | u64 start_ticks; | |
| 54 | std::jthread time_sync_thread; | 32 | u64 rdtsc_frequency; |
| 33 | |||
| 34 | u64 ns_rdtsc_factor; | ||
| 35 | u64 us_rdtsc_factor; | ||
| 36 | u64 ms_rdtsc_factor; | ||
| 37 | u64 cntpct_rdtsc_factor; | ||
| 38 | u64 gputick_rdtsc_factor; | ||
| 55 | }; | 39 | }; |
| 56 | } // namespace X64 | ||
| 57 | |||
| 58 | u64 EstimateRDTSCFrequency(); | ||
| 59 | 40 | ||
| 60 | } // namespace Common | 41 | } // namespace Common::X64 |
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <thread> | ||
| 5 | |||
| 6 | #include "common/steady_clock.h" | ||
| 7 | #include "common/uint128.h" | ||
| 8 | #include "common/x64/rdtsc.h" | ||
| 9 | |||
| 10 | namespace Common::X64 { | ||
| 11 | |||
| 12 | template <u64 Nearest> | ||
| 13 | static u64 RoundToNearest(u64 value) { | ||
| 14 | const auto mod = value % Nearest; | ||
| 15 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 16 | } | ||
| 17 | |||
| 18 | u64 EstimateRDTSCFrequency() { | ||
| 19 | // Discard the first result measuring the rdtsc. | ||
| 20 | FencedRDTSC(); | ||
| 21 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 22 | FencedRDTSC(); | ||
| 23 | |||
| 24 | // Get the current time. | ||
| 25 | const auto start_time = RealTimeClock::Now(); | ||
| 26 | const u64 tsc_start = FencedRDTSC(); | ||
| 27 | // Wait for 100 milliseconds. | ||
| 28 | std::this_thread::sleep_for(std::chrono::milliseconds{100}); | ||
| 29 | const auto end_time = RealTimeClock::Now(); | ||
| 30 | const u64 tsc_end = FencedRDTSC(); | ||
| 31 | // Calculate differences. | ||
| 32 | const u64 timer_diff = static_cast<u64>( | ||
| 33 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 34 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 35 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 36 | return RoundToNearest<100'000>(tsc_freq); | ||
| 37 | } | ||
| 38 | |||
| 39 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #ifdef _MSC_VER | ||
| 7 | #include <intrin.h> | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | #ifdef _MSC_VER | ||
| 15 | __forceinline static u64 FencedRDTSC() { | ||
| 16 | _mm_lfence(); | ||
| 17 | _ReadWriteBarrier(); | ||
| 18 | const u64 result = __rdtsc(); | ||
| 19 | _mm_lfence(); | ||
| 20 | _ReadWriteBarrier(); | ||
| 21 | return result; | ||
| 22 | } | ||
| 23 | #else | ||
| 24 | static inline u64 FencedRDTSC() { | ||
| 25 | u64 eax; | ||
| 26 | u64 edx; | ||
| 27 | asm volatile("lfence\n\t" | ||
| 28 | "rdtsc\n\t" | ||
| 29 | "lfence\n\t" | ||
| 30 | : "=a"(eax), "=d"(edx)); | ||
| 31 | return (edx << 32) | eax; | ||
| 32 | } | ||
| 33 | #endif | ||
| 34 | |||
| 35 | u64 EstimateRDTSCFrequency(); | ||
| 36 | |||
| 37 | } // namespace Common::X64 | ||