diff options
| author | 2023-06-21 21:12:46 -0700 | |
|---|---|---|
| committer | 2023-06-21 21:12:46 -0700 | |
| commit | e3122c5b468fd59b7eded5a3a7300643d05616bc (patch) | |
| tree | 3fe4c2c0bea83f3bb8849ef8839b9520873d41c2 /src | |
| parent | Merge pull request #10777 from liamwhite/no-barrier (diff) | |
| parent | nvdisp: Fix SingleCore frametime reporting (diff) | |
| download | yuzu-e3122c5b468fd59b7eded5a3a7300643d05616bc.tar.gz yuzu-e3122c5b468fd59b7eded5a3a7300643d05616bc.tar.xz yuzu-e3122c5b468fd59b7eded5a3a7300643d05616bc.zip | |
Merge pull request #10086 from Morph1984/coretiming-ng-1
core_timing: Use CNTPCT as the guest CPU tick
Diffstat (limited to 'src')
31 files changed, 280 insertions, 429 deletions
diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp index 74772fc50..b1db31e93 100644 --- a/src/audio_core/renderer/adsp/adsp.cpp +++ b/src/audio_core/renderer/adsp/adsp.cpp | |||
| @@ -7,7 +7,6 @@ | |||
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 10 | #include "core/core_timing_util.h" | ||
| 11 | #include "core/memory.h" | 10 | #include "core/memory.h" |
| 12 | 11 | ||
| 13 | namespace AudioCore::AudioRenderer::ADSP { | 12 | namespace AudioCore::AudioRenderer::ADSP { |
diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp index 8bc39f9f9..9ca716b60 100644 --- a/src/audio_core/renderer/adsp/audio_renderer.cpp +++ b/src/audio_core/renderer/adsp/audio_renderer.cpp | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | #include "common/thread.h" | 13 | #include "common/thread.h" |
| 14 | #include "core/core.h" | 14 | #include "core/core.h" |
| 15 | #include "core/core_timing.h" | 15 | #include "core/core_timing.h" |
| 16 | #include "core/core_timing_util.h" | ||
| 17 | 16 | ||
| 18 | MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); | 17 | MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); |
| 19 | 18 | ||
| @@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { | |||
| 144 | 143 | ||
| 145 | mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); | 144 | mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); |
| 146 | 145 | ||
| 146 | // 0.12 seconds (2304000 / 19200000) | ||
| 147 | constexpr u64 max_process_time{2'304'000ULL}; | 147 | constexpr u64 max_process_time{2'304'000ULL}; |
| 148 | 148 | ||
| 149 | while (!stop_token.stop_requested()) { | 149 | while (!stop_token.stop_requested()) { |
| @@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { | |||
| 184 | u64 max_time{max_process_time}; | 184 | u64 max_time{max_process_time}; |
| 185 | if (index == 1 && command_buffer.applet_resource_user_id == | 185 | if (index == 1 && command_buffer.applet_resource_user_id == |
| 186 | mailbox->GetCommandBuffer(0).applet_resource_user_id) { | 186 | mailbox->GetCommandBuffer(0).applet_resource_user_id) { |
| 187 | max_time = max_process_time - | 187 | max_time = max_process_time - render_times_taken[0]; |
| 188 | Core::Timing::CyclesToNs(render_times_taken[0]).count(); | ||
| 189 | if (render_times_taken[0] > max_process_time) { | 188 | if (render_times_taken[0] > max_process_time) { |
| 190 | max_time = 0; | 189 | max_time = 0; |
| 191 | } | 190 | } |
diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp b/src/audio_core/renderer/adsp/command_list_processor.cpp index 7a300d216..3a0f1ae38 100644 --- a/src/audio_core/renderer/adsp/command_list_processor.cpp +++ b/src/audio_core/renderer/adsp/command_list_processor.cpp | |||
| @@ -9,7 +9,6 @@ | |||
| 9 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/core_timing.h" | 11 | #include "core/core_timing.h" |
| 12 | #include "core/core_timing_util.h" | ||
| 13 | #include "core/memory.h" | 12 | #include "core/memory.h" |
| 14 | 13 | ||
| 15 | namespace AudioCore::AudioRenderer::ADSP { | 14 | namespace AudioCore::AudioRenderer::ADSP { |
diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp index 985958b03..4a881547f 100644 --- a/src/audio_core/renderer/command/performance/performance.cpp +++ b/src/audio_core/renderer/command/performance/performance.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include "audio_core/renderer/command/performance/performance.h" | 5 | #include "audio_core/renderer/command/performance/performance.h" |
| 6 | #include "core/core.h" | 6 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 7 | #include "core/core_timing.h" |
| 8 | #include "core/core_timing_util.h" | ||
| 9 | 8 | ||
| 10 | namespace AudioCore::AudioRenderer { | 9 | namespace AudioCore::AudioRenderer { |
| 11 | 10 | ||
| @@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) { | |||
| 18 | auto base{entry_address.translated_address}; | 17 | auto base{entry_address.translated_address}; |
| 19 | if (state == PerformanceState::Start) { | 18 | if (state == PerformanceState::Start) { |
| 20 | auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)}; | 19 | auto start_time_ptr{reinterpret_cast<u32*>(base + entry_address.entry_start_time_offset)}; |
| 21 | *start_time_ptr = static_cast<u32>( | 20 | *start_time_ptr = |
| 22 | Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - | 21 | static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time - |
| 23 | processor.start_time - processor.current_processing_time) | 22 | processor.current_processing_time); |
| 24 | .count()); | ||
| 25 | } else if (state == PerformanceState::Stop) { | 23 | } else if (state == PerformanceState::Stop) { |
| 26 | auto processed_time_ptr{ | 24 | auto processed_time_ptr{ |
| 27 | reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)}; | 25 | reinterpret_cast<u32*>(base + entry_address.entry_processed_time_offset)}; |
| 28 | auto entry_count_ptr{ | 26 | auto entry_count_ptr{ |
| 29 | reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)}; | 27 | reinterpret_cast<u32*>(base + entry_address.header_entry_count_offset)}; |
| 30 | 28 | ||
| 31 | *processed_time_ptr = static_cast<u32>( | 29 | *processed_time_ptr = |
| 32 | Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - | 30 | static_cast<u32>(processor.system->CoreTiming().GetClockTicks() - processor.start_time - |
| 33 | processor.start_time - processor.current_processing_time) | 31 | processor.current_processing_time); |
| 34 | .count()); | ||
| 35 | (*entry_count_ptr)++; | 32 | (*entry_count_ptr)++; |
| 36 | } | 33 | } |
| 37 | } | 34 | } |
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index f44fedfd5..9a718a9cc 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "common/settings.h" | 15 | #include "common/settings.h" |
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/core_timing.h" | 17 | #include "core/core_timing.h" |
| 18 | #include "core/core_timing_util.h" | ||
| 19 | 18 | ||
| 20 | namespace AudioCore::Sink { | 19 | namespace AudioCore::Sink { |
| 21 | 20 | ||
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) | |||
| 172 | x64/cpu_wait.h | 172 | x64/cpu_wait.h |
| 173 | x64/native_clock.cpp | 173 | x64/native_clock.cpp |
| 174 | x64/native_clock.h | 174 | x64/native_clock.h |
| 175 | x64/rdtsc.cpp | ||
| 176 | x64/rdtsc.h | ||
| 175 | x64/xbyak_abi.h | 177 | x64/xbyak_abi.h |
| 176 | x64/xbyak_util.h | 178 | x64/xbyak_util.h |
| 177 | ) | 179 | ) |
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp | |||
| @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { | |||
| 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. | 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. |
| 29 | static constexpr s64 Multiplier = 100; | 29 | static constexpr s64 Multiplier = 100; |
| 30 | // Convert Windows epoch to Unix epoch. | 30 | // Convert Windows epoch to Unix epoch. |
| 31 | static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; | 31 | static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; |
| 32 | 32 | ||
| 33 | FILETIME filetime; | 33 | FILETIME filetime; |
| 34 | GetSystemTimePreciseAsFileTime(&filetime); | 34 | GetSystemTimePreciseAsFileTime(&filetime); |
| 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + | 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + |
| 36 | static_cast<s64>(filetime.dwLowDateTime)) - | 36 | static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); |
| 37 | WindowsEpochToUnixEpochNS; | ||
| 38 | } | 37 | } |
| 39 | #endif | 38 | #endif |
| 40 | 39 | ||
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp | |||
| @@ -2,88 +2,75 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/steady_clock.h" | 4 | #include "common/steady_clock.h" |
| 5 | #include "common/uint128.h" | ||
| 6 | #include "common/wall_clock.h" | 5 | #include "common/wall_clock.h" |
| 7 | 6 | ||
| 8 | #ifdef ARCHITECTURE_x86_64 | 7 | #ifdef ARCHITECTURE_x86_64 |
| 9 | #include "common/x64/cpu_detect.h" | 8 | #include "common/x64/cpu_detect.h" |
| 10 | #include "common/x64/native_clock.h" | 9 | #include "common/x64/native_clock.h" |
| 10 | #include "common/x64/rdtsc.h" | ||
| 11 | #endif | 11 | #endif |
| 12 | 12 | ||
| 13 | namespace Common { | 13 | namespace Common { |
| 14 | 14 | ||
| 15 | class StandardWallClock final : public WallClock { | 15 | class StandardWallClock final : public WallClock { |
| 16 | public: | 16 | public: |
| 17 | explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) | 17 | explicit StandardWallClock() : start_time{SteadyClock::Now()} {} |
| 18 | : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, | ||
| 19 | start_time{SteadyClock::Now()} {} | ||
| 20 | 18 | ||
| 21 | std::chrono::nanoseconds GetTimeNS() override { | 19 | std::chrono::nanoseconds GetTimeNS() const override { |
| 22 | return SteadyClock::Now() - start_time; | 20 | return SteadyClock::Now() - start_time; |
| 23 | } | 21 | } |
| 24 | 22 | ||
| 25 | std::chrono::microseconds GetTimeUS() override { | 23 | std::chrono::microseconds GetTimeUS() const override { |
| 26 | return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); | 24 | return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); |
| 27 | } | 25 | } |
| 28 | 26 | ||
| 29 | std::chrono::milliseconds GetTimeMS() override { | 27 | std::chrono::milliseconds GetTimeMS() const override { |
| 30 | return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); | 28 | return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); |
| 31 | } | 29 | } |
| 32 | 30 | ||
| 33 | u64 GetClockCycles() override { | 31 | u64 GetCNTPCT() const override { |
| 34 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); | 32 | return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; |
| 35 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 36 | } | 33 | } |
| 37 | 34 | ||
| 38 | u64 GetCPUCycles() override { | 35 | u64 GetGPUTick() const override { |
| 39 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); | 36 | return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; |
| 40 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 41 | } | 37 | } |
| 42 | 38 | ||
| 43 | void Pause([[maybe_unused]] bool is_paused) override { | 39 | u64 GetHostTicksNow() const override { |
| 44 | // Do nothing in this clock type. | 40 | return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); |
| 41 | } | ||
| 42 | |||
| 43 | u64 GetHostTicksElapsed() const override { | ||
| 44 | return static_cast<u64>(GetTimeNS().count()); | ||
| 45 | } | ||
| 46 | |||
| 47 | bool IsNative() const override { | ||
| 48 | return false; | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | private: | 51 | private: |
| 48 | SteadyClock::time_point start_time; | 52 | SteadyClock::time_point start_time; |
| 49 | }; | 53 | }; |
| 50 | 54 | ||
| 55 | std::unique_ptr<WallClock> CreateOptimalClock() { | ||
| 51 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 52 | |||
| 53 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 54 | u64 emulated_clock_frequency) { | ||
| 55 | const auto& caps = GetCPUCaps(); | 57 | const auto& caps = GetCPUCaps(); |
| 56 | u64 rtsc_frequency = 0; | ||
| 57 | if (caps.invariant_tsc) { | ||
| 58 | rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); | ||
| 59 | } | ||
| 60 | 58 | ||
| 61 | // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: | 59 | if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { |
| 62 | // - A nanosecond | 60 | return std::make_unique<X64::NativeClock>(caps.tsc_frequency); |
| 63 | // - The emulated CPU frequency | ||
| 64 | // - The emulated clock counter frequency (CNTFRQ) | ||
| 65 | if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || | ||
| 66 | rtsc_frequency <= emulated_clock_frequency) { | ||
| 67 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, | ||
| 68 | emulated_clock_frequency); | ||
| 69 | } else { | 61 | } else { |
| 70 | return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, | 62 | // Fallback to StandardWallClock if the hardware TSC |
| 71 | rtsc_frequency); | 63 | // - Is not invariant |
| 64 | // - Is not more precise than GPUTickFreq | ||
| 65 | return std::make_unique<StandardWallClock>(); | ||
| 72 | } | 66 | } |
| 73 | } | ||
| 74 | |||
| 75 | #else | 67 | #else |
| 76 | 68 | return std::make_unique<StandardWallClock>(); | |
| 77 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 78 | u64 emulated_clock_frequency) { | ||
| 79 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 80 | } | ||
| 81 | |||
| 82 | #endif | 69 | #endif |
| 70 | } | ||
| 83 | 71 | ||
| 84 | std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 72 | std::unique_ptr<WallClock> CreateStandardWallClock() { |
| 85 | u64 emulated_clock_frequency) { | 73 | return std::make_unique<StandardWallClock>(); |
| 86 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 87 | } | 74 | } |
| 88 | 75 | ||
| 89 | } // namespace Common | 76 | } // namespace Common |
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <ratio> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | 11 | ||
| @@ -12,50 +13,82 @@ namespace Common { | |||
| 12 | 13 | ||
| 13 | class WallClock { | 14 | class WallClock { |
| 14 | public: | 15 | public: |
| 15 | static constexpr u64 NS_RATIO = 1'000'000'000; | 16 | static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz |
| 16 | static constexpr u64 US_RATIO = 1'000'000; | 17 | static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz |
| 17 | static constexpr u64 MS_RATIO = 1'000; | 18 | static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz |
| 18 | 19 | ||
| 19 | virtual ~WallClock() = default; | 20 | virtual ~WallClock() = default; |
| 20 | 21 | ||
| 21 | /// Returns current wall time in nanoseconds | 22 | /// @returns The time in nanoseconds since the construction of this clock. |
| 22 | [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; | 23 | virtual std::chrono::nanoseconds GetTimeNS() const = 0; |
| 23 | 24 | ||
| 24 | /// Returns current wall time in microseconds | 25 | /// @returns The time in microseconds since the construction of this clock. |
| 25 | [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; | 26 | virtual std::chrono::microseconds GetTimeUS() const = 0; |
| 26 | 27 | ||
| 27 | /// Returns current wall time in milliseconds | 28 | /// @returns The time in milliseconds since the construction of this clock. |
| 28 | [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; | 29 | virtual std::chrono::milliseconds GetTimeMS() const = 0; |
| 29 | 30 | ||
| 30 | /// Returns current wall time in emulated clock cycles | 31 | /// @returns The guest CNTPCT ticks since the construction of this clock. |
| 31 | [[nodiscard]] virtual u64 GetClockCycles() = 0; | 32 | virtual u64 GetCNTPCT() const = 0; |
| 32 | 33 | ||
| 33 | /// Returns current wall time in emulated cpu cycles | 34 | /// @returns The guest GPU ticks since the construction of this clock. |
| 34 | [[nodiscard]] virtual u64 GetCPUCycles() = 0; | 35 | virtual u64 GetGPUTick() const = 0; |
| 35 | 36 | ||
| 36 | virtual void Pause(bool is_paused) = 0; | 37 | /// @returns The raw host timer ticks since an indeterminate epoch. |
| 38 | virtual u64 GetHostTicksNow() const = 0; | ||
| 37 | 39 | ||
| 38 | /// Tells if the wall clock, uses the host CPU's hardware clock | 40 | /// @returns The raw host timer ticks since the construction of this clock. |
| 39 | [[nodiscard]] bool IsNative() const { | 41 | virtual u64 GetHostTicksElapsed() const = 0; |
| 40 | return is_native; | 42 | |
| 43 | /// @returns Whether the clock directly uses the host's hardware clock. | ||
| 44 | virtual bool IsNative() const = 0; | ||
| 45 | |||
| 46 | static inline u64 NSToCNTPCT(u64 ns) { | ||
| 47 | return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline u64 NSToGPUTick(u64 ns) { | ||
| 51 | return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; | ||
| 52 | } | ||
| 53 | |||
| 54 | // Cycle Timing | ||
| 55 | |||
| 56 | static inline u64 CPUTickToNS(u64 cpu_tick) { | ||
| 57 | return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline u64 CPUTickToUS(u64 cpu_tick) { | ||
| 61 | return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; | ||
| 62 | } | ||
| 63 | |||
| 64 | static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { | ||
| 65 | return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline u64 CPUTickToGPUTick(u64 cpu_tick) { | ||
| 69 | return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; | ||
| 41 | } | 70 | } |
| 42 | 71 | ||
| 43 | protected: | 72 | protected: |
| 44 | explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) | 73 | using NsRatio = std::nano; |
| 45 | : emulated_cpu_frequency{emulated_cpu_frequency_}, | 74 | using UsRatio = std::micro; |
| 46 | emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} | 75 | using MsRatio = std::milli; |
| 76 | |||
| 77 | using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; | ||
| 78 | using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; | ||
| 79 | using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; | ||
| 80 | using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>; | ||
| 47 | 81 | ||
| 48 | u64 emulated_cpu_frequency; | 82 | // Cycle Timing |
| 49 | u64 emulated_clock_frequency; | ||
| 50 | 83 | ||
| 51 | private: | 84 | using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>; |
| 52 | bool is_native; | 85 | using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>; |
| 86 | using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>; | ||
| 87 | using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; | ||
| 53 | }; | 88 | }; |
| 54 | 89 | ||
| 55 | [[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | 90 | std::unique_ptr<WallClock> CreateOptimalClock(); |
| 56 | u64 emulated_clock_frequency); | ||
| 57 | 91 | ||
| 58 | [[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 92 | std::unique_ptr<WallClock> CreateStandardWallClock(); |
| 59 | u64 emulated_clock_frequency); | ||
| 60 | 93 | ||
| 61 | } // namespace Common | 94 | } // namespace Common |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/x64/cpu_detect.h" | 16 | #include "common/x64/cpu_detect.h" |
| 17 | #include "common/x64/rdtsc.h" | ||
| 17 | 18 | ||
| 18 | #ifdef _WIN32 | 19 | #ifdef _WIN32 |
| 19 | #include <windows.h> | 20 | #include <windows.h> |
| @@ -187,6 +188,8 @@ static CPUCaps Detect() { | |||
| 187 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * | 188 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * |
| 188 | caps.tsc_crystal_ratio_numerator / | 189 | caps.tsc_crystal_ratio_numerator / |
| 189 | caps.tsc_crystal_ratio_denominator; | 190 | caps.tsc_crystal_ratio_denominator; |
| 191 | } else { | ||
| 192 | caps.tsc_frequency = X64::EstimateRDTSCFrequency(); | ||
| 190 | } | 193 | } |
| 191 | } | 194 | } |
| 192 | 195 | ||
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -9,19 +9,11 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 11 | #include "common/x64/cpu_wait.h" | 11 | #include "common/x64/cpu_wait.h" |
| 12 | #include "common/x64/rdtsc.h" | ||
| 12 | 13 | ||
| 13 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 14 | 15 | ||
| 15 | #ifdef _MSC_VER | 16 | #ifdef _MSC_VER |
| 16 | __forceinline static u64 FencedRDTSC() { | ||
| 17 | _mm_lfence(); | ||
| 18 | _ReadWriteBarrier(); | ||
| 19 | const u64 result = __rdtsc(); | ||
| 20 | _mm_lfence(); | ||
| 21 | _ReadWriteBarrier(); | ||
| 22 | return result; | ||
| 23 | } | ||
| 24 | |||
| 25 | __forceinline static void TPAUSE() { | 17 | __forceinline static void TPAUSE() { |
| 26 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 27 | // For reference: | 19 | // For reference: |
| @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { | |||
| 32 | _tpause(0, FencedRDTSC() + PauseCycles); | 24 | _tpause(0, FencedRDTSC() + PauseCycles); |
| 33 | } | 25 | } |
| 34 | #else | 26 | #else |
| 35 | static u64 FencedRDTSC() { | ||
| 36 | u64 eax; | ||
| 37 | u64 edx; | ||
| 38 | asm volatile("lfence\n\t" | ||
| 39 | "rdtsc\n\t" | ||
| 40 | "lfence\n\t" | ||
| 41 | : "=a"(eax), "=d"(edx)); | ||
| 42 | return (edx << 32) | eax; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void TPAUSE() { | 27 | static void TPAUSE() { |
| 46 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 47 | // For reference: | 29 | // For reference: |
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -1,164 +1,50 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <array> | ||
| 5 | #include <chrono> | ||
| 6 | #include <thread> | ||
| 7 | |||
| 8 | #include "common/atomic_ops.h" | ||
| 9 | #include "common/steady_clock.h" | ||
| 10 | #include "common/uint128.h" | 4 | #include "common/uint128.h" |
| 11 | #include "common/x64/native_clock.h" | 5 | #include "common/x64/native_clock.h" |
| 6 | #include "common/x64/rdtsc.h" | ||
| 12 | 7 | ||
| 13 | #ifdef _MSC_VER | 8 | namespace Common::X64 { |
| 14 | #include <intrin.h> | ||
| 15 | #endif | ||
| 16 | |||
| 17 | namespace Common { | ||
| 18 | 9 | ||
| 19 | #ifdef _MSC_VER | 10 | NativeClock::NativeClock(u64 rdtsc_frequency_) |
| 20 | __forceinline static u64 FencedRDTSC() { | 11 | : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, |
| 21 | _mm_lfence(); | 12 | ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, |
| 22 | _ReadWriteBarrier(); | 13 | us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, |
| 23 | const u64 result = __rdtsc(); | 14 | ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, |
| 24 | _mm_lfence(); | 15 | cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, |
| 25 | _ReadWriteBarrier(); | 16 | gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} |
| 26 | return result; | ||
| 27 | } | ||
| 28 | #else | ||
| 29 | static u64 FencedRDTSC() { | ||
| 30 | u64 eax; | ||
| 31 | u64 edx; | ||
| 32 | asm volatile("lfence\n\t" | ||
| 33 | "rdtsc\n\t" | ||
| 34 | "lfence\n\t" | ||
| 35 | : "=a"(eax), "=d"(edx)); | ||
| 36 | return (edx << 32) | eax; | ||
| 37 | } | ||
| 38 | #endif | ||
| 39 | 17 | ||
| 40 | template <u64 Nearest> | 18 | std::chrono::nanoseconds NativeClock::GetTimeNS() const { |
| 41 | static u64 RoundToNearest(u64 value) { | 19 | return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; |
| 42 | const auto mod = value % Nearest; | ||
| 43 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 44 | } | 20 | } |
| 45 | 21 | ||
| 46 | u64 EstimateRDTSCFrequency() { | 22 | std::chrono::microseconds NativeClock::GetTimeUS() const { |
| 47 | // Discard the first result measuring the rdtsc. | 23 | return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; |
| 48 | FencedRDTSC(); | ||
| 49 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 50 | FencedRDTSC(); | ||
| 51 | |||
| 52 | // Get the current time. | ||
| 53 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 54 | const u64 tsc_start = FencedRDTSC(); | ||
| 55 | // Wait for 250 milliseconds. | ||
| 56 | std::this_thread::sleep_for(std::chrono::milliseconds{250}); | ||
| 57 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 58 | const u64 tsc_end = FencedRDTSC(); | ||
| 59 | // Calculate differences. | ||
| 60 | const u64 timer_diff = static_cast<u64>( | ||
| 61 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 62 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 63 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 64 | return RoundToNearest<1000>(tsc_freq); | ||
| 65 | } | 24 | } |
| 66 | 25 | ||
| 67 | namespace X64 { | 26 | std::chrono::milliseconds NativeClock::GetTimeMS() const { |
| 68 | NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 27 | return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; |
| 69 | u64 rtsc_frequency_) | ||
| 70 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ | ||
| 71 | rtsc_frequency_} { | ||
| 72 | // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. | ||
| 73 | time_sync_thread = std::jthread{[this](std::stop_token token) { | ||
| 74 | // Get the current time. | ||
| 75 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 76 | const u64 tsc_start = FencedRDTSC(); | ||
| 77 | // Wait for 10 seconds. | ||
| 78 | if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { | ||
| 79 | return; | ||
| 80 | } | ||
| 81 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 82 | const u64 tsc_end = FencedRDTSC(); | ||
| 83 | // Calculate differences. | ||
| 84 | const u64 timer_diff = static_cast<u64>( | ||
| 85 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 86 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 87 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 88 | rtsc_frequency = tsc_freq; | ||
| 89 | CalculateAndSetFactors(); | ||
| 90 | }}; | ||
| 91 | |||
| 92 | time_point.inner.last_measure = FencedRDTSC(); | ||
| 93 | time_point.inner.accumulated_ticks = 0U; | ||
| 94 | CalculateAndSetFactors(); | ||
| 95 | } | 28 | } |
| 96 | 29 | ||
| 97 | u64 NativeClock::GetRTSC() { | 30 | u64 NativeClock::GetCNTPCT() const { |
| 98 | TimePoint new_time_point{}; | 31 | return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); |
| 99 | TimePoint current_time_point{}; | ||
| 100 | |||
| 101 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 102 | do { | ||
| 103 | const u64 current_measure = FencedRDTSC(); | ||
| 104 | u64 diff = current_measure - current_time_point.inner.last_measure; | ||
| 105 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) | ||
| 106 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure | ||
| 107 | ? current_measure | ||
| 108 | : current_time_point.inner.last_measure; | ||
| 109 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; | ||
| 110 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 111 | current_time_point.pack, current_time_point.pack)); | ||
| 112 | return new_time_point.inner.accumulated_ticks; | ||
| 113 | } | 32 | } |
| 114 | 33 | ||
| 115 | void NativeClock::Pause(bool is_paused) { | 34 | u64 NativeClock::GetGPUTick() const { |
| 116 | if (!is_paused) { | 35 | return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); |
| 117 | TimePoint current_time_point{}; | ||
| 118 | TimePoint new_time_point{}; | ||
| 119 | |||
| 120 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 121 | do { | ||
| 122 | new_time_point.pack = current_time_point.pack; | ||
| 123 | new_time_point.inner.last_measure = FencedRDTSC(); | ||
| 124 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 125 | current_time_point.pack, current_time_point.pack)); | ||
| 126 | } | ||
| 127 | } | 36 | } |
| 128 | 37 | ||
| 129 | std::chrono::nanoseconds NativeClock::GetTimeNS() { | 38 | u64 NativeClock::GetHostTicksNow() const { |
| 130 | const u64 rtsc_value = GetRTSC(); | 39 | return FencedRDTSC(); |
| 131 | return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; | ||
| 132 | } | 40 | } |
| 133 | 41 | ||
| 134 | std::chrono::microseconds NativeClock::GetTimeUS() { | 42 | u64 NativeClock::GetHostTicksElapsed() const { |
| 135 | const u64 rtsc_value = GetRTSC(); | 43 | return FencedRDTSC() - start_ticks; |
| 136 | return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; | ||
| 137 | } | 44 | } |
| 138 | 45 | ||
| 139 | std::chrono::milliseconds NativeClock::GetTimeMS() { | 46 | bool NativeClock::IsNative() const { |
| 140 | const u64 rtsc_value = GetRTSC(); | 47 | return true; |
| 141 | return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; | ||
| 142 | } | 48 | } |
| 143 | 49 | ||
| 144 | u64 NativeClock::GetClockCycles() { | 50 | } // namespace Common::X64 |
| 145 | const u64 rtsc_value = GetRTSC(); | ||
| 146 | return MultiplyHigh(rtsc_value, clock_rtsc_factor); | ||
| 147 | } | ||
| 148 | |||
| 149 | u64 NativeClock::GetCPUCycles() { | ||
| 150 | const u64 rtsc_value = GetRTSC(); | ||
| 151 | return MultiplyHigh(rtsc_value, cpu_rtsc_factor); | ||
| 152 | } | ||
| 153 | |||
| 154 | void NativeClock::CalculateAndSetFactors() { | ||
| 155 | ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); | ||
| 156 | us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); | ||
| 157 | ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); | ||
| 158 | clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); | ||
| 159 | cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); | ||
| 160 | } | ||
| 161 | |||
| 162 | } // namespace X64 | ||
| 163 | |||
| 164 | } // namespace Common | ||
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h | |||
| @@ -3,58 +3,39 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "common/polyfill_thread.h" | ||
| 7 | #include "common/wall_clock.h" | 6 | #include "common/wall_clock.h" |
| 8 | 7 | ||
| 9 | namespace Common { | 8 | namespace Common::X64 { |
| 10 | 9 | ||
| 11 | namespace X64 { | ||
| 12 | class NativeClock final : public WallClock { | 10 | class NativeClock final : public WallClock { |
| 13 | public: | 11 | public: |
| 14 | explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 12 | explicit NativeClock(u64 rdtsc_frequency_); |
| 15 | u64 rtsc_frequency_); | ||
| 16 | 13 | ||
| 17 | std::chrono::nanoseconds GetTimeNS() override; | 14 | std::chrono::nanoseconds GetTimeNS() const override; |
| 18 | 15 | ||
| 19 | std::chrono::microseconds GetTimeUS() override; | 16 | std::chrono::microseconds GetTimeUS() const override; |
| 20 | 17 | ||
| 21 | std::chrono::milliseconds GetTimeMS() override; | 18 | std::chrono::milliseconds GetTimeMS() const override; |
| 22 | 19 | ||
| 23 | u64 GetClockCycles() override; | 20 | u64 GetCNTPCT() const override; |
| 24 | 21 | ||
| 25 | u64 GetCPUCycles() override; | 22 | u64 GetGPUTick() const override; |
| 26 | 23 | ||
| 27 | void Pause(bool is_paused) override; | 24 | u64 GetHostTicksNow() const override; |
| 28 | 25 | ||
| 29 | private: | 26 | u64 GetHostTicksElapsed() const override; |
| 30 | u64 GetRTSC(); | ||
| 31 | |||
| 32 | void CalculateAndSetFactors(); | ||
| 33 | |||
| 34 | union alignas(16) TimePoint { | ||
| 35 | TimePoint() : pack{} {} | ||
| 36 | u128 pack{}; | ||
| 37 | struct Inner { | ||
| 38 | u64 last_measure{}; | ||
| 39 | u64 accumulated_ticks{}; | ||
| 40 | } inner; | ||
| 41 | }; | ||
| 42 | |||
| 43 | TimePoint time_point; | ||
| 44 | 27 | ||
| 45 | // factors | 28 | bool IsNative() const override; |
| 46 | u64 clock_rtsc_factor{}; | ||
| 47 | u64 cpu_rtsc_factor{}; | ||
| 48 | u64 ns_rtsc_factor{}; | ||
| 49 | u64 us_rtsc_factor{}; | ||
| 50 | u64 ms_rtsc_factor{}; | ||
| 51 | 29 | ||
| 52 | u64 rtsc_frequency; | 30 | private: |
| 53 | 31 | u64 start_ticks; | |
| 54 | std::jthread time_sync_thread; | 32 | u64 rdtsc_frequency; |
| 33 | |||
| 34 | u64 ns_rdtsc_factor; | ||
| 35 | u64 us_rdtsc_factor; | ||
| 36 | u64 ms_rdtsc_factor; | ||
| 37 | u64 cntpct_rdtsc_factor; | ||
| 38 | u64 gputick_rdtsc_factor; | ||
| 55 | }; | 39 | }; |
| 56 | } // namespace X64 | ||
| 57 | |||
| 58 | u64 EstimateRDTSCFrequency(); | ||
| 59 | 40 | ||
| 60 | } // namespace Common | 41 | } // namespace Common::X64 |
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <thread> | ||
| 5 | |||
| 6 | #include "common/steady_clock.h" | ||
| 7 | #include "common/uint128.h" | ||
| 8 | #include "common/x64/rdtsc.h" | ||
| 9 | |||
| 10 | namespace Common::X64 { | ||
| 11 | |||
| 12 | template <u64 Nearest> | ||
| 13 | static u64 RoundToNearest(u64 value) { | ||
| 14 | const auto mod = value % Nearest; | ||
| 15 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 16 | } | ||
| 17 | |||
| 18 | u64 EstimateRDTSCFrequency() { | ||
| 19 | // Discard the first result measuring the rdtsc. | ||
| 20 | FencedRDTSC(); | ||
| 21 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 22 | FencedRDTSC(); | ||
| 23 | |||
| 24 | // Get the current time. | ||
| 25 | const auto start_time = RealTimeClock::Now(); | ||
| 26 | const u64 tsc_start = FencedRDTSC(); | ||
| 27 | // Wait for 100 milliseconds. | ||
| 28 | std::this_thread::sleep_for(std::chrono::milliseconds{100}); | ||
| 29 | const auto end_time = RealTimeClock::Now(); | ||
| 30 | const u64 tsc_end = FencedRDTSC(); | ||
| 31 | // Calculate differences. | ||
| 32 | const u64 timer_diff = static_cast<u64>( | ||
| 33 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 34 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 35 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 36 | return RoundToNearest<100'000>(tsc_freq); | ||
| 37 | } | ||
| 38 | |||
| 39 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #ifdef _MSC_VER | ||
| 7 | #include <intrin.h> | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | #ifdef _MSC_VER | ||
| 15 | __forceinline static u64 FencedRDTSC() { | ||
| 16 | _mm_lfence(); | ||
| 17 | _ReadWriteBarrier(); | ||
| 18 | const u64 result = __rdtsc(); | ||
| 19 | _mm_lfence(); | ||
| 20 | _ReadWriteBarrier(); | ||
| 21 | return result; | ||
| 22 | } | ||
| 23 | #else | ||
| 24 | static inline u64 FencedRDTSC() { | ||
| 25 | u64 eax; | ||
| 26 | u64 edx; | ||
| 27 | asm volatile("lfence\n\t" | ||
| 28 | "rdtsc\n\t" | ||
| 29 | "lfence\n\t" | ||
| 30 | : "=a"(eax), "=d"(edx)); | ||
| 31 | return (edx << 32) | eax; | ||
| 32 | } | ||
| 33 | #endif | ||
| 34 | |||
| 35 | u64 EstimateRDTSCFrequency(); | ||
| 36 | |||
| 37 | } // namespace Common::X64 | ||
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 227c431bc..3655b8478 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt | |||
| @@ -14,7 +14,6 @@ add_library(core STATIC | |||
| 14 | core.h | 14 | core.h |
| 15 | core_timing.cpp | 15 | core_timing.cpp |
| 16 | core_timing.h | 16 | core_timing.h |
| 17 | core_timing_util.h | ||
| 18 | cpu_manager.cpp | 17 | cpu_manager.cpp |
| 19 | cpu_manager.h | 18 | cpu_manager.h |
| 20 | crypto/aes_util.cpp | 19 | crypto/aes_util.cpp |
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 4f2692b05..4f0a3f8ea 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp | |||
| @@ -16,12 +16,11 @@ | |||
| 16 | 16 | ||
| 17 | #include "common/microprofile.h" | 17 | #include "common/microprofile.h" |
| 18 | #include "core/core_timing.h" | 18 | #include "core/core_timing.h" |
| 19 | #include "core/core_timing_util.h" | ||
| 20 | #include "core/hardware_properties.h" | 19 | #include "core/hardware_properties.h" |
| 21 | 20 | ||
| 22 | namespace Core::Timing { | 21 | namespace Core::Timing { |
| 23 | 22 | ||
| 24 | constexpr s64 MAX_SLICE_LENGTH = 4000; | 23 | constexpr s64 MAX_SLICE_LENGTH = 10000; |
| 25 | 24 | ||
| 26 | std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) { | 25 | std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) { |
| 27 | return std::make_shared<EventType>(std::move(callback), std::move(name)); | 26 | return std::make_shared<EventType>(std::move(callback), std::move(name)); |
| @@ -45,9 +44,7 @@ struct CoreTiming::Event { | |||
| 45 | } | 44 | } |
| 46 | }; | 45 | }; |
| 47 | 46 | ||
| 48 | CoreTiming::CoreTiming() | 47 | CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {} |
| 49 | : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, | ||
| 50 | event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} | ||
| 51 | 48 | ||
| 52 | CoreTiming::~CoreTiming() { | 49 | CoreTiming::~CoreTiming() { |
| 53 | Reset(); | 50 | Reset(); |
| @@ -68,7 +65,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) { | |||
| 68 | on_thread_init = std::move(on_thread_init_); | 65 | on_thread_init = std::move(on_thread_init_); |
| 69 | event_fifo_id = 0; | 66 | event_fifo_id = 0; |
| 70 | shutting_down = false; | 67 | shutting_down = false; |
| 71 | ticks = 0; | 68 | cpu_ticks = 0; |
| 72 | const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds) | 69 | const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds) |
| 73 | -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; | 70 | -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; |
| 74 | ev_lost = CreateEvent("_lost_event", empty_timed_callback); | 71 | ev_lost = CreateEvent("_lost_event", empty_timed_callback); |
| @@ -173,38 +170,30 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, | |||
| 173 | } | 170 | } |
| 174 | 171 | ||
| 175 | void CoreTiming::AddTicks(u64 ticks_to_add) { | 172 | void CoreTiming::AddTicks(u64 ticks_to_add) { |
| 176 | ticks += ticks_to_add; | 173 | cpu_ticks += ticks_to_add; |
| 177 | downcount -= static_cast<s64>(ticks); | 174 | downcount -= static_cast<s64>(cpu_ticks); |
| 178 | } | 175 | } |
| 179 | 176 | ||
| 180 | void CoreTiming::Idle() { | 177 | void CoreTiming::Idle() { |
| 181 | if (!event_queue.empty()) { | 178 | cpu_ticks += 1000U; |
| 182 | const u64 next_event_time = event_queue.front().time; | ||
| 183 | const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; | ||
| 184 | if (next_ticks > ticks) { | ||
| 185 | ticks = next_ticks; | ||
| 186 | } | ||
| 187 | return; | ||
| 188 | } | ||
| 189 | ticks += 1000U; | ||
| 190 | } | 179 | } |
| 191 | 180 | ||
| 192 | void CoreTiming::ResetTicks() { | 181 | void CoreTiming::ResetTicks() { |
| 193 | downcount = MAX_SLICE_LENGTH; | 182 | downcount = MAX_SLICE_LENGTH; |
| 194 | } | 183 | } |
| 195 | 184 | ||
| 196 | u64 CoreTiming::GetCPUTicks() const { | 185 | u64 CoreTiming::GetClockTicks() const { |
| 197 | if (is_multicore) [[likely]] { | 186 | if (is_multicore) [[likely]] { |
| 198 | return cpu_clock->GetCPUCycles(); | 187 | return clock->GetCNTPCT(); |
| 199 | } | 188 | } |
| 200 | return ticks; | 189 | return Common::WallClock::CPUTickToCNTPCT(cpu_ticks); |
| 201 | } | 190 | } |
| 202 | 191 | ||
| 203 | u64 CoreTiming::GetClockTicks() const { | 192 | u64 CoreTiming::GetGPUTicks() const { |
| 204 | if (is_multicore) [[likely]] { | 193 | if (is_multicore) [[likely]] { |
| 205 | return cpu_clock->GetClockCycles(); | 194 | return clock->GetGPUTick(); |
| 206 | } | 195 | } |
| 207 | return CpuCyclesToClockCycles(ticks); | 196 | return Common::WallClock::CPUTickToGPUTick(cpu_ticks); |
| 208 | } | 197 | } |
| 209 | 198 | ||
| 210 | std::optional<s64> CoreTiming::Advance() { | 199 | std::optional<s64> CoreTiming::Advance() { |
| @@ -297,9 +286,7 @@ void CoreTiming::ThreadLoop() { | |||
| 297 | } | 286 | } |
| 298 | 287 | ||
| 299 | paused_set = true; | 288 | paused_set = true; |
| 300 | event_clock->Pause(true); | ||
| 301 | pause_event.Wait(); | 289 | pause_event.Wait(); |
| 302 | event_clock->Pause(false); | ||
| 303 | } | 290 | } |
| 304 | } | 291 | } |
| 305 | 292 | ||
| @@ -315,25 +302,18 @@ void CoreTiming::Reset() { | |||
| 315 | has_started = false; | 302 | has_started = false; |
| 316 | } | 303 | } |
| 317 | 304 | ||
| 318 | std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { | ||
| 319 | if (is_multicore) [[likely]] { | ||
| 320 | return cpu_clock->GetTimeNS(); | ||
| 321 | } | ||
| 322 | return CyclesToNs(ticks); | ||
| 323 | } | ||
| 324 | |||
| 325 | std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { | 305 | std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { |
| 326 | if (is_multicore) [[likely]] { | 306 | if (is_multicore) [[likely]] { |
| 327 | return event_clock->GetTimeNS(); | 307 | return clock->GetTimeNS(); |
| 328 | } | 308 | } |
| 329 | return CyclesToNs(ticks); | 309 | return std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)}; |
| 330 | } | 310 | } |
| 331 | 311 | ||
| 332 | std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { | 312 | std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { |
| 333 | if (is_multicore) [[likely]] { | 313 | if (is_multicore) [[likely]] { |
| 334 | return event_clock->GetTimeUS(); | 314 | return clock->GetTimeUS(); |
| 335 | } | 315 | } |
| 336 | return CyclesToUs(ticks); | 316 | return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)}; |
| 337 | } | 317 | } |
| 338 | 318 | ||
| 339 | } // namespace Core::Timing | 319 | } // namespace Core::Timing |
diff --git a/src/core/core_timing.h b/src/core/core_timing.h index e7c4a949f..10db1de55 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h | |||
| @@ -116,14 +116,11 @@ public: | |||
| 116 | return downcount; | 116 | return downcount; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | /// Returns current time in emulated CPU cycles | 119 | /// Returns the current CNTPCT tick value. |
| 120 | u64 GetCPUTicks() const; | ||
| 121 | |||
| 122 | /// Returns current time in emulated in Clock cycles | ||
| 123 | u64 GetClockTicks() const; | 120 | u64 GetClockTicks() const; |
| 124 | 121 | ||
| 125 | /// Returns current time in nanoseconds. | 122 | /// Returns the current GPU tick value. |
| 126 | std::chrono::nanoseconds GetCPUTimeNs() const; | 123 | u64 GetGPUTicks() const; |
| 127 | 124 | ||
| 128 | /// Returns current time in microseconds. | 125 | /// Returns current time in microseconds. |
| 129 | std::chrono::microseconds GetGlobalTimeUs() const; | 126 | std::chrono::microseconds GetGlobalTimeUs() const; |
| @@ -142,8 +139,7 @@ private: | |||
| 142 | 139 | ||
| 143 | void Reset(); | 140 | void Reset(); |
| 144 | 141 | ||
| 145 | std::unique_ptr<Common::WallClock> cpu_clock; | 142 | std::unique_ptr<Common::WallClock> clock; |
| 146 | std::unique_ptr<Common::WallClock> event_clock; | ||
| 147 | 143 | ||
| 148 | s64 global_timer = 0; | 144 | s64 global_timer = 0; |
| 149 | 145 | ||
| @@ -171,7 +167,7 @@ private: | |||
| 171 | s64 pause_end_time{}; | 167 | s64 pause_end_time{}; |
| 172 | 168 | ||
| 173 | /// Cycle timing | 169 | /// Cycle timing |
| 174 | u64 ticks{}; | 170 | u64 cpu_ticks{}; |
| 175 | s64 downcount{}; | 171 | s64 downcount{}; |
| 176 | }; | 172 | }; |
| 177 | 173 | ||
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h deleted file mode 100644 index fe5aaefc7..000000000 --- a/src/core/core_timing_util.h +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <chrono> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/hardware_properties.h" | ||
| 10 | |||
| 11 | namespace Core::Timing { | ||
| 12 | |||
| 13 | namespace detail { | ||
| 14 | constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000; | ||
| 15 | constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000; | ||
| 16 | } // namespace detail | ||
| 17 | |||
| 18 | [[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) { | ||
| 19 | return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED; | ||
| 20 | } | ||
| 21 | |||
| 22 | [[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) { | ||
| 23 | return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000; | ||
| 24 | } | ||
| 25 | |||
| 26 | [[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) { | ||
| 27 | return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000; | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) { | ||
| 31 | return static_cast<u64>(ms.count()) * detail::CNTFREQ_ADJUSTED; | ||
| 32 | } | ||
| 33 | |||
| 34 | [[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) { | ||
| 35 | return us.count() * detail::CNTFREQ_ADJUSTED / 1000; | ||
| 36 | } | ||
| 37 | |||
| 38 | [[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) { | ||
| 39 | return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000; | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) { | ||
| 43 | return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED; | ||
| 44 | } | ||
| 45 | |||
| 46 | [[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) { | ||
| 47 | return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED); | ||
| 48 | } | ||
| 49 | |||
| 50 | [[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) { | ||
| 51 | return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED); | ||
| 52 | } | ||
| 53 | |||
| 54 | [[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) { | ||
| 55 | return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Core::Timing | ||
diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index faa12b4f0..75ce5a23c 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp | |||
| @@ -184,7 +184,8 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) { | |||
| 184 | prev_highest_thread != highest_thread) [[likely]] { | 184 | prev_highest_thread != highest_thread) [[likely]] { |
| 185 | if (prev_highest_thread != nullptr) [[likely]] { | 185 | if (prev_highest_thread != nullptr) [[likely]] { |
| 186 | IncrementScheduledCount(prev_highest_thread); | 186 | IncrementScheduledCount(prev_highest_thread); |
| 187 | prev_highest_thread->SetLastScheduledTick(m_kernel.System().CoreTiming().GetCPUTicks()); | 187 | prev_highest_thread->SetLastScheduledTick( |
| 188 | m_kernel.System().CoreTiming().GetClockTicks()); | ||
| 188 | } | 189 | } |
| 189 | if (m_state.should_count_idle) { | 190 | if (m_state.should_count_idle) { |
| 190 | if (highest_thread != nullptr) [[likely]] { | 191 | if (highest_thread != nullptr) [[likely]] { |
| @@ -351,7 +352,7 @@ void KScheduler::SwitchThread(KThread* next_thread) { | |||
| 351 | 352 | ||
| 352 | // Update the CPU time tracking variables. | 353 | // Update the CPU time tracking variables. |
| 353 | const s64 prev_tick = m_last_context_switch_time; | 354 | const s64 prev_tick = m_last_context_switch_time; |
| 354 | const s64 cur_tick = m_kernel.System().CoreTiming().GetCPUTicks(); | 355 | const s64 cur_tick = m_kernel.System().CoreTiming().GetClockTicks(); |
| 355 | const s64 tick_diff = cur_tick - prev_tick; | 356 | const s64 tick_diff = cur_tick - prev_tick; |
| 356 | cur_thread->AddCpuTime(m_core_id, tick_diff); | 357 | cur_thread->AddCpuTime(m_core_id, tick_diff); |
| 357 | if (cur_process != nullptr) { | 358 | if (cur_process != nullptr) { |
diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp index 2b2c878b5..445cdd87b 100644 --- a/src/core/hle/kernel/svc/svc_info.cpp +++ b/src/core/hle/kernel/svc/svc_info.cpp | |||
| @@ -199,9 +199,9 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle | |||
| 199 | if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { | 199 | if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { |
| 200 | const u64 thread_ticks = current_thread->GetCpuTime(); | 200 | const u64 thread_ticks = current_thread->GetCpuTime(); |
| 201 | 201 | ||
| 202 | out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); | 202 | out_ticks = thread_ticks + (core_timing.GetClockTicks() - prev_ctx_ticks); |
| 203 | } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { | 203 | } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { |
| 204 | out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; | 204 | out_ticks = core_timing.GetClockTicks() - prev_ctx_ticks; |
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | *result = out_ticks; | 207 | *result = out_ticks; |
diff --git a/src/core/hle/kernel/svc/svc_tick.cpp b/src/core/hle/kernel/svc/svc_tick.cpp index 561336482..7dd7c6e51 100644 --- a/src/core/hle/kernel/svc/svc_tick.cpp +++ b/src/core/hle/kernel/svc/svc_tick.cpp | |||
| @@ -12,16 +12,8 @@ namespace Kernel::Svc { | |||
| 12 | int64_t GetSystemTick(Core::System& system) { | 12 | int64_t GetSystemTick(Core::System& system) { |
| 13 | LOG_TRACE(Kernel_SVC, "called"); | 13 | LOG_TRACE(Kernel_SVC, "called"); |
| 14 | 14 | ||
| 15 | auto& core_timing = system.CoreTiming(); | ||
| 16 | |||
| 17 | // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) | 15 | // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) |
| 18 | const u64 result{core_timing.GetClockTicks()}; | 16 | return static_cast<int64_t>(system.CoreTiming().GetClockTicks()); |
| 19 | |||
| 20 | if (!system.Kernel().IsMulticore()) { | ||
| 21 | core_timing.AddTicks(400U); | ||
| 22 | } | ||
| 23 | |||
| 24 | return static_cast<int64_t>(result); | ||
| 25 | } | 17 | } |
| 26 | 18 | ||
| 27 | int64_t GetSystemTick64(Core::System& system) { | 19 | int64_t GetSystemTick64(Core::System& system) { |
diff --git a/src/core/hle/service/hid/hidbus.cpp b/src/core/hle/service/hid/hidbus.cpp index 5604a6fda..80aac221b 100644 --- a/src/core/hle/service/hid/hidbus.cpp +++ b/src/core/hle/service/hid/hidbus.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include "common/settings.h" | 5 | #include "common/settings.h" |
| 6 | #include "core/core.h" | 6 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 7 | #include "core/core_timing.h" |
| 8 | #include "core/core_timing_util.h" | ||
| 9 | #include "core/hid/hid_types.h" | 8 | #include "core/hid/hid_types.h" |
| 10 | #include "core/hle/kernel/k_event.h" | 9 | #include "core/hle/kernel/k_event.h" |
| 11 | #include "core/hle/kernel/k_readable_event.h" | 10 | #include "core/hle/kernel/k_readable_event.h" |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 5a5b2e305..0fe242e9d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -51,8 +51,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
| 51 | stride, format, transform, crop_rect}; | 51 | stride, format, transform, crop_rect}; |
| 52 | 52 | ||
| 53 | system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); | 53 | system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); |
| 54 | system.GetPerfStats().EndSystemFrame(); | ||
| 55 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); | 54 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); |
| 55 | system.GetPerfStats().EndSystemFrame(); | ||
| 56 | system.GetPerfStats().BeginSystemFrame(); | 56 | system.GetPerfStats().BeginSystemFrame(); |
| 57 | } | 57 | } |
| 58 | 58 | ||
diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index da2d5890f..b41c6240c 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp | |||
| @@ -70,7 +70,8 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_ | |||
| 70 | [this](std::uintptr_t, s64 time, | 70 | [this](std::uintptr_t, s64 time, |
| 71 | std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> { | 71 | std::chrono::nanoseconds ns_late) -> std::optional<std::chrono::nanoseconds> { |
| 72 | vsync_signal.store(true); | 72 | vsync_signal.store(true); |
| 73 | vsync_signal.notify_all(); | 73 | { const auto lock_guard = Lock(); } |
| 74 | vsync_signal.notify_one(); | ||
| 74 | return std::chrono::nanoseconds(GetNextTicks()); | 75 | return std::chrono::nanoseconds(GetNextTicks()); |
| 75 | }); | 76 | }); |
| 76 | 77 | ||
diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h index e6293ffb9..9fc01ea90 100644 --- a/src/core/hle/service/time/clock_types.h +++ b/src/core/hle/service/time/clock_types.h | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <ratio> | ||
| 7 | |||
| 6 | #include "common/common_funcs.h" | 8 | #include "common/common_funcs.h" |
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 8 | #include "common/uuid.h" | 10 | #include "common/uuid.h" |
| @@ -74,18 +76,19 @@ static_assert(std::is_trivially_copyable_v<ContinuousAdjustmentTimePoint>, | |||
| 74 | /// https://switchbrew.org/wiki/Glue_services#TimeSpanType | 76 | /// https://switchbrew.org/wiki/Glue_services#TimeSpanType |
| 75 | struct TimeSpanType { | 77 | struct TimeSpanType { |
| 76 | s64 nanoseconds{}; | 78 | s64 nanoseconds{}; |
| 77 | static constexpr s64 ns_per_second{1000000000ULL}; | ||
| 78 | 79 | ||
| 79 | s64 ToSeconds() const { | 80 | s64 ToSeconds() const { |
| 80 | return nanoseconds / ns_per_second; | 81 | return nanoseconds / std::nano::den; |
| 81 | } | 82 | } |
| 82 | 83 | ||
| 83 | static TimeSpanType FromSeconds(s64 seconds) { | 84 | static TimeSpanType FromSeconds(s64 seconds) { |
| 84 | return {seconds * ns_per_second}; | 85 | return {seconds * std::nano::den}; |
| 85 | } | 86 | } |
| 86 | 87 | ||
| 87 | static TimeSpanType FromTicks(u64 ticks, u64 frequency) { | 88 | template <u64 Frequency> |
| 88 | return FromSeconds(static_cast<s64>(ticks) / static_cast<s64>(frequency)); | 89 | static TimeSpanType FromTicks(u64 ticks) { |
| 90 | using TicksToNSRatio = std::ratio<std::nano::den, Frequency>; | ||
| 91 | return {static_cast<s64>(ticks * TicksToNSRatio::num / TicksToNSRatio::den)}; | ||
| 89 | } | 92 | } |
| 90 | }; | 93 | }; |
| 91 | static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); | 94 | static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); |
diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 3dbbb9850..5627b7003 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp | |||
| @@ -10,7 +10,7 @@ namespace Service::Time::Clock { | |||
| 10 | 10 | ||
| 11 | TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { | 11 | TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { |
| 12 | const TimeSpanType ticks_time_span{ | 12 | const TimeSpanType ticks_time_span{ |
| 13 | TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; | 13 | TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())}; |
| 14 | TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; | 14 | TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; |
| 15 | 15 | ||
| 16 | if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { | 16 | if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { |
diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 27600413e..0d9fb3143 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp | |||
| @@ -10,7 +10,7 @@ namespace Service::Time::Clock { | |||
| 10 | 10 | ||
| 11 | SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { | 11 | SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { |
| 12 | const TimeSpanType ticks_time_span{ | 12 | const TimeSpanType ticks_time_span{ |
| 13 | TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; | 13 | TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>(system.CoreTiming().GetClockTicks())}; |
| 14 | 14 | ||
| 15 | return {ticks_time_span.ToSeconds(), GetClockSourceId()}; | 15 | return {ticks_time_span.ToSeconds(), GetClockSourceId()}; |
| 16 | } | 16 | } |
diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 868be60c5..7197ca30f 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp | |||
| @@ -240,8 +240,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(HLERequestCon | |||
| 240 | const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; | 240 | const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; |
| 241 | 241 | ||
| 242 | if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { | 242 | if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { |
| 243 | const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), | 243 | const auto ticks{Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>( |
| 244 | Core::Hardware::CNTFREQ)}; | 244 | system.CoreTiming().GetClockTicks())}; |
| 245 | const s64 base_time_point{context.offset + current_time_point.time_point - | 245 | const s64 base_time_point{context.offset + current_time_point.time_point - |
| 246 | ticks.ToSeconds()}; | 246 | ticks.ToSeconds()}; |
| 247 | IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; | 247 | IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; |
diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index ce1c85bcc..a00676669 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp | |||
| @@ -21,8 +21,9 @@ SharedMemory::~SharedMemory() = default; | |||
| 21 | 21 | ||
| 22 | void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id, | 22 | void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id, |
| 23 | Clock::TimeSpanType current_time_point) { | 23 | Clock::TimeSpanType current_time_point) { |
| 24 | const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( | 24 | const Clock::TimeSpanType ticks_time_span{ |
| 25 | system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; | 25 | Clock::TimeSpanType::FromTicks<Core::Hardware::CNTFREQ>( |
| 26 | system.CoreTiming().GetClockTicks())}; | ||
| 26 | const Clock::SteadyClockContext context{ | 27 | const Clock::SteadyClockContext context{ |
| 27 | static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), | 28 | static_cast<u64>(current_time_point.nanoseconds - ticks_time_span.nanoseconds), |
| 28 | clock_source_id}; | 29 | clock_source_id}; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 456f733cf..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -193,18 +193,13 @@ struct GPU::Impl { | |||
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | [[nodiscard]] u64 GetTicks() const { | 195 | [[nodiscard]] u64 GetTicks() const { |
| 196 | // This values were reversed engineered by fincs from NVN | 196 | u64 gpu_tick = system.CoreTiming().GetGPUTicks(); |
| 197 | // The gpu clock is reported in units of 385/625 nanoseconds | ||
| 198 | constexpr u64 gpu_ticks_num = 384; | ||
| 199 | constexpr u64 gpu_ticks_den = 625; | ||
| 200 | 197 | ||
| 201 | u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); | ||
| 202 | if (Settings::values.use_fast_gpu_time.GetValue()) { | 198 | if (Settings::values.use_fast_gpu_time.GetValue()) { |
| 203 | nanoseconds /= 256; | 199 | gpu_tick /= 256; |
| 204 | } | 200 | } |
| 205 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | 201 | |
| 206 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | 202 | return gpu_tick; |
| 207 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 208 | } | 203 | } |
| 209 | 204 | ||
| 210 | [[nodiscard]] bool IsAsync() const { | 205 | [[nodiscard]] bool IsAsync() const { |