diff options
| author | 2023-04-22 22:55:03 -0400 | |
|---|---|---|
| committer | 2023-06-07 21:44:42 -0400 | |
| commit | dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b (patch) | |
| tree | 82538e7a48fa6fb6a9a5d51aef925586195c21ed | |
| parent | Merge pull request #10650 from qurious-pixel/android_tv (diff) | |
| download | yuzu-dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b.tar.gz yuzu-dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b.tar.xz yuzu-dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b.zip | |
x64: Deduplicate RDTSC usage
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 3 | ||||
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 20 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.cpp | 39 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.h | 37 |
5 files changed, 82 insertions, 19 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) | |||
| 172 | x64/cpu_wait.h | 172 | x64/cpu_wait.h |
| 173 | x64/native_clock.cpp | 173 | x64/native_clock.cpp |
| 174 | x64/native_clock.h | 174 | x64/native_clock.h |
| 175 | x64/rdtsc.cpp | ||
| 176 | x64/rdtsc.h | ||
| 175 | x64/xbyak_abi.h | 177 | x64/xbyak_abi.h |
| 176 | x64/xbyak_util.h | 178 | x64/xbyak_util.h |
| 177 | ) | 179 | ) |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/x64/cpu_detect.h" | 16 | #include "common/x64/cpu_detect.h" |
| 17 | #include "common/x64/rdtsc.h" | ||
| 17 | 18 | ||
| 18 | #ifdef _WIN32 | 19 | #ifdef _WIN32 |
| 19 | #include <windows.h> | 20 | #include <windows.h> |
| @@ -187,6 +188,8 @@ static CPUCaps Detect() { | |||
| 187 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * | 188 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * |
| 188 | caps.tsc_crystal_ratio_numerator / | 189 | caps.tsc_crystal_ratio_numerator / |
| 189 | caps.tsc_crystal_ratio_denominator; | 190 | caps.tsc_crystal_ratio_denominator; |
| 191 | } else { | ||
| 192 | caps.tsc_frequency = X64::EstimateRDTSCFrequency(); | ||
| 190 | } | 193 | } |
| 191 | } | 194 | } |
| 192 | 195 | ||
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -9,19 +9,11 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 11 | #include "common/x64/cpu_wait.h" | 11 | #include "common/x64/cpu_wait.h" |
| 12 | #include "common/x64/rdtsc.h" | ||
| 12 | 13 | ||
| 13 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 14 | 15 | ||
| 15 | #ifdef _MSC_VER | 16 | #ifdef _MSC_VER |
| 16 | __forceinline static u64 FencedRDTSC() { | ||
| 17 | _mm_lfence(); | ||
| 18 | _ReadWriteBarrier(); | ||
| 19 | const u64 result = __rdtsc(); | ||
| 20 | _mm_lfence(); | ||
| 21 | _ReadWriteBarrier(); | ||
| 22 | return result; | ||
| 23 | } | ||
| 24 | |||
| 25 | __forceinline static void TPAUSE() { | 17 | __forceinline static void TPAUSE() { |
| 26 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 27 | // For reference: | 19 | // For reference: |
| @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { | |||
| 32 | _tpause(0, FencedRDTSC() + PauseCycles); | 24 | _tpause(0, FencedRDTSC() + PauseCycles); |
| 33 | } | 25 | } |
| 34 | #else | 26 | #else |
| 35 | static u64 FencedRDTSC() { | ||
| 36 | u64 eax; | ||
| 37 | u64 edx; | ||
| 38 | asm volatile("lfence\n\t" | ||
| 39 | "rdtsc\n\t" | ||
| 40 | "lfence\n\t" | ||
| 41 | : "=a"(eax), "=d"(edx)); | ||
| 42 | return (edx << 32) | eax; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void TPAUSE() { | 27 | static void TPAUSE() { |
| 46 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 28 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. |
| 47 | // For reference: | 29 | // For reference: |
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <thread> | ||
| 5 | |||
| 6 | #include "common/steady_clock.h" | ||
| 7 | #include "common/uint128.h" | ||
| 8 | #include "common/x64/rdtsc.h" | ||
| 9 | |||
| 10 | namespace Common::X64 { | ||
| 11 | |||
| 12 | template <u64 Nearest> | ||
| 13 | static u64 RoundToNearest(u64 value) { | ||
| 14 | const auto mod = value % Nearest; | ||
| 15 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 16 | } | ||
| 17 | |||
| 18 | u64 EstimateRDTSCFrequency() { | ||
| 19 | // Discard the first result measuring the rdtsc. | ||
| 20 | FencedRDTSC(); | ||
| 21 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 22 | FencedRDTSC(); | ||
| 23 | |||
| 24 | // Get the current time. | ||
| 25 | const auto start_time = RealTimeClock::Now(); | ||
| 26 | const u64 tsc_start = FencedRDTSC(); | ||
| 27 | // Wait for 100 milliseconds. | ||
| 28 | std::this_thread::sleep_for(std::chrono::milliseconds{100}); | ||
| 29 | const auto end_time = RealTimeClock::Now(); | ||
| 30 | const u64 tsc_end = FencedRDTSC(); | ||
| 31 | // Calculate differences. | ||
| 32 | const u64 timer_diff = static_cast<u64>( | ||
| 33 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 34 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 35 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 36 | return RoundToNearest<100'000>(tsc_freq); | ||
| 37 | } | ||
| 38 | |||
| 39 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #ifdef _MSC_VER | ||
| 7 | #include <intrin.h> | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | #ifdef _MSC_VER | ||
| 15 | __forceinline static u64 FencedRDTSC() { | ||
| 16 | _mm_lfence(); | ||
| 17 | _ReadWriteBarrier(); | ||
| 18 | const u64 result = __rdtsc(); | ||
| 19 | _mm_lfence(); | ||
| 20 | _ReadWriteBarrier(); | ||
| 21 | return result; | ||
| 22 | } | ||
| 23 | #else | ||
| 24 | static inline u64 FencedRDTSC() { | ||
| 25 | u64 eax; | ||
| 26 | u64 edx; | ||
| 27 | asm volatile("lfence\n\t" | ||
| 28 | "rdtsc\n\t" | ||
| 29 | "lfence\n\t" | ||
| 30 | : "=a"(eax), "=d"(edx)); | ||
| 31 | return (edx << 32) | eax; | ||
| 32 | } | ||
| 33 | #endif | ||
| 34 | |||
| 35 | u64 EstimateRDTSCFrequency(); | ||
| 36 | |||
| 37 | } // namespace Common::X64 | ||