diff options
Diffstat (limited to 'src/common/x64')
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 123 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 79 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 61 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 2 |
4 files changed, 159 insertions, 106 deletions
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index fbeacc7e2..f5296b32a 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -1,8 +1,11 @@ | |||
| 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project | 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Project. Licensed under GPLv2 or any later version. Refer to the license.txt file included. |
| 3 | // Refer to the license.txt file included. | ||
| 4 | 3 | ||
| 4 | #include <array> | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <iterator> | ||
| 7 | #include <string_view> | ||
| 8 | #include "common/bit_util.h" | ||
| 6 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 7 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 8 | 11 | ||
| @@ -17,7 +20,7 @@ | |||
| 17 | // clang-format on | 20 | // clang-format on |
| 18 | #endif | 21 | #endif |
| 19 | 22 | ||
| 20 | static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { | 23 | static inline void __cpuidex(int info[4], u32 function_id, u32 subfunction_id) { |
| 21 | #if defined(__DragonFly__) || defined(__FreeBSD__) | 24 | #if defined(__DragonFly__) || defined(__FreeBSD__) |
| 22 | // Despite the name, this is just do_cpuid() with ECX as second input. | 25 | // Despite the name, this is just do_cpuid() with ECX as second input. |
| 23 | cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); | 26 | cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info); |
| @@ -30,7 +33,7 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { | |||
| 30 | #endif | 33 | #endif |
| 31 | } | 34 | } |
| 32 | 35 | ||
| 33 | static inline void __cpuid(int info[4], int function_id) { | 36 | static inline void __cpuid(int info[4], u32 function_id) { |
| 34 | return __cpuidex(info, function_id, 0); | 37 | return __cpuidex(info, function_id, 0); |
| 35 | } | 38 | } |
| 36 | 39 | ||
| @@ -45,6 +48,17 @@ static inline u64 _xgetbv(u32 index) { | |||
| 45 | 48 | ||
| 46 | namespace Common { | 49 | namespace Common { |
| 47 | 50 | ||
| 51 | CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string) { | ||
| 52 | if (brand_string == "GenuineIntel") { | ||
| 53 | return Manufacturer::Intel; | ||
| 54 | } else if (brand_string == "AuthenticAMD") { | ||
| 55 | return Manufacturer::AMD; | ||
| 56 | } else if (brand_string == "HygonGenuine") { | ||
| 57 | return Manufacturer::Hygon; | ||
| 58 | } | ||
| 59 | return Manufacturer::Unknown; | ||
| 60 | } | ||
| 61 | |||
| 48 | // Detects the various CPU features | 62 | // Detects the various CPU features |
| 49 | static CPUCaps Detect() { | 63 | static CPUCaps Detect() { |
| 50 | CPUCaps caps = {}; | 64 | CPUCaps caps = {}; |
| @@ -53,75 +67,74 @@ static CPUCaps Detect() { | |||
| 53 | // yuzu at all anyway | 67 | // yuzu at all anyway |
| 54 | 68 | ||
| 55 | int cpu_id[4]; | 69 | int cpu_id[4]; |
| 56 | memset(caps.brand_string, 0, sizeof(caps.brand_string)); | ||
| 57 | 70 | ||
| 58 | // Detect CPU's CPUID capabilities and grab CPU string | 71 | // Detect CPU's CPUID capabilities and grab manufacturer string |
| 59 | __cpuid(cpu_id, 0x00000000); | 72 | __cpuid(cpu_id, 0x00000000); |
| 60 | u32 max_std_fn = cpu_id[0]; // EAX | 73 | const u32 max_std_fn = cpu_id[0]; // EAX |
| 61 | |||
| 62 | std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); | ||
| 63 | std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); | ||
| 64 | std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); | ||
| 65 | if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69) | ||
| 66 | caps.manufacturer = Manufacturer::Intel; | ||
| 67 | else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65) | ||
| 68 | caps.manufacturer = Manufacturer::AMD; | ||
| 69 | else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e) | ||
| 70 | caps.manufacturer = Manufacturer::Hygon; | ||
| 71 | else | ||
| 72 | caps.manufacturer = Manufacturer::Unknown; | ||
| 73 | 74 | ||
| 74 | __cpuid(cpu_id, 0x80000000); | 75 | std::memset(caps.brand_string, 0, std::size(caps.brand_string)); |
| 76 | std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(u32)); | ||
| 77 | std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(u32)); | ||
| 78 | std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(u32)); | ||
| 79 | |||
| 80 | caps.manufacturer = CPUCaps::ParseManufacturer(caps.brand_string); | ||
| 75 | 81 | ||
| 76 | u32 max_ex_fn = cpu_id[0]; | 82 | // Set reasonable default cpu string even if brand string not available |
| 83 | std::strncpy(caps.cpu_string, caps.brand_string, std::size(caps.brand_string)); | ||
| 77 | 84 | ||
| 78 | // Set reasonable default brand string even if brand string not available | 85 | __cpuid(cpu_id, 0x80000000); |
| 79 | strcpy(caps.cpu_string, caps.brand_string); | 86 | |
| 87 | const u32 max_ex_fn = cpu_id[0]; | ||
| 80 | 88 | ||
| 81 | // Detect family and other miscellaneous features | 89 | // Detect family and other miscellaneous features |
| 82 | if (max_std_fn >= 1) { | 90 | if (max_std_fn >= 1) { |
| 83 | __cpuid(cpu_id, 0x00000001); | 91 | __cpuid(cpu_id, 0x00000001); |
| 84 | if ((cpu_id[3] >> 25) & 1) | 92 | caps.sse = Common::Bit<25>(cpu_id[3]); |
| 85 | caps.sse = true; | 93 | caps.sse2 = Common::Bit<26>(cpu_id[3]); |
| 86 | if ((cpu_id[3] >> 26) & 1) | 94 | caps.sse3 = Common::Bit<0>(cpu_id[2]); |
| 87 | caps.sse2 = true; | 95 | caps.pclmulqdq = Common::Bit<1>(cpu_id[2]); |
| 88 | if ((cpu_id[2]) & 1) | 96 | caps.ssse3 = Common::Bit<9>(cpu_id[2]); |
| 89 | caps.sse3 = true; | 97 | caps.sse4_1 = Common::Bit<19>(cpu_id[2]); |
| 90 | if ((cpu_id[2] >> 9) & 1) | 98 | caps.sse4_2 = Common::Bit<20>(cpu_id[2]); |
| 91 | caps.ssse3 = true; | 99 | caps.movbe = Common::Bit<22>(cpu_id[2]); |
| 92 | if ((cpu_id[2] >> 19) & 1) | 100 | caps.popcnt = Common::Bit<23>(cpu_id[2]); |
| 93 | caps.sse4_1 = true; | 101 | caps.aes = Common::Bit<25>(cpu_id[2]); |
| 94 | if ((cpu_id[2] >> 20) & 1) | 102 | caps.f16c = Common::Bit<29>(cpu_id[2]); |
| 95 | caps.sse4_2 = true; | ||
| 96 | if ((cpu_id[2] >> 25) & 1) | ||
| 97 | caps.aes = true; | ||
| 98 | 103 | ||
| 99 | // AVX support requires 3 separate checks: | 104 | // AVX support requires 3 separate checks: |
| 100 | // - Is the AVX bit set in CPUID? | 105 | // - Is the AVX bit set in CPUID? |
| 101 | // - Is the XSAVE bit set in CPUID? | 106 | // - Is the XSAVE bit set in CPUID? |
| 102 | // - XGETBV result has the XCR bit set. | 107 | // - XGETBV result has the XCR bit set. |
| 103 | if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) { | 108 | if (Common::Bit<28>(cpu_id[2]) && Common::Bit<27>(cpu_id[2])) { |
| 104 | if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { | 109 | if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) { |
| 105 | caps.avx = true; | 110 | caps.avx = true; |
| 106 | if ((cpu_id[2] >> 12) & 1) | 111 | if (Common::Bit<12>(cpu_id[2])) |
| 107 | caps.fma = true; | 112 | caps.fma = true; |
| 108 | } | 113 | } |
| 109 | } | 114 | } |
| 110 | 115 | ||
| 111 | if (max_std_fn >= 7) { | 116 | if (max_std_fn >= 7) { |
| 112 | __cpuidex(cpu_id, 0x00000007, 0x00000000); | 117 | __cpuidex(cpu_id, 0x00000007, 0x00000000); |
| 113 | // Can't enable AVX2 unless the XSAVE/XGETBV checks above passed | 118 | // Can't enable AVX{2,512} unless the XSAVE/XGETBV checks above passed |
| 114 | if ((cpu_id[1] >> 5) & 1) | 119 | if (caps.avx) { |
| 115 | caps.avx2 = caps.avx; | 120 | caps.avx2 = Common::Bit<5>(cpu_id[1]); |
| 116 | if ((cpu_id[1] >> 3) & 1) | 121 | caps.avx512f = Common::Bit<16>(cpu_id[1]); |
| 117 | caps.bmi1 = true; | 122 | caps.avx512dq = Common::Bit<17>(cpu_id[1]); |
| 118 | if ((cpu_id[1] >> 8) & 1) | 123 | caps.avx512cd = Common::Bit<28>(cpu_id[1]); |
| 119 | caps.bmi2 = true; | 124 | caps.avx512bw = Common::Bit<30>(cpu_id[1]); |
| 120 | // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP) | 125 | caps.avx512vl = Common::Bit<31>(cpu_id[1]); |
| 121 | if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 && | 126 | caps.avx512vbmi = Common::Bit<1>(cpu_id[2]); |
| 122 | (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) { | 127 | caps.avx512bitalg = Common::Bit<12>(cpu_id[2]); |
| 123 | caps.avx512 = caps.avx2; | ||
| 124 | } | 128 | } |
| 129 | |||
| 130 | caps.bmi1 = Common::Bit<3>(cpu_id[1]); | ||
| 131 | caps.bmi2 = Common::Bit<8>(cpu_id[1]); | ||
| 132 | caps.sha = Common::Bit<29>(cpu_id[1]); | ||
| 133 | |||
| 134 | caps.gfni = Common::Bit<8>(cpu_id[2]); | ||
| 135 | |||
| 136 | __cpuidex(cpu_id, 0x00000007, 0x00000001); | ||
| 137 | caps.avx_vnni = caps.avx && Common::Bit<4>(cpu_id[0]); | ||
| 125 | } | 138 | } |
| 126 | } | 139 | } |
| 127 | 140 | ||
| @@ -138,15 +151,13 @@ static CPUCaps Detect() { | |||
| 138 | if (max_ex_fn >= 0x80000001) { | 151 | if (max_ex_fn >= 0x80000001) { |
| 139 | // Check for more features | 152 | // Check for more features |
| 140 | __cpuid(cpu_id, 0x80000001); | 153 | __cpuid(cpu_id, 0x80000001); |
| 141 | if ((cpu_id[2] >> 16) & 1) | 154 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); |
| 142 | caps.fma4 = true; | 155 | caps.fma4 = Common::Bit<16>(cpu_id[2]); |
| 143 | } | 156 | } |
| 144 | 157 | ||
| 145 | if (max_ex_fn >= 0x80000007) { | 158 | if (max_ex_fn >= 0x80000007) { |
| 146 | __cpuid(cpu_id, 0x80000007); | 159 | __cpuid(cpu_id, 0x80000007); |
| 147 | if (cpu_id[3] & (1 << 8)) { | 160 | caps.invariant_tsc = Common::Bit<8>(cpu_id[3]); |
| 148 | caps.invariant_tsc = true; | ||
| 149 | } | ||
| 150 | } | 161 | } |
| 151 | 162 | ||
| 152 | if (max_std_fn >= 0x16) { | 163 | if (max_std_fn >= 0x16) { |
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index e3b63302e..40c48b132 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h | |||
| @@ -1,42 +1,65 @@ | |||
| 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project | 1 | // Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project / 2022 Yuzu Emulator |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Project. Licensed under GPLv2 or any later version. Refer to the license.txt file included. |
| 3 | // Refer to the license.txt file included. | ||
| 4 | 3 | ||
| 5 | #pragma once | 4 | #pragma once |
| 6 | 5 | ||
| 7 | namespace Common { | 6 | #include <string_view> |
| 7 | #include "common/common_types.h" | ||
| 8 | 8 | ||
| 9 | enum class Manufacturer : u32 { | 9 | namespace Common { |
| 10 | Intel = 0, | ||
| 11 | AMD = 1, | ||
| 12 | Hygon = 2, | ||
| 13 | Unknown = 3, | ||
| 14 | }; | ||
| 15 | 10 | ||
| 16 | /// x86/x64 CPU capabilities that may be detected by this module | 11 | /// x86/x64 CPU capabilities that may be detected by this module |
| 17 | struct CPUCaps { | 12 | struct CPUCaps { |
| 13 | |||
| 14 | enum class Manufacturer : u8 { | ||
| 15 | Unknown = 0, | ||
| 16 | Intel = 1, | ||
| 17 | AMD = 2, | ||
| 18 | Hygon = 3, | ||
| 19 | }; | ||
| 20 | |||
| 21 | static Manufacturer ParseManufacturer(std::string_view brand_string); | ||
| 22 | |||
| 18 | Manufacturer manufacturer; | 23 | Manufacturer manufacturer; |
| 19 | char cpu_string[0x21]; | 24 | char brand_string[13]; |
| 20 | char brand_string[0x41]; | 25 | |
| 21 | bool sse; | 26 | char cpu_string[48]; |
| 22 | bool sse2; | 27 | |
| 23 | bool sse3; | ||
| 24 | bool ssse3; | ||
| 25 | bool sse4_1; | ||
| 26 | bool sse4_2; | ||
| 27 | bool lzcnt; | ||
| 28 | bool avx; | ||
| 29 | bool avx2; | ||
| 30 | bool avx512; | ||
| 31 | bool bmi1; | ||
| 32 | bool bmi2; | ||
| 33 | bool fma; | ||
| 34 | bool fma4; | ||
| 35 | bool aes; | ||
| 36 | bool invariant_tsc; | ||
| 37 | u32 base_frequency; | 28 | u32 base_frequency; |
| 38 | u32 max_frequency; | 29 | u32 max_frequency; |
| 39 | u32 bus_frequency; | 30 | u32 bus_frequency; |
| 31 | |||
| 32 | bool sse : 1; | ||
| 33 | bool sse2 : 1; | ||
| 34 | bool sse3 : 1; | ||
| 35 | bool ssse3 : 1; | ||
| 36 | bool sse4_1 : 1; | ||
| 37 | bool sse4_2 : 1; | ||
| 38 | |||
| 39 | bool avx : 1; | ||
| 40 | bool avx_vnni : 1; | ||
| 41 | bool avx2 : 1; | ||
| 42 | bool avx512f : 1; | ||
| 43 | bool avx512dq : 1; | ||
| 44 | bool avx512cd : 1; | ||
| 45 | bool avx512bw : 1; | ||
| 46 | bool avx512vl : 1; | ||
| 47 | bool avx512vbmi : 1; | ||
| 48 | bool avx512bitalg : 1; | ||
| 49 | |||
| 50 | bool aes : 1; | ||
| 51 | bool bmi1 : 1; | ||
| 52 | bool bmi2 : 1; | ||
| 53 | bool f16c : 1; | ||
| 54 | bool fma : 1; | ||
| 55 | bool fma4 : 1; | ||
| 56 | bool gfni : 1; | ||
| 57 | bool invariant_tsc : 1; | ||
| 58 | bool lzcnt : 1; | ||
| 59 | bool movbe : 1; | ||
| 60 | bool pclmulqdq : 1; | ||
| 61 | bool popcnt : 1; | ||
| 62 | bool sha : 1; | ||
| 40 | }; | 63 | }; |
| 41 | 64 | ||
| 42 | /** | 65 | /** |
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 91b842829..7fd9d22f8 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -4,33 +4,55 @@ | |||
| 4 | 4 | ||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <limits> | ||
| 8 | #include <mutex> | ||
| 9 | #include <thread> | 7 | #include <thread> |
| 10 | 8 | ||
| 11 | #include "common/atomic_ops.h" | 9 | #include "common/atomic_ops.h" |
| 12 | #include "common/uint128.h" | 10 | #include "common/uint128.h" |
| 13 | #include "common/x64/native_clock.h" | 11 | #include "common/x64/native_clock.h" |
| 14 | 12 | ||
| 13 | #ifdef _MSC_VER | ||
| 14 | #include <intrin.h> | ||
| 15 | #endif | ||
| 16 | |||
| 15 | namespace Common { | 17 | namespace Common { |
| 16 | 18 | ||
| 19 | #ifdef _MSC_VER | ||
| 20 | __forceinline static u64 FencedRDTSC() { | ||
| 21 | _mm_lfence(); | ||
| 22 | _ReadWriteBarrier(); | ||
| 23 | const u64 result = __rdtsc(); | ||
| 24 | _mm_lfence(); | ||
| 25 | _ReadWriteBarrier(); | ||
| 26 | return result; | ||
| 27 | } | ||
| 28 | #else | ||
| 29 | static u64 FencedRDTSC() { | ||
| 30 | u64 result; | ||
| 31 | asm volatile("lfence\n\t" | ||
| 32 | "rdtsc\n\t" | ||
| 33 | "shl $32, %%rdx\n\t" | ||
| 34 | "or %%rdx, %0\n\t" | ||
| 35 | "lfence" | ||
| 36 | : "=a"(result) | ||
| 37 | : | ||
| 38 | : "rdx", "memory", "cc"); | ||
| 39 | return result; | ||
| 40 | } | ||
| 41 | #endif | ||
| 42 | |||
| 17 | u64 EstimateRDTSCFrequency() { | 43 | u64 EstimateRDTSCFrequency() { |
| 18 | // Discard the first result measuring the rdtsc. | 44 | // Discard the first result measuring the rdtsc. |
| 19 | _mm_mfence(); | 45 | FencedRDTSC(); |
| 20 | __rdtsc(); | ||
| 21 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | 46 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); |
| 22 | _mm_mfence(); | 47 | FencedRDTSC(); |
| 23 | __rdtsc(); | ||
| 24 | 48 | ||
| 25 | // Get the current time. | 49 | // Get the current time. |
| 26 | const auto start_time = std::chrono::steady_clock::now(); | 50 | const auto start_time = std::chrono::steady_clock::now(); |
| 27 | _mm_mfence(); | 51 | const u64 tsc_start = FencedRDTSC(); |
| 28 | const u64 tsc_start = __rdtsc(); | ||
| 29 | // Wait for 200 milliseconds. | 52 | // Wait for 200 milliseconds. |
| 30 | std::this_thread::sleep_for(std::chrono::milliseconds{200}); | 53 | std::this_thread::sleep_for(std::chrono::milliseconds{200}); |
| 31 | const auto end_time = std::chrono::steady_clock::now(); | 54 | const auto end_time = std::chrono::steady_clock::now(); |
| 32 | _mm_mfence(); | 55 | const u64 tsc_end = FencedRDTSC(); |
| 33 | const u64 tsc_end = __rdtsc(); | ||
| 34 | // Calculate differences. | 56 | // Calculate differences. |
| 35 | const u64 timer_diff = static_cast<u64>( | 57 | const u64 timer_diff = static_cast<u64>( |
| 36 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | 58 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); |
| @@ -44,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen | |||
| 44 | u64 rtsc_frequency_) | 66 | u64 rtsc_frequency_) |
| 45 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ | 67 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ |
| 46 | rtsc_frequency_} { | 68 | rtsc_frequency_} { |
| 47 | _mm_mfence(); | 69 | time_point.inner.last_measure = FencedRDTSC(); |
| 48 | time_point.inner.last_measure = __rdtsc(); | ||
| 49 | time_point.inner.accumulated_ticks = 0U; | 70 | time_point.inner.accumulated_ticks = 0U; |
| 50 | ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); | 71 | ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); |
| 51 | us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); | 72 | us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); |
| @@ -57,10 +78,10 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen | |||
| 57 | u64 NativeClock::GetRTSC() { | 78 | u64 NativeClock::GetRTSC() { |
| 58 | TimePoint new_time_point{}; | 79 | TimePoint new_time_point{}; |
| 59 | TimePoint current_time_point{}; | 80 | TimePoint current_time_point{}; |
| 81 | |||
| 82 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 60 | do { | 83 | do { |
| 61 | current_time_point.pack = time_point.pack; | 84 | const u64 current_measure = FencedRDTSC(); |
| 62 | _mm_mfence(); | ||
| 63 | const u64 current_measure = __rdtsc(); | ||
| 64 | u64 diff = current_measure - current_time_point.inner.last_measure; | 85 | u64 diff = current_measure - current_time_point.inner.last_measure; |
| 65 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) | 86 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) |
| 66 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure | 87 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure |
| @@ -68,7 +89,7 @@ u64 NativeClock::GetRTSC() { | |||
| 68 | : current_time_point.inner.last_measure; | 89 | : current_time_point.inner.last_measure; |
| 69 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; | 90 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; |
| 70 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | 91 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, |
| 71 | current_time_point.pack)); | 92 | current_time_point.pack, current_time_point.pack)); |
| 72 | /// The clock cannot be more precise than the guest timer, remove the lower bits | 93 | /// The clock cannot be more precise than the guest timer, remove the lower bits |
| 73 | return new_time_point.inner.accumulated_ticks & inaccuracy_mask; | 94 | return new_time_point.inner.accumulated_ticks & inaccuracy_mask; |
| 74 | } | 95 | } |
| @@ -77,13 +98,13 @@ void NativeClock::Pause(bool is_paused) { | |||
| 77 | if (!is_paused) { | 98 | if (!is_paused) { |
| 78 | TimePoint current_time_point{}; | 99 | TimePoint current_time_point{}; |
| 79 | TimePoint new_time_point{}; | 100 | TimePoint new_time_point{}; |
| 101 | |||
| 102 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 80 | do { | 103 | do { |
| 81 | current_time_point.pack = time_point.pack; | ||
| 82 | new_time_point.pack = current_time_point.pack; | 104 | new_time_point.pack = current_time_point.pack; |
| 83 | _mm_mfence(); | 105 | new_time_point.inner.last_measure = FencedRDTSC(); |
| 84 | new_time_point.inner.last_measure = __rdtsc(); | ||
| 85 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | 106 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, |
| 86 | current_time_point.pack)); | 107 | current_time_point.pack, current_time_point.pack)); |
| 87 | } | 108 | } |
| 88 | } | 109 | } |
| 89 | 110 | ||
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 7cbd400d2..2c3082ea0 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h | |||
| @@ -4,8 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include "common/wall_clock.h" | 7 | #include "common/wall_clock.h" |
| 10 | 8 | ||
| 11 | namespace Common { | 9 | namespace Common { |