diff options
Diffstat (limited to 'src/common')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/fs/fs.cpp | 14 | ||||
| -rw-r--r-- | src/common/fs/fs_types.h | 2 | ||||
| -rw-r--r-- | src/common/input.h | 45 | ||||
| -rw-r--r-- | src/common/ring_buffer.h | 3 | ||||
| -rw-r--r-- | src/common/scratch_buffer.h | 9 | ||||
| -rw-r--r-- | src/common/settings.cpp | 19 | ||||
| -rw-r--r-- | src/common/settings.h | 12 | ||||
| -rw-r--r-- | src/common/steady_clock.cpp | 5 | ||||
| -rw-r--r-- | src/common/telemetry.cpp | 1 | ||||
| -rw-r--r-- | src/common/thread.h | 2 | ||||
| -rw-r--r-- | src/common/wall_clock.cpp | 77 | ||||
| -rw-r--r-- | src/common/wall_clock.h | 89 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.cpp | 4 | ||||
| -rw-r--r-- | src/common/x64/cpu_detect.h | 1 | ||||
| -rw-r--r-- | src/common/x64/cpu_wait.cpp | 70 | ||||
| -rw-r--r-- | src/common/x64/native_clock.cpp | 166 | ||||
| -rw-r--r-- | src/common/x64/native_clock.h | 59 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.cpp | 39 | ||||
| -rw-r--r-- | src/common/x64/rdtsc.h | 37 |
20 files changed, 353 insertions, 303 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) | |||
| 172 | x64/cpu_wait.h | 172 | x64/cpu_wait.h |
| 173 | x64/native_clock.cpp | 173 | x64/native_clock.cpp |
| 174 | x64/native_clock.h | 174 | x64/native_clock.h |
| 175 | x64/rdtsc.cpp | ||
| 176 | x64/rdtsc.h | ||
| 175 | x64/xbyak_abi.h | 177 | x64/xbyak_abi.h |
| 176 | x64/xbyak_util.h | 178 | x64/xbyak_util.h |
| 177 | ) | 179 | ) |
diff --git a/src/common/fs/fs.cpp b/src/common/fs/fs.cpp index 6d66c926d..36e67c145 100644 --- a/src/common/fs/fs.cpp +++ b/src/common/fs/fs.cpp | |||
| @@ -436,7 +436,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable | |||
| 436 | 436 | ||
| 437 | if (True(filter & DirEntryFilter::File) && | 437 | if (True(filter & DirEntryFilter::File) && |
| 438 | entry.status().type() == fs::file_type::regular) { | 438 | entry.status().type() == fs::file_type::regular) { |
| 439 | if (!callback(entry.path())) { | 439 | if (!callback(entry)) { |
| 440 | callback_error = true; | 440 | callback_error = true; |
| 441 | break; | 441 | break; |
| 442 | } | 442 | } |
| @@ -444,7 +444,7 @@ void IterateDirEntries(const std::filesystem::path& path, const DirEntryCallable | |||
| 444 | 444 | ||
| 445 | if (True(filter & DirEntryFilter::Directory) && | 445 | if (True(filter & DirEntryFilter::Directory) && |
| 446 | entry.status().type() == fs::file_type::directory) { | 446 | entry.status().type() == fs::file_type::directory) { |
| 447 | if (!callback(entry.path())) { | 447 | if (!callback(entry)) { |
| 448 | callback_error = true; | 448 | callback_error = true; |
| 449 | break; | 449 | break; |
| 450 | } | 450 | } |
| @@ -493,7 +493,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, | |||
| 493 | 493 | ||
| 494 | if (True(filter & DirEntryFilter::File) && | 494 | if (True(filter & DirEntryFilter::File) && |
| 495 | entry.status().type() == fs::file_type::regular) { | 495 | entry.status().type() == fs::file_type::regular) { |
| 496 | if (!callback(entry.path())) { | 496 | if (!callback(entry)) { |
| 497 | callback_error = true; | 497 | callback_error = true; |
| 498 | break; | 498 | break; |
| 499 | } | 499 | } |
| @@ -501,7 +501,7 @@ void IterateDirEntriesRecursively(const std::filesystem::path& path, | |||
| 501 | 501 | ||
| 502 | if (True(filter & DirEntryFilter::Directory) && | 502 | if (True(filter & DirEntryFilter::Directory) && |
| 503 | entry.status().type() == fs::file_type::directory) { | 503 | entry.status().type() == fs::file_type::directory) { |
| 504 | if (!callback(entry.path())) { | 504 | if (!callback(entry)) { |
| 505 | callback_error = true; | 505 | callback_error = true; |
| 506 | break; | 506 | break; |
| 507 | } | 507 | } |
| @@ -605,6 +605,12 @@ fs::file_type GetEntryType(const fs::path& path) { | |||
| 605 | } | 605 | } |
| 606 | 606 | ||
| 607 | u64 GetSize(const fs::path& path) { | 607 | u64 GetSize(const fs::path& path) { |
| 608 | #ifdef ANDROID | ||
| 609 | if (Android::IsContentUri(path)) { | ||
| 610 | return Android::GetSize(path); | ||
| 611 | } | ||
| 612 | #endif | ||
| 613 | |||
| 608 | std::error_code ec; | 614 | std::error_code ec; |
| 609 | 615 | ||
| 610 | const auto file_size = fs::file_size(path, ec); | 616 | const auto file_size = fs::file_size(path, ec); |
diff --git a/src/common/fs/fs_types.h b/src/common/fs/fs_types.h index 5a4090c19..900f85d24 100644 --- a/src/common/fs/fs_types.h +++ b/src/common/fs/fs_types.h | |||
| @@ -66,6 +66,6 @@ DECLARE_ENUM_FLAG_OPERATORS(DirEntryFilter); | |||
| 66 | * @returns A boolean value. | 66 | * @returns A boolean value. |
| 67 | * Return true to indicate whether the callback is successful, false otherwise. | 67 | * Return true to indicate whether the callback is successful, false otherwise. |
| 68 | */ | 68 | */ |
| 69 | using DirEntryCallable = std::function<bool(const std::filesystem::path& path)>; | 69 | using DirEntryCallable = std::function<bool(const std::filesystem::directory_entry& entry)>; |
| 70 | 70 | ||
| 71 | } // namespace Common::FS | 71 | } // namespace Common::FS |
diff --git a/src/common/input.h b/src/common/input.h index 66fb15f0a..2c4ccea22 100644 --- a/src/common/input.h +++ b/src/common/input.h | |||
| @@ -75,8 +75,10 @@ enum class DriverResult { | |||
| 75 | ErrorWritingData, | 75 | ErrorWritingData, |
| 76 | NoDeviceDetected, | 76 | NoDeviceDetected, |
| 77 | InvalidHandle, | 77 | InvalidHandle, |
| 78 | InvalidParameters, | ||
| 78 | NotSupported, | 79 | NotSupported, |
| 79 | Disabled, | 80 | Disabled, |
| 81 | Delayed, | ||
| 80 | Unknown, | 82 | Unknown, |
| 81 | }; | 83 | }; |
| 82 | 84 | ||
| @@ -86,7 +88,7 @@ enum class NfcState { | |||
| 86 | NewAmiibo, | 88 | NewAmiibo, |
| 87 | WaitingForAmiibo, | 89 | WaitingForAmiibo, |
| 88 | AmiiboRemoved, | 90 | AmiiboRemoved, |
| 89 | NotAnAmiibo, | 91 | InvalidTagType, |
| 90 | NotSupported, | 92 | NotSupported, |
| 91 | WrongDeviceState, | 93 | WrongDeviceState, |
| 92 | WriteFailed, | 94 | WriteFailed, |
| @@ -218,8 +220,22 @@ struct CameraStatus { | |||
| 218 | }; | 220 | }; |
| 219 | 221 | ||
| 220 | struct NfcStatus { | 222 | struct NfcStatus { |
| 221 | NfcState state{}; | 223 | NfcState state{NfcState::Unknown}; |
| 222 | std::vector<u8> data{}; | 224 | u8 uuid_length; |
| 225 | u8 protocol; | ||
| 226 | u8 tag_type; | ||
| 227 | std::array<u8, 10> uuid; | ||
| 228 | }; | ||
| 229 | |||
| 230 | struct MifareData { | ||
| 231 | u8 command; | ||
| 232 | u8 sector; | ||
| 233 | std::array<u8, 0x6> key; | ||
| 234 | std::array<u8, 0x10> data; | ||
| 235 | }; | ||
| 236 | |||
| 237 | struct MifareRequest { | ||
| 238 | std::array<MifareData, 0x10> data; | ||
| 223 | }; | 239 | }; |
| 224 | 240 | ||
| 225 | // List of buttons to be passed to Qt that can be translated | 241 | // List of buttons to be passed to Qt that can be translated |
| @@ -294,7 +310,7 @@ struct CallbackStatus { | |||
| 294 | BatteryStatus battery_status{}; | 310 | BatteryStatus battery_status{}; |
| 295 | VibrationStatus vibration_status{}; | 311 | VibrationStatus vibration_status{}; |
| 296 | CameraFormat camera_status{CameraFormat::None}; | 312 | CameraFormat camera_status{CameraFormat::None}; |
| 297 | NfcState nfc_status{NfcState::Unknown}; | 313 | NfcStatus nfc_status{}; |
| 298 | std::vector<u8> raw_data{}; | 314 | std::vector<u8> raw_data{}; |
| 299 | }; | 315 | }; |
| 300 | 316 | ||
| @@ -356,9 +372,30 @@ public: | |||
| 356 | return NfcState::NotSupported; | 372 | return NfcState::NotSupported; |
| 357 | } | 373 | } |
| 358 | 374 | ||
| 375 | virtual NfcState StartNfcPolling() { | ||
| 376 | return NfcState::NotSupported; | ||
| 377 | } | ||
| 378 | |||
| 379 | virtual NfcState StopNfcPolling() { | ||
| 380 | return NfcState::NotSupported; | ||
| 381 | } | ||
| 382 | |||
| 383 | virtual NfcState ReadAmiiboData([[maybe_unused]] std::vector<u8>& out_data) { | ||
| 384 | return NfcState::NotSupported; | ||
| 385 | } | ||
| 386 | |||
| 359 | virtual NfcState WriteNfcData([[maybe_unused]] const std::vector<u8>& data) { | 387 | virtual NfcState WriteNfcData([[maybe_unused]] const std::vector<u8>& data) { |
| 360 | return NfcState::NotSupported; | 388 | return NfcState::NotSupported; |
| 361 | } | 389 | } |
| 390 | |||
| 391 | virtual NfcState ReadMifareData([[maybe_unused]] const MifareRequest& request, | ||
| 392 | [[maybe_unused]] MifareRequest& out_data) { | ||
| 393 | return NfcState::NotSupported; | ||
| 394 | } | ||
| 395 | |||
| 396 | virtual NfcState WriteMifareData([[maybe_unused]] const MifareRequest& request) { | ||
| 397 | return NfcState::NotSupported; | ||
| 398 | } | ||
| 362 | }; | 399 | }; |
| 363 | 400 | ||
| 364 | /// An abstract class template for a factory that can create input devices. | 401 | /// An abstract class template for a factory that can create input devices. |
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h index 4c328ab44..416680d44 100644 --- a/src/common/ring_buffer.h +++ b/src/common/ring_buffer.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <cstring> | 10 | #include <cstring> |
| 11 | #include <new> | 11 | #include <new> |
| 12 | #include <span> | ||
| 12 | #include <type_traits> | 13 | #include <type_traits> |
| 13 | #include <vector> | 14 | #include <vector> |
| 14 | 15 | ||
| @@ -53,7 +54,7 @@ public: | |||
| 53 | return push_count; | 54 | return push_count; |
| 54 | } | 55 | } |
| 55 | 56 | ||
| 56 | std::size_t Push(const std::vector<T>& input) { | 57 | std::size_t Push(const std::span<T> input) { |
| 57 | return Push(input.data(), input.size()); | 58 | return Push(input.data(), input.size()); |
| 58 | } | 59 | } |
| 59 | 60 | ||
diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index a69a5a7af..6fe907953 100644 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h | |||
| @@ -3,6 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <iterator> | ||
| 7 | |||
| 8 | #include "common/concepts.h" | ||
| 6 | #include "common/make_unique_for_overwrite.h" | 9 | #include "common/make_unique_for_overwrite.h" |
| 7 | 10 | ||
| 8 | namespace Common { | 11 | namespace Common { |
| @@ -16,6 +19,12 @@ namespace Common { | |||
| 16 | template <typename T> | 19 | template <typename T> |
| 17 | class ScratchBuffer { | 20 | class ScratchBuffer { |
| 18 | public: | 21 | public: |
| 22 | using iterator = T*; | ||
| 23 | using const_iterator = const T*; | ||
| 24 | using value_type = T; | ||
| 25 | using element_type = T; | ||
| 26 | using iterator_category = std::contiguous_iterator_tag; | ||
| 27 | |||
| 19 | ScratchBuffer() = default; | 28 | ScratchBuffer() = default; |
| 20 | 29 | ||
| 21 | explicit ScratchBuffer(size_t initial_capacity) | 30 | explicit ScratchBuffer(size_t initial_capacity) |
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 66dffc9bf..6cbbea1b2 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -1,8 +1,11 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <version> | ||
| 4 | #if __cpp_lib_chrono >= 201907L | 5 | #if __cpp_lib_chrono >= 201907L |
| 5 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <exception> | ||
| 8 | #include <stdexcept> | ||
| 6 | #endif | 9 | #endif |
| 7 | #include <string_view> | 10 | #include <string_view> |
| 8 | 11 | ||
| @@ -25,9 +28,19 @@ std::string GetTimeZoneString() { | |||
| 25 | if (time_zone_index == 0) { // Auto | 28 | if (time_zone_index == 0) { // Auto |
| 26 | #if __cpp_lib_chrono >= 201907L | 29 | #if __cpp_lib_chrono >= 201907L |
| 27 | const struct std::chrono::tzdb& time_zone_data = std::chrono::get_tzdb(); | 30 | const struct std::chrono::tzdb& time_zone_data = std::chrono::get_tzdb(); |
| 28 | const std::chrono::time_zone* current_zone = time_zone_data.current_zone(); | 31 | try { |
| 29 | std::string_view current_zone_name = current_zone->name(); | 32 | const std::chrono::time_zone* current_zone = time_zone_data.current_zone(); |
| 30 | location_name = current_zone_name; | 33 | std::string_view current_zone_name = current_zone->name(); |
| 34 | location_name = current_zone_name; | ||
| 35 | } catch (std::runtime_error& runtime_error) { | ||
| 36 | // VCRUNTIME will throw a runtime_error if the operating system's selected time zone | ||
| 37 | // cannot be found | ||
| 38 | location_name = Common::TimeZone::FindSystemTimeZone(); | ||
| 39 | LOG_WARNING(Common, | ||
| 40 | "Error occurred when trying to determine system time zone:\n{}\nFalling " | ||
| 41 | "back to hour offset \"{}\"", | ||
| 42 | runtime_error.what(), location_name); | ||
| 43 | } | ||
| 31 | #else | 44 | #else |
| 32 | location_name = Common::TimeZone::FindSystemTimeZone(); | 45 | location_name = Common::TimeZone::FindSystemTimeZone(); |
| 33 | #endif | 46 | #endif |
diff --git a/src/common/settings.h b/src/common/settings.h index 9682281b0..ae5ed93d8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -483,6 +483,7 @@ struct Values { | |||
| 483 | AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, | 483 | AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3, |
| 484 | "astc_recompression"}; | 484 | "astc_recompression"}; |
| 485 | SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; | 485 | SwitchableSetting<bool> use_video_framerate{false, "use_video_framerate"}; |
| 486 | SwitchableSetting<bool> barrier_feedback_loops{true, "barrier_feedback_loops"}; | ||
| 486 | 487 | ||
| 487 | SwitchableSetting<u8> bg_red{0, "bg_red"}; | 488 | SwitchableSetting<u8> bg_red{0, "bg_red"}; |
| 488 | SwitchableSetting<u8> bg_green{0, "bg_green"}; | 489 | SwitchableSetting<u8> bg_green{0, "bg_green"}; |
| @@ -524,9 +525,16 @@ struct Values { | |||
| 524 | Setting<bool> tas_loop{false, "tas_loop"}; | 525 | Setting<bool> tas_loop{false, "tas_loop"}; |
| 525 | 526 | ||
| 526 | Setting<bool> mouse_panning{false, "mouse_panning"}; | 527 | Setting<bool> mouse_panning{false, "mouse_panning"}; |
| 527 | Setting<u8, true> mouse_panning_sensitivity{50, 1, 100, "mouse_panning_sensitivity"}; | 528 | Setting<u8, true> mouse_panning_x_sensitivity{50, 1, 100, "mouse_panning_x_sensitivity"}; |
| 528 | Setting<bool> mouse_enabled{false, "mouse_enabled"}; | 529 | Setting<u8, true> mouse_panning_y_sensitivity{50, 1, 100, "mouse_panning_y_sensitivity"}; |
| 530 | Setting<u8, true> mouse_panning_deadzone_x_counterweight{ | ||
| 531 | 0, 0, 100, "mouse_panning_deadzone_x_counterweight"}; | ||
| 532 | Setting<u8, true> mouse_panning_deadzone_y_counterweight{ | ||
| 533 | 0, 0, 100, "mouse_panning_deadzone_y_counterweight"}; | ||
| 534 | Setting<u8, true> mouse_panning_decay_strength{22, 0, 100, "mouse_panning_decay_strength"}; | ||
| 535 | Setting<u8, true> mouse_panning_min_decay{5, 0, 100, "mouse_panning_min_decay"}; | ||
| 529 | 536 | ||
| 537 | Setting<bool> mouse_enabled{false, "mouse_enabled"}; | ||
| 530 | Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; | 538 | Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; |
| 531 | Setting<bool> keyboard_enabled{false, "keyboard_enabled"}; | 539 | Setting<bool> keyboard_enabled{false, "keyboard_enabled"}; |
| 532 | 540 | ||
diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp | |||
| @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { | |||
| 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. | 28 | // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. |
| 29 | static constexpr s64 Multiplier = 100; | 29 | static constexpr s64 Multiplier = 100; |
| 30 | // Convert Windows epoch to Unix epoch. | 30 | // Convert Windows epoch to Unix epoch. |
| 31 | static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; | 31 | static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; |
| 32 | 32 | ||
| 33 | FILETIME filetime; | 33 | FILETIME filetime; |
| 34 | GetSystemTimePreciseAsFileTime(&filetime); | 34 | GetSystemTimePreciseAsFileTime(&filetime); |
| 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + | 35 | return Multiplier * ((static_cast<s64>(filetime.dwHighDateTime) << 32) + |
| 36 | static_cast<s64>(filetime.dwLowDateTime)) - | 36 | static_cast<s64>(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); |
| 37 | WindowsEpochToUnixEpochNS; | ||
| 38 | } | 37 | } |
| 39 | #endif | 38 | #endif |
| 40 | 39 | ||
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index 91352912d..929ed67e4 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp | |||
| @@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) { | |||
| 93 | add_field("CPU_Extension_x64_GFNI", caps.gfni); | 93 | add_field("CPU_Extension_x64_GFNI", caps.gfni); |
| 94 | add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); | 94 | add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); |
| 95 | add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); | 95 | add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); |
| 96 | add_field("CPU_Extension_x64_MONITORX", caps.monitorx); | ||
| 96 | add_field("CPU_Extension_x64_MOVBE", caps.movbe); | 97 | add_field("CPU_Extension_x64_MOVBE", caps.movbe); |
| 97 | add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); | 98 | add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); |
| 98 | add_field("CPU_Extension_x64_POPCNT", caps.popcnt); | 99 | add_field("CPU_Extension_x64_POPCNT", caps.popcnt); |
diff --git a/src/common/thread.h b/src/common/thread.h index 8ae169b4e..c6976fb6c 100644 --- a/src/common/thread.h +++ b/src/common/thread.h | |||
| @@ -55,7 +55,7 @@ public: | |||
| 55 | is_set = false; | 55 | is_set = false; |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | [[nodiscard]] bool IsSet() { | 58 | [[nodiscard]] bool IsSet() const { |
| 59 | return is_set; | 59 | return is_set; |
| 60 | } | 60 | } |
| 61 | 61 | ||
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp | |||
| @@ -2,88 +2,75 @@ | |||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/steady_clock.h" | 4 | #include "common/steady_clock.h" |
| 5 | #include "common/uint128.h" | ||
| 6 | #include "common/wall_clock.h" | 5 | #include "common/wall_clock.h" |
| 7 | 6 | ||
| 8 | #ifdef ARCHITECTURE_x86_64 | 7 | #ifdef ARCHITECTURE_x86_64 |
| 9 | #include "common/x64/cpu_detect.h" | 8 | #include "common/x64/cpu_detect.h" |
| 10 | #include "common/x64/native_clock.h" | 9 | #include "common/x64/native_clock.h" |
| 10 | #include "common/x64/rdtsc.h" | ||
| 11 | #endif | 11 | #endif |
| 12 | 12 | ||
| 13 | namespace Common { | 13 | namespace Common { |
| 14 | 14 | ||
| 15 | class StandardWallClock final : public WallClock { | 15 | class StandardWallClock final : public WallClock { |
| 16 | public: | 16 | public: |
| 17 | explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) | 17 | explicit StandardWallClock() : start_time{SteadyClock::Now()} {} |
| 18 | : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, | ||
| 19 | start_time{SteadyClock::Now()} {} | ||
| 20 | 18 | ||
| 21 | std::chrono::nanoseconds GetTimeNS() override { | 19 | std::chrono::nanoseconds GetTimeNS() const override { |
| 22 | return SteadyClock::Now() - start_time; | 20 | return SteadyClock::Now() - start_time; |
| 23 | } | 21 | } |
| 24 | 22 | ||
| 25 | std::chrono::microseconds GetTimeUS() override { | 23 | std::chrono::microseconds GetTimeUS() const override { |
| 26 | return std::chrono::duration_cast<std::chrono::microseconds>(GetTimeNS()); | 24 | return static_cast<std::chrono::microseconds>(GetHostTicksElapsed() / NsToUsRatio::den); |
| 27 | } | 25 | } |
| 28 | 26 | ||
| 29 | std::chrono::milliseconds GetTimeMS() override { | 27 | std::chrono::milliseconds GetTimeMS() const override { |
| 30 | return std::chrono::duration_cast<std::chrono::milliseconds>(GetTimeNS()); | 28 | return static_cast<std::chrono::milliseconds>(GetHostTicksElapsed() / NsToMsRatio::den); |
| 31 | } | 29 | } |
| 32 | 30 | ||
| 33 | u64 GetClockCycles() override { | 31 | u64 GetCNTPCT() const override { |
| 34 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); | 32 | return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; |
| 35 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 36 | } | 33 | } |
| 37 | 34 | ||
| 38 | u64 GetCPUCycles() override { | 35 | u64 GetGPUTick() const override { |
| 39 | const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); | 36 | return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; |
| 40 | return Common::Divide128On32(temp, NS_RATIO).first; | ||
| 41 | } | 37 | } |
| 42 | 38 | ||
| 43 | void Pause([[maybe_unused]] bool is_paused) override { | 39 | u64 GetHostTicksNow() const override { |
| 44 | // Do nothing in this clock type. | 40 | return static_cast<u64>(SteadyClock::Now().time_since_epoch().count()); |
| 41 | } | ||
| 42 | |||
| 43 | u64 GetHostTicksElapsed() const override { | ||
| 44 | return static_cast<u64>(GetTimeNS().count()); | ||
| 45 | } | ||
| 46 | |||
| 47 | bool IsNative() const override { | ||
| 48 | return false; | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | private: | 51 | private: |
| 48 | SteadyClock::time_point start_time; | 52 | SteadyClock::time_point start_time; |
| 49 | }; | 53 | }; |
| 50 | 54 | ||
| 55 | std::unique_ptr<WallClock> CreateOptimalClock() { | ||
| 51 | #ifdef ARCHITECTURE_x86_64 | 56 | #ifdef ARCHITECTURE_x86_64 |
| 52 | |||
| 53 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 54 | u64 emulated_clock_frequency) { | ||
| 55 | const auto& caps = GetCPUCaps(); | 57 | const auto& caps = GetCPUCaps(); |
| 56 | u64 rtsc_frequency = 0; | ||
| 57 | if (caps.invariant_tsc) { | ||
| 58 | rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); | ||
| 59 | } | ||
| 60 | 58 | ||
| 61 | // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: | 59 | if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { |
| 62 | // - A nanosecond | 60 | return std::make_unique<X64::NativeClock>(caps.tsc_frequency); |
| 63 | // - The emulated CPU frequency | ||
| 64 | // - The emulated clock counter frequency (CNTFRQ) | ||
| 65 | if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || | ||
| 66 | rtsc_frequency <= emulated_clock_frequency) { | ||
| 67 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, | ||
| 68 | emulated_clock_frequency); | ||
| 69 | } else { | 61 | } else { |
| 70 | return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency, | 62 | // Fallback to StandardWallClock if the hardware TSC |
| 71 | rtsc_frequency); | 63 | // - Is not invariant |
| 64 | // - Is not more precise than GPUTickFreq | ||
| 65 | return std::make_unique<StandardWallClock>(); | ||
| 72 | } | 66 | } |
| 73 | } | ||
| 74 | |||
| 75 | #else | 67 | #else |
| 76 | 68 | return std::make_unique<StandardWallClock>(); | |
| 77 | std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | ||
| 78 | u64 emulated_clock_frequency) { | ||
| 79 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 80 | } | ||
| 81 | |||
| 82 | #endif | 69 | #endif |
| 70 | } | ||
| 83 | 71 | ||
| 84 | std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 72 | std::unique_ptr<WallClock> CreateStandardWallClock() { |
| 85 | u64 emulated_clock_frequency) { | 73 | return std::make_unique<StandardWallClock>(); |
| 86 | return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); | ||
| 87 | } | 74 | } |
| 88 | 75 | ||
| 89 | } // namespace Common | 76 | } // namespace Common |
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <ratio> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | 11 | ||
| @@ -12,50 +13,82 @@ namespace Common { | |||
| 12 | 13 | ||
| 13 | class WallClock { | 14 | class WallClock { |
| 14 | public: | 15 | public: |
| 15 | static constexpr u64 NS_RATIO = 1'000'000'000; | 16 | static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz |
| 16 | static constexpr u64 US_RATIO = 1'000'000; | 17 | static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz |
| 17 | static constexpr u64 MS_RATIO = 1'000; | 18 | static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz |
| 18 | 19 | ||
| 19 | virtual ~WallClock() = default; | 20 | virtual ~WallClock() = default; |
| 20 | 21 | ||
| 21 | /// Returns current wall time in nanoseconds | 22 | /// @returns The time in nanoseconds since the construction of this clock. |
| 22 | [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; | 23 | virtual std::chrono::nanoseconds GetTimeNS() const = 0; |
| 23 | 24 | ||
| 24 | /// Returns current wall time in microseconds | 25 | /// @returns The time in microseconds since the construction of this clock. |
| 25 | [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; | 26 | virtual std::chrono::microseconds GetTimeUS() const = 0; |
| 26 | 27 | ||
| 27 | /// Returns current wall time in milliseconds | 28 | /// @returns The time in milliseconds since the construction of this clock. |
| 28 | [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; | 29 | virtual std::chrono::milliseconds GetTimeMS() const = 0; |
| 29 | 30 | ||
| 30 | /// Returns current wall time in emulated clock cycles | 31 | /// @returns The guest CNTPCT ticks since the construction of this clock. |
| 31 | [[nodiscard]] virtual u64 GetClockCycles() = 0; | 32 | virtual u64 GetCNTPCT() const = 0; |
| 32 | 33 | ||
| 33 | /// Returns current wall time in emulated cpu cycles | 34 | /// @returns The guest GPU ticks since the construction of this clock. |
| 34 | [[nodiscard]] virtual u64 GetCPUCycles() = 0; | 35 | virtual u64 GetGPUTick() const = 0; |
| 35 | 36 | ||
| 36 | virtual void Pause(bool is_paused) = 0; | 37 | /// @returns The raw host timer ticks since an indeterminate epoch. |
| 38 | virtual u64 GetHostTicksNow() const = 0; | ||
| 37 | 39 | ||
| 38 | /// Tells if the wall clock, uses the host CPU's hardware clock | 40 | /// @returns The raw host timer ticks since the construction of this clock. |
| 39 | [[nodiscard]] bool IsNative() const { | 41 | virtual u64 GetHostTicksElapsed() const = 0; |
| 40 | return is_native; | 42 | |
| 43 | /// @returns Whether the clock directly uses the host's hardware clock. | ||
| 44 | virtual bool IsNative() const = 0; | ||
| 45 | |||
| 46 | static inline u64 NSToCNTPCT(u64 ns) { | ||
| 47 | return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; | ||
| 48 | } | ||
| 49 | |||
| 50 | static inline u64 NSToGPUTick(u64 ns) { | ||
| 51 | return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; | ||
| 52 | } | ||
| 53 | |||
| 54 | // Cycle Timing | ||
| 55 | |||
| 56 | static inline u64 CPUTickToNS(u64 cpu_tick) { | ||
| 57 | return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; | ||
| 58 | } | ||
| 59 | |||
| 60 | static inline u64 CPUTickToUS(u64 cpu_tick) { | ||
| 61 | return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; | ||
| 62 | } | ||
| 63 | |||
| 64 | static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { | ||
| 65 | return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline u64 CPUTickToGPUTick(u64 cpu_tick) { | ||
| 69 | return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; | ||
| 41 | } | 70 | } |
| 42 | 71 | ||
| 43 | protected: | 72 | protected: |
| 44 | explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) | 73 | using NsRatio = std::nano; |
| 45 | : emulated_cpu_frequency{emulated_cpu_frequency_}, | 74 | using UsRatio = std::micro; |
| 46 | emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} | 75 | using MsRatio = std::milli; |
| 76 | |||
| 77 | using NsToUsRatio = std::ratio_divide<std::nano, std::micro>; | ||
| 78 | using NsToMsRatio = std::ratio_divide<std::nano, std::milli>; | ||
| 79 | using NsToCNTPCTRatio = std::ratio<CNTFRQ, std::nano::den>; | ||
| 80 | using NsToGPUTickRatio = std::ratio<GPUTickFreq, std::nano::den>; | ||
| 47 | 81 | ||
| 48 | u64 emulated_cpu_frequency; | 82 | // Cycle Timing |
| 49 | u64 emulated_clock_frequency; | ||
| 50 | 83 | ||
| 51 | private: | 84 | using CPUTickToNsRatio = std::ratio<std::nano::den, CPUTickFreq>; |
| 52 | bool is_native; | 85 | using CPUTickToUsRatio = std::ratio<std::micro::den, CPUTickFreq>; |
| 86 | using CPUTickToCNTPCTRatio = std::ratio<CNTFRQ, CPUTickFreq>; | ||
| 87 | using CPUTickToGPUTickRatio = std::ratio<GPUTickFreq, CPUTickFreq>; | ||
| 53 | }; | 88 | }; |
| 54 | 89 | ||
| 55 | [[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, | 90 | std::unique_ptr<WallClock> CreateOptimalClock(); |
| 56 | u64 emulated_clock_frequency); | ||
| 57 | 91 | ||
| 58 | [[nodiscard]] std::unique_ptr<WallClock> CreateStandardWallClock(u64 emulated_cpu_frequency, | 92 | std::unique_ptr<WallClock> CreateStandardWallClock(); |
| 59 | u64 emulated_clock_frequency); | ||
| 60 | 93 | ||
| 61 | } // namespace Common | 94 | } // namespace Common |
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..780120a5b 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 16 | #include "common/x64/cpu_detect.h" | 16 | #include "common/x64/cpu_detect.h" |
| 17 | #include "common/x64/rdtsc.h" | ||
| 17 | 18 | ||
| 18 | #ifdef _WIN32 | 19 | #ifdef _WIN32 |
| 19 | #include <windows.h> | 20 | #include <windows.h> |
| @@ -167,6 +168,7 @@ static CPUCaps Detect() { | |||
| 167 | __cpuid(cpu_id, 0x80000001); | 168 | __cpuid(cpu_id, 0x80000001); |
| 168 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); | 169 | caps.lzcnt = Common::Bit<5>(cpu_id[2]); |
| 169 | caps.fma4 = Common::Bit<16>(cpu_id[2]); | 170 | caps.fma4 = Common::Bit<16>(cpu_id[2]); |
| 171 | caps.monitorx = Common::Bit<29>(cpu_id[2]); | ||
| 170 | } | 172 | } |
| 171 | 173 | ||
| 172 | if (max_ex_fn >= 0x80000007) { | 174 | if (max_ex_fn >= 0x80000007) { |
| @@ -187,6 +189,8 @@ static CPUCaps Detect() { | |||
| 187 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * | 189 | caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) * |
| 188 | caps.tsc_crystal_ratio_numerator / | 190 | caps.tsc_crystal_ratio_numerator / |
| 189 | caps.tsc_crystal_ratio_denominator; | 191 | caps.tsc_crystal_ratio_denominator; |
| 192 | } else { | ||
| 193 | caps.tsc_frequency = X64::EstimateRDTSCFrequency(); | ||
| 190 | } | 194 | } |
| 191 | } | 195 | } |
| 192 | 196 | ||
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 8253944d6..756459417 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h | |||
| @@ -63,6 +63,7 @@ struct CPUCaps { | |||
| 63 | bool gfni : 1; | 63 | bool gfni : 1; |
| 64 | bool invariant_tsc : 1; | 64 | bool invariant_tsc : 1; |
| 65 | bool lzcnt : 1; | 65 | bool lzcnt : 1; |
| 66 | bool monitorx : 1; | ||
| 66 | bool movbe : 1; | 67 | bool movbe : 1; |
| 67 | bool pclmulqdq : 1; | 68 | bool pclmulqdq : 1; |
| 68 | bool popcnt : 1; | 69 | bool popcnt : 1; |
diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..41d385f59 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp | |||
| @@ -9,58 +9,64 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/x64/cpu_detect.h" | 10 | #include "common/x64/cpu_detect.h" |
| 11 | #include "common/x64/cpu_wait.h" | 11 | #include "common/x64/cpu_wait.h" |
| 12 | #include "common/x64/rdtsc.h" | ||
| 12 | 13 | ||
| 13 | namespace Common::X64 { | 14 | namespace Common::X64 { |
| 14 | 15 | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | ||
| 19 | // For reference: | ||
| 20 | // At 1 GHz, 100K cycles is 100us | ||
| 21 | // At 2 GHz, 100K cycles is 50us | ||
| 22 | // At 4 GHz, 100K cycles is 25us | ||
| 23 | constexpr auto PauseCycles = 100'000U; | ||
| 24 | |||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 15 | #ifdef _MSC_VER | 27 | #ifdef _MSC_VER |
| 16 | __forceinline static u64 FencedRDTSC() { | 28 | __forceinline static void TPAUSE() { |
| 17 | _mm_lfence(); | 29 | static constexpr auto RequestC02State = 0U; |
| 18 | _ReadWriteBarrier(); | 30 | _tpause(RequestC02State, FencedRDTSC() + PauseCycles); |
| 19 | const u64 result = __rdtsc(); | ||
| 20 | _mm_lfence(); | ||
| 21 | _ReadWriteBarrier(); | ||
| 22 | return result; | ||
| 23 | } | 31 | } |
| 24 | 32 | ||
| 25 | __forceinline static void TPAUSE() { | 33 | __forceinline static void MWAITX() { |
| 26 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 34 | static constexpr auto EnableWaitTimeFlag = 1U << 1; |
| 27 | // For reference: | 35 | static constexpr auto RequestC1State = 0U; |
| 28 | // At 1 GHz, 100K cycles is 100us | 36 | |
| 29 | // At 2 GHz, 100K cycles is 50us | 37 | // monitor_var should be aligned to a cache line. |
| 30 | // At 4 GHz, 100K cycles is 25us | 38 | alignas(64) u64 monitor_var{}; |
| 31 | static constexpr auto PauseCycles = 100'000; | 39 | _mm_monitorx(&monitor_var, 0, 0); |
| 32 | _tpause(0, FencedRDTSC() + PauseCycles); | 40 | _mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles); |
| 33 | } | 41 | } |
| 34 | #else | 42 | #else |
| 35 | static u64 FencedRDTSC() { | ||
| 36 | u64 eax; | ||
| 37 | u64 edx; | ||
| 38 | asm volatile("lfence\n\t" | ||
| 39 | "rdtsc\n\t" | ||
| 40 | "lfence\n\t" | ||
| 41 | : "=a"(eax), "=d"(edx)); | ||
| 42 | return (edx << 32) | eax; | ||
| 43 | } | ||
| 44 | |||
| 45 | static void TPAUSE() { | 43 | static void TPAUSE() { |
| 46 | // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. | 44 | static constexpr auto RequestC02State = 0U; |
| 47 | // For reference: | ||
| 48 | // At 1 GHz, 100K cycles is 100us | ||
| 49 | // At 2 GHz, 100K cycles is 50us | ||
| 50 | // At 4 GHz, 100K cycles is 25us | ||
| 51 | static constexpr auto PauseCycles = 100'000; | ||
| 52 | const auto tsc = FencedRDTSC() + PauseCycles; | 45 | const auto tsc = FencedRDTSC() + PauseCycles; |
| 53 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); | 46 | const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); |
| 54 | const auto edx = static_cast<u32>(tsc >> 32); | 47 | const auto edx = static_cast<u32>(tsc >> 32); |
| 55 | asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); | 48 | asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax)); |
| 49 | } | ||
| 50 | |||
| 51 | static void MWAITX() { | ||
| 52 | static constexpr auto EnableWaitTimeFlag = 1U << 1; | ||
| 53 | static constexpr auto RequestC1State = 0U; | ||
| 54 | |||
| 55 | // monitor_var should be aligned to a cache line. | ||
| 56 | alignas(64) u64 monitor_var{}; | ||
| 57 | asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0)); | ||
| 58 | asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag)); | ||
| 56 | } | 59 | } |
| 57 | #endif | 60 | #endif |
| 58 | 61 | ||
| 59 | void MicroSleep() { | 62 | void MicroSleep() { |
| 60 | static const bool has_waitpkg = GetCPUCaps().waitpkg; | 63 | static const bool has_waitpkg = GetCPUCaps().waitpkg; |
| 64 | static const bool has_monitorx = GetCPUCaps().monitorx; | ||
| 61 | 65 | ||
| 62 | if (has_waitpkg) { | 66 | if (has_waitpkg) { |
| 63 | TPAUSE(); | 67 | TPAUSE(); |
| 68 | } else if (has_monitorx) { | ||
| 69 | MWAITX(); | ||
| 64 | } else { | 70 | } else { |
| 65 | std::this_thread::yield(); | 71 | std::this_thread::yield(); |
| 66 | } | 72 | } |
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp | |||
| @@ -1,164 +1,50 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #include <array> | ||
| 5 | #include <chrono> | ||
| 6 | #include <thread> | ||
| 7 | |||
| 8 | #include "common/atomic_ops.h" | ||
| 9 | #include "common/steady_clock.h" | ||
| 10 | #include "common/uint128.h" | 4 | #include "common/uint128.h" |
| 11 | #include "common/x64/native_clock.h" | 5 | #include "common/x64/native_clock.h" |
| 6 | #include "common/x64/rdtsc.h" | ||
| 12 | 7 | ||
| 13 | #ifdef _MSC_VER | 8 | namespace Common::X64 { |
| 14 | #include <intrin.h> | ||
| 15 | #endif | ||
| 16 | |||
| 17 | namespace Common { | ||
| 18 | 9 | ||
| 19 | #ifdef _MSC_VER | 10 | NativeClock::NativeClock(u64 rdtsc_frequency_) |
| 20 | __forceinline static u64 FencedRDTSC() { | 11 | : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, |
| 21 | _mm_lfence(); | 12 | ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, |
| 22 | _ReadWriteBarrier(); | 13 | us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, |
| 23 | const u64 result = __rdtsc(); | 14 | ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, |
| 24 | _mm_lfence(); | 15 | cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, |
| 25 | _ReadWriteBarrier(); | 16 | gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} |
| 26 | return result; | ||
| 27 | } | ||
| 28 | #else | ||
| 29 | static u64 FencedRDTSC() { | ||
| 30 | u64 eax; | ||
| 31 | u64 edx; | ||
| 32 | asm volatile("lfence\n\t" | ||
| 33 | "rdtsc\n\t" | ||
| 34 | "lfence\n\t" | ||
| 35 | : "=a"(eax), "=d"(edx)); | ||
| 36 | return (edx << 32) | eax; | ||
| 37 | } | ||
| 38 | #endif | ||
| 39 | 17 | ||
| 40 | template <u64 Nearest> | 18 | std::chrono::nanoseconds NativeClock::GetTimeNS() const { |
| 41 | static u64 RoundToNearest(u64 value) { | 19 | return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; |
| 42 | const auto mod = value % Nearest; | ||
| 43 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 44 | } | 20 | } |
| 45 | 21 | ||
| 46 | u64 EstimateRDTSCFrequency() { | 22 | std::chrono::microseconds NativeClock::GetTimeUS() const { |
| 47 | // Discard the first result measuring the rdtsc. | 23 | return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; |
| 48 | FencedRDTSC(); | ||
| 49 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 50 | FencedRDTSC(); | ||
| 51 | |||
| 52 | // Get the current time. | ||
| 53 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 54 | const u64 tsc_start = FencedRDTSC(); | ||
| 55 | // Wait for 250 milliseconds. | ||
| 56 | std::this_thread::sleep_for(std::chrono::milliseconds{250}); | ||
| 57 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 58 | const u64 tsc_end = FencedRDTSC(); | ||
| 59 | // Calculate differences. | ||
| 60 | const u64 timer_diff = static_cast<u64>( | ||
| 61 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 62 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 63 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 64 | return RoundToNearest<1000>(tsc_freq); | ||
| 65 | } | 24 | } |
| 66 | 25 | ||
| 67 | namespace X64 { | 26 | std::chrono::milliseconds NativeClock::GetTimeMS() const { |
| 68 | NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 27 | return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; |
| 69 | u64 rtsc_frequency_) | ||
| 70 | : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ | ||
| 71 | rtsc_frequency_} { | ||
| 72 | // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. | ||
| 73 | time_sync_thread = std::jthread{[this](std::stop_token token) { | ||
| 74 | // Get the current time. | ||
| 75 | const auto start_time = Common::RealTimeClock::Now(); | ||
| 76 | const u64 tsc_start = FencedRDTSC(); | ||
| 77 | // Wait for 10 seconds. | ||
| 78 | if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { | ||
| 79 | return; | ||
| 80 | } | ||
| 81 | const auto end_time = Common::RealTimeClock::Now(); | ||
| 82 | const u64 tsc_end = FencedRDTSC(); | ||
| 83 | // Calculate differences. | ||
| 84 | const u64 timer_diff = static_cast<u64>( | ||
| 85 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 86 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 87 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 88 | rtsc_frequency = tsc_freq; | ||
| 89 | CalculateAndSetFactors(); | ||
| 90 | }}; | ||
| 91 | |||
| 92 | time_point.inner.last_measure = FencedRDTSC(); | ||
| 93 | time_point.inner.accumulated_ticks = 0U; | ||
| 94 | CalculateAndSetFactors(); | ||
| 95 | } | 28 | } |
| 96 | 29 | ||
| 97 | u64 NativeClock::GetRTSC() { | 30 | u64 NativeClock::GetCNTPCT() const { |
| 98 | TimePoint new_time_point{}; | 31 | return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); |
| 99 | TimePoint current_time_point{}; | ||
| 100 | |||
| 101 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 102 | do { | ||
| 103 | const u64 current_measure = FencedRDTSC(); | ||
| 104 | u64 diff = current_measure - current_time_point.inner.last_measure; | ||
| 105 | diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0) | ||
| 106 | new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure | ||
| 107 | ? current_measure | ||
| 108 | : current_time_point.inner.last_measure; | ||
| 109 | new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; | ||
| 110 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 111 | current_time_point.pack, current_time_point.pack)); | ||
| 112 | return new_time_point.inner.accumulated_ticks; | ||
| 113 | } | 32 | } |
| 114 | 33 | ||
| 115 | void NativeClock::Pause(bool is_paused) { | 34 | u64 NativeClock::GetGPUTick() const { |
| 116 | if (!is_paused) { | 35 | return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); |
| 117 | TimePoint current_time_point{}; | ||
| 118 | TimePoint new_time_point{}; | ||
| 119 | |||
| 120 | current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); | ||
| 121 | do { | ||
| 122 | new_time_point.pack = current_time_point.pack; | ||
| 123 | new_time_point.inner.last_measure = FencedRDTSC(); | ||
| 124 | } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, | ||
| 125 | current_time_point.pack, current_time_point.pack)); | ||
| 126 | } | ||
| 127 | } | 36 | } |
| 128 | 37 | ||
| 129 | std::chrono::nanoseconds NativeClock::GetTimeNS() { | 38 | u64 NativeClock::GetHostTicksNow() const { |
| 130 | const u64 rtsc_value = GetRTSC(); | 39 | return FencedRDTSC(); |
| 131 | return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; | ||
| 132 | } | 40 | } |
| 133 | 41 | ||
| 134 | std::chrono::microseconds NativeClock::GetTimeUS() { | 42 | u64 NativeClock::GetHostTicksElapsed() const { |
| 135 | const u64 rtsc_value = GetRTSC(); | 43 | return FencedRDTSC() - start_ticks; |
| 136 | return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; | ||
| 137 | } | 44 | } |
| 138 | 45 | ||
| 139 | std::chrono::milliseconds NativeClock::GetTimeMS() { | 46 | bool NativeClock::IsNative() const { |
| 140 | const u64 rtsc_value = GetRTSC(); | 47 | return true; |
| 141 | return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; | ||
| 142 | } | 48 | } |
| 143 | 49 | ||
| 144 | u64 NativeClock::GetClockCycles() { | 50 | } // namespace Common::X64 |
| 145 | const u64 rtsc_value = GetRTSC(); | ||
| 146 | return MultiplyHigh(rtsc_value, clock_rtsc_factor); | ||
| 147 | } | ||
| 148 | |||
| 149 | u64 NativeClock::GetCPUCycles() { | ||
| 150 | const u64 rtsc_value = GetRTSC(); | ||
| 151 | return MultiplyHigh(rtsc_value, cpu_rtsc_factor); | ||
| 152 | } | ||
| 153 | |||
| 154 | void NativeClock::CalculateAndSetFactors() { | ||
| 155 | ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); | ||
| 156 | us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); | ||
| 157 | ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); | ||
| 158 | clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); | ||
| 159 | cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); | ||
| 160 | } | ||
| 161 | |||
| 162 | } // namespace X64 | ||
| 163 | |||
| 164 | } // namespace Common | ||
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h | |||
| @@ -3,58 +3,39 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "common/polyfill_thread.h" | ||
| 7 | #include "common/wall_clock.h" | 6 | #include "common/wall_clock.h" |
| 8 | 7 | ||
| 9 | namespace Common { | 8 | namespace Common::X64 { |
| 10 | 9 | ||
| 11 | namespace X64 { | ||
| 12 | class NativeClock final : public WallClock { | 10 | class NativeClock final : public WallClock { |
| 13 | public: | 11 | public: |
| 14 | explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, | 12 | explicit NativeClock(u64 rdtsc_frequency_); |
| 15 | u64 rtsc_frequency_); | ||
| 16 | 13 | ||
| 17 | std::chrono::nanoseconds GetTimeNS() override; | 14 | std::chrono::nanoseconds GetTimeNS() const override; |
| 18 | 15 | ||
| 19 | std::chrono::microseconds GetTimeUS() override; | 16 | std::chrono::microseconds GetTimeUS() const override; |
| 20 | 17 | ||
| 21 | std::chrono::milliseconds GetTimeMS() override; | 18 | std::chrono::milliseconds GetTimeMS() const override; |
| 22 | 19 | ||
| 23 | u64 GetClockCycles() override; | 20 | u64 GetCNTPCT() const override; |
| 24 | 21 | ||
| 25 | u64 GetCPUCycles() override; | 22 | u64 GetGPUTick() const override; |
| 26 | 23 | ||
| 27 | void Pause(bool is_paused) override; | 24 | u64 GetHostTicksNow() const override; |
| 28 | 25 | ||
| 29 | private: | 26 | u64 GetHostTicksElapsed() const override; |
| 30 | u64 GetRTSC(); | ||
| 31 | |||
| 32 | void CalculateAndSetFactors(); | ||
| 33 | |||
| 34 | union alignas(16) TimePoint { | ||
| 35 | TimePoint() : pack{} {} | ||
| 36 | u128 pack{}; | ||
| 37 | struct Inner { | ||
| 38 | u64 last_measure{}; | ||
| 39 | u64 accumulated_ticks{}; | ||
| 40 | } inner; | ||
| 41 | }; | ||
| 42 | |||
| 43 | TimePoint time_point; | ||
| 44 | 27 | ||
| 45 | // factors | 28 | bool IsNative() const override; |
| 46 | u64 clock_rtsc_factor{}; | ||
| 47 | u64 cpu_rtsc_factor{}; | ||
| 48 | u64 ns_rtsc_factor{}; | ||
| 49 | u64 us_rtsc_factor{}; | ||
| 50 | u64 ms_rtsc_factor{}; | ||
| 51 | 29 | ||
| 52 | u64 rtsc_frequency; | 30 | private: |
| 53 | 31 | u64 start_ticks; | |
| 54 | std::jthread time_sync_thread; | 32 | u64 rdtsc_frequency; |
| 33 | |||
| 34 | u64 ns_rdtsc_factor; | ||
| 35 | u64 us_rdtsc_factor; | ||
| 36 | u64 ms_rdtsc_factor; | ||
| 37 | u64 cntpct_rdtsc_factor; | ||
| 38 | u64 gputick_rdtsc_factor; | ||
| 55 | }; | 39 | }; |
| 56 | } // namespace X64 | ||
| 57 | |||
| 58 | u64 EstimateRDTSCFrequency(); | ||
| 59 | 40 | ||
| 60 | } // namespace Common | 41 | } // namespace Common::X64 |
diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <thread> | ||
| 5 | |||
| 6 | #include "common/steady_clock.h" | ||
| 7 | #include "common/uint128.h" | ||
| 8 | #include "common/x64/rdtsc.h" | ||
| 9 | |||
| 10 | namespace Common::X64 { | ||
| 11 | |||
| 12 | template <u64 Nearest> | ||
| 13 | static u64 RoundToNearest(u64 value) { | ||
| 14 | const auto mod = value % Nearest; | ||
| 15 | return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); | ||
| 16 | } | ||
| 17 | |||
| 18 | u64 EstimateRDTSCFrequency() { | ||
| 19 | // Discard the first result measuring the rdtsc. | ||
| 20 | FencedRDTSC(); | ||
| 21 | std::this_thread::sleep_for(std::chrono::milliseconds{1}); | ||
| 22 | FencedRDTSC(); | ||
| 23 | |||
| 24 | // Get the current time. | ||
| 25 | const auto start_time = RealTimeClock::Now(); | ||
| 26 | const u64 tsc_start = FencedRDTSC(); | ||
| 27 | // Wait for 100 milliseconds. | ||
| 28 | std::this_thread::sleep_for(std::chrono::milliseconds{100}); | ||
| 29 | const auto end_time = RealTimeClock::Now(); | ||
| 30 | const u64 tsc_end = FencedRDTSC(); | ||
| 31 | // Calculate differences. | ||
| 32 | const u64 timer_diff = static_cast<u64>( | ||
| 33 | std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count()); | ||
| 34 | const u64 tsc_diff = tsc_end - tsc_start; | ||
| 35 | const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); | ||
| 36 | return RoundToNearest<100'000>(tsc_freq); | ||
| 37 | } | ||
| 38 | |||
| 39 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #ifdef _MSC_VER | ||
| 7 | #include <intrin.h> | ||
| 8 | #endif | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | #ifdef _MSC_VER | ||
| 15 | __forceinline static u64 FencedRDTSC() { | ||
| 16 | _mm_lfence(); | ||
| 17 | _ReadWriteBarrier(); | ||
| 18 | const u64 result = __rdtsc(); | ||
| 19 | _mm_lfence(); | ||
| 20 | _ReadWriteBarrier(); | ||
| 21 | return result; | ||
| 22 | } | ||
| 23 | #else | ||
| 24 | static inline u64 FencedRDTSC() { | ||
| 25 | u64 eax; | ||
| 26 | u64 edx; | ||
| 27 | asm volatile("lfence\n\t" | ||
| 28 | "rdtsc\n\t" | ||
| 29 | "lfence\n\t" | ||
| 30 | : "=a"(eax), "=d"(edx)); | ||
| 31 | return (edx << 32) | eax; | ||
| 32 | } | ||
| 33 | #endif | ||
| 34 | |||
| 35 | u64 EstimateRDTSCFrequency(); | ||
| 36 | |||
| 37 | } // namespace Common::X64 | ||