Diffstat (limited to 'src/core/core_timing.cpp')
-rw-r--r--  src/core/core_timing.cpp  256
1 file changed, 142 insertions(+), 114 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 46d4178c4..5c83c41a4 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -1,29 +1,27 @@
-// Copyright 2008 Dolphin Emulator Project / 2017 Citra Emulator Project
-// Licensed under GPLv2+
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "core/core_timing.h"
-
 #include <algorithm>
 #include <mutex>
 #include <string>
 #include <tuple>
 
 #include "common/assert.h"
-#include "common/thread.h"
+#include "common/microprofile.h"
+#include "core/core_timing.h"
 #include "core/core_timing_util.h"
-#include "core/hardware_properties.h"
 
 namespace Core::Timing {
 
-constexpr int MAX_SLICE_LENGTH = 10000;
+constexpr u64 MAX_SLICE_LENGTH = 4000;
 
 std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
     return std::make_shared<EventType>(std::move(callback), std::move(name));
 }
 
 struct CoreTiming::Event {
-    s64 time;
+    u64 time;
     u64 fifo_order;
     u64 userdata;
     std::weak_ptr<EventType> type;
@@ -39,51 +37,90 @@ struct CoreTiming::Event {
     }
 };
 
-CoreTiming::CoreTiming() = default;
-CoreTiming::~CoreTiming() = default;
+CoreTiming::CoreTiming() {
+    clock =
+        Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
+}
 
-void CoreTiming::Initialize() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    slice_length = MAX_SLICE_LENGTH;
-    global_timer = 0;
-    idled_cycles = 0;
-    current_context = 0;
+CoreTiming::~CoreTiming() = default;
 
-    // The time between CoreTiming being initialized and the first call to Advance() is considered
-    // the slice boundary between slice -1 and slice 0. Dispatcher loops must call Advance() before
-    // executing the first cycle of each slice to prepare the slice length and downcount for
-    // that slice.
-    is_global_timer_sane = true;
+void CoreTiming::ThreadEntry(CoreTiming& instance) {
+    constexpr char name[] = "yuzu:HostTiming";
+    MicroProfileOnThreadCreate(name);
+    Common::SetCurrentThreadName(name);
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
+    instance.on_thread_init();
+    instance.ThreadLoop();
+}
 
+void CoreTiming::Initialize(std::function<void(void)>&& on_thread_init_) {
+    on_thread_init = std::move(on_thread_init_);
     event_fifo_id = 0;
-
+    shutting_down = false;
+    ticks = 0;
     const auto empty_timed_callback = [](u64, s64) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
+    if (is_multicore) {
+        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+    }
 }
 
 void CoreTiming::Shutdown() {
+    paused = true;
+    shutting_down = true;
+    pause_event.Set();
+    event.Set();
+    if (timer_thread) {
+        timer_thread->join();
+    }
     ClearPendingEvents();
+    timer_thread.reset();
+    has_started = false;
 }
 
-void CoreTiming::ScheduleEvent(s64 cycles_into_future, const std::shared_ptr<EventType>& event_type,
-                               u64 userdata) {
-    std::lock_guard guard{inner_mutex};
-    const s64 timeout = GetTicks() + cycles_into_future;
+void CoreTiming::Pause(bool is_paused) {
+    paused = is_paused;
+    pause_event.Set();
+}
 
-    // If this event needs to be scheduled before the next advance(), force one early
-    if (!is_global_timer_sane) {
-        ForceExceptionCheck(cycles_into_future);
+void CoreTiming::SyncPause(bool is_paused) {
+    if (is_paused == paused && paused_set == paused) {
+        return;
+    }
+    Pause(is_paused);
+    if (timer_thread) {
+        if (!is_paused) {
+            pause_event.Set();
+        }
+        event.Set();
+        while (paused_set != is_paused)
+            ;
     }
+}
 
-    event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
+bool CoreTiming::IsRunning() const {
+    return !paused_set;
+}
 
-    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+bool CoreTiming::HasPendingEvents() const {
+    return !(wait_set && event_queue.empty());
 }
 
-void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
-    std::lock_guard guard{inner_mutex};
+void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
+                               u64 userdata) {
+    {
+        std::scoped_lock scope{basic_lock};
+        const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
+
+        event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
 
+        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    }
+    event.Set();
+}
+
+void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
+    std::scoped_lock scope{basic_lock};
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.userdata == userdata;
     });
@@ -95,21 +132,39 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u
     }
 }
 
-u64 CoreTiming::GetTicks() const {
-    u64 ticks = static_cast<u64>(global_timer);
-    if (!is_global_timer_sane) {
-        ticks += accumulated_ticks;
+void CoreTiming::AddTicks(u64 ticks) {
+    this->ticks += ticks;
+    downcount -= ticks;
+}
+
+void CoreTiming::Idle() {
+    if (!event_queue.empty()) {
+        const u64 next_event_time = event_queue.front().time;
+        const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U;
+        if (next_ticks > ticks) {
+            ticks = next_ticks;
+        }
+        return;
     }
-    return ticks;
+    ticks += 1000U;
 }
 
-u64 CoreTiming::GetIdleTicks() const {
-    return static_cast<u64>(idled_cycles);
+void CoreTiming::ResetTicks() {
+    downcount = MAX_SLICE_LENGTH;
 }
 
-void CoreTiming::AddTicks(u64 ticks) {
-    accumulated_ticks += ticks;
-    downcounts[current_context] -= static_cast<s64>(ticks);
+u64 CoreTiming::GetCPUTicks() const {
+    if (is_multicore) {
+        return clock->GetCPUCycles();
+    }
+    return ticks;
+}
+
+u64 CoreTiming::GetClockTicks() const {
+    if (is_multicore) {
+        return clock->GetClockCycles();
+    }
+    return CpuCyclesToClockCycles(ticks);
 }
 
 void CoreTiming::ClearPendingEvents() {
@@ -117,7 +172,7 @@ void CoreTiming::ClearPendingEvents() {
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::lock_guard guard{inner_mutex};
+    basic_lock.lock();
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -128,99 +183,72 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
         event_queue.erase(itr, event_queue.end());
         std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
     }
+    basic_lock.unlock();
 }
 
-void CoreTiming::ForceExceptionCheck(s64 cycles) {
-    cycles = std::max<s64>(0, cycles);
-    if (downcounts[current_context] <= cycles) {
-        return;
-    }
-
-    // downcount is always (much) smaller than MAX_INT so we can safely cast cycles to an int
-    // here. Account for cycles already executed by adjusting the g.slice_length
-    downcounts[current_context] = static_cast<int>(cycles);
-}
-
-std::optional<u64> CoreTiming::NextAvailableCore(const s64 needed_ticks) const {
-    const u64 original_context = current_context;
-    u64 next_context = (original_context + 1) % num_cpu_cores;
-    while (next_context != original_context) {
-        if (time_slice[next_context] >= needed_ticks) {
-            return {next_context};
-        } else if (time_slice[next_context] >= 0) {
-            return std::nullopt;
-        }
-        next_context = (next_context + 1) % num_cpu_cores;
-    }
-    return std::nullopt;
-}
-
-void CoreTiming::Advance() {
-    std::unique_lock<std::mutex> guard(inner_mutex);
-
-    const u64 cycles_executed = accumulated_ticks;
-    time_slice[current_context] = std::max<s64>(0, time_slice[current_context] - accumulated_ticks);
-    global_timer += cycles_executed;
-
-    is_global_timer_sane = true;
+std::optional<s64> CoreTiming::Advance() {
+    std::scoped_lock advance_scope{advance_lock};
+    std::scoped_lock basic_scope{basic_lock};
+    global_timer = GetGlobalTimeNs().count();
 
     while (!event_queue.empty() && event_queue.front().time <= global_timer) {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        inner_mutex.unlock();
+        basic_lock.unlock();
 
         if (auto event_type{evt.type.lock()}) {
             event_type->callback(evt.userdata, global_timer - evt.time);
         }
 
-        inner_mutex.lock();
+        basic_lock.lock();
+        global_timer = GetGlobalTimeNs().count();
     }
 
-    is_global_timer_sane = false;
-
-    // Still events left (scheduled in the future)
     if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        const auto next_core = NextAvailableCore(needed_ticks);
-        if (next_core) {
-            downcounts[*next_core] = needed_ticks;
-        }
+        const s64 next_time = event_queue.front().time - global_timer;
+        return next_time;
+    } else {
+        return std::nullopt;
     }
-
-    accumulated_ticks = 0;
-
-    downcounts[current_context] = time_slice[current_context];
 }
 
-void CoreTiming::ResetRun() {
-    downcounts.fill(MAX_SLICE_LENGTH);
-    time_slice.fill(MAX_SLICE_LENGTH);
-    current_context = 0;
-    // Still events left (scheduled in the future)
-    if (!event_queue.empty()) {
-        const s64 needed_ticks =
-            std::min<s64>(event_queue.front().time - global_timer, MAX_SLICE_LENGTH);
-        downcounts[current_context] = needed_ticks;
+void CoreTiming::ThreadLoop() {
+    has_started = true;
+    while (!shutting_down) {
+        while (!paused) {
+            paused_set = false;
+            const auto next_time = Advance();
+            if (next_time) {
+                if (*next_time > 0) {
+                    std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
+                    event.WaitFor(next_time_ns);
+                }
+            } else {
+                wait_set = true;
+                event.Wait();
+            }
+            wait_set = false;
+        }
+        paused_set = true;
+        clock->Pause(true);
+        pause_event.Wait();
+        clock->Pause(false);
     }
-
-    is_global_timer_sane = false;
-    accumulated_ticks = 0;
 }
 
-void CoreTiming::Idle() {
-    accumulated_ticks += downcounts[current_context];
-    idled_cycles += downcounts[current_context];
-    downcounts[current_context] = 0;
+std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
+    if (is_multicore) {
+        return clock->GetTimeNS();
+    }
+    return CyclesToNs(ticks);
 }
 
 std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
-    return std::chrono::microseconds{GetTicks() * 1000000 / Hardware::BASE_CLOCK_RATE};
-}
-
-s64 CoreTiming::GetDowncount() const {
-    return downcounts[current_context];
+    if (is_multicore) {
+        return clock->GetTimeUS();
+    }
+    return CyclesToUs(ticks);
 }
 
 } // namespace Core::Timing
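
Taken together, the change replaces the guest-cycle slice dispatcher (Advance()/ResetRun()/NextAvailableCore() driven by per-core downcounts) with a host-clock event loop: event deadlines are now kept in nanoseconds of host time, and a dedicated "yuzu:HostTiming" thread sleeps until the next deadline and fires callbacks. Below is a minimal caller-side sketch of the resulting API, inferred from this diff alone (the event name, delay, and callback body are illustrative; is_multicore is set outside this file, and only the multicore path spawns the timer thread):

// Sketch only, not part of the commit.
Core::Timing::CoreTiming core_timing;
core_timing.Initialize([] {
    // Runs once on the "yuzu:HostTiming" thread before the event loop starts.
});

// A TimedCallback receives the userdata passed at schedule time and how many
// nanoseconds late the event fired (global_timer - evt.time in Advance()).
auto example_event = Core::Timing::CreateEvent(
    "ExampleEvent", [](u64 userdata, s64 ns_late) { /* handle the event */ });

// The first argument is now a delay in nanoseconds of host time, not cycles.
core_timing.ScheduleEvent(1000000, example_event, /*userdata=*/0); // 1 ms

core_timing.Shutdown(); // wakes and joins the timer thread, clears the queue

On the single-thread path no timer thread exists; time is instead derived from the ticks counter maintained by AddTicks() and Idle(), as the is_multicore fallbacks in GetCPUTicks(), GetClockTicks(), and GetGlobalTimeNs() show.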