summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
authorGravatar Tony Wasserka2015-03-07 15:30:40 +0100
committerGravatar Tony Wasserka2015-03-07 15:30:40 +0100
commit93e32bce72905ac1bd0a5e75066fda5e6b7bf250 (patch)
tree4530e9d8db22955416543899b6c0e59abf8b9732 /src/common
parentMerge pull request #630 from archshift/swap (diff)
parentProfiler: Implement QPCClock to get better precision on Win32 (diff)
downloadyuzu-93e32bce72905ac1bd0a5e75066fda5e6b7bf250.tar.gz
yuzu-93e32bce72905ac1bd0a5e75066fda5e6b7bf250.tar.xz
yuzu-93e32bce72905ac1bd0a5e75066fda5e6b7bf250.zip
Merge pull request #538 from yuriks/perf-stat
Add profiling infrastructure and widget
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt4
-rw-r--r--src/common/profiler.cpp182
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h108
-rw-r--r--src/common/synchronized_wrapper.h69
-rw-r--r--src/common/thread.h19
6 files changed, 534 insertions, 0 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index b05c35546..daa2d59de 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRCS
14 mem_arena.cpp 14 mem_arena.cpp
15 memory_util.cpp 15 memory_util.cpp
16 misc.cpp 16 misc.cpp
17 profiler.cpp
17 scm_rev.cpp 18 scm_rev.cpp
18 string_util.cpp 19 string_util.cpp
19 symbols.cpp 20 symbols.cpp
@@ -48,11 +49,14 @@ set(HEADERS
48 mem_arena.h 49 mem_arena.h
49 memory_util.h 50 memory_util.h
50 platform.h 51 platform.h
52 profiler.h
53 profiler_reporting.h
51 scm_rev.h 54 scm_rev.h
52 scope_exit.h 55 scope_exit.h
53 string_util.h 56 string_util.h
54 swap.h 57 swap.h
55 symbols.h 58 symbols.h
59 synchronized_wrapper.h
56 thread.h 60 thread.h
57 thread_queue_list.h 61 thread_queue_list.h
58 thunk.h 62 thunk.h
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
new file mode 100644
index 000000000..65c3df167
--- /dev/null
+++ b/src/common/profiler.cpp
@@ -0,0 +1,182 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/profiler.h"
6#include "common/profiler_reporting.h"
7#include "common/assert.h"
8
9#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
10#define NOMINMAX
11#define WIN32_LEAN_AND_MEAN
12#include <Windows.h> // For QueryPerformanceCounter/Frequency
13#endif
14
15namespace Common {
16namespace Profiling {
17
18#if ENABLE_PROFILING
19thread_local Timer* Timer::current_timer = nullptr;
20#endif
21
22#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
23QPCClock::time_point QPCClock::now() {
24 static LARGE_INTEGER freq;
25 // Use this dummy local static to ensure this gets initialized once.
26 static BOOL dummy = QueryPerformanceFrequency(&freq);
27
28 LARGE_INTEGER ticks;
29 QueryPerformanceCounter(&ticks);
30
31 // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
32 // correct way to approach this would be to just return ticks as a time_point and then subtract
33 // and do this conversion when creating a duration from two time_points, however, as far as I
34 // could tell the C++ requirements for these types are incompatible with this approach.
35 return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
36}
37#endif
38
39TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
40 : accumulated_duration(0) {
41
42 ProfilingManager& manager = GetProfilingManager();
43 category_id = manager.RegisterTimingCategory(this, name);
44 if (parent != nullptr)
45 manager.SetTimingCategoryParent(category_id, parent->category_id);
46}
47
48ProfilingManager::ProfilingManager()
49 : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
50}
51
52unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
53 TimingCategoryInfo info;
54 info.category = category;
55 info.name = name;
56 info.parent = TimingCategoryInfo::NO_PARENT;
57
58 unsigned int id = (unsigned int)timing_categories.size();
59 timing_categories.push_back(std::move(info));
60
61 return id;
62}
63
64void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
65 ASSERT(category < timing_categories.size());
66 ASSERT(parent < timing_categories.size());
67
68 timing_categories[category].parent = parent;
69}
70
71void ProfilingManager::BeginFrame() {
72 this_frame_start = Clock::now();
73}
74
75void ProfilingManager::FinishFrame() {
76 Clock::time_point now = Clock::now();
77
78 results.interframe_time = now - last_frame_end;
79 results.frame_time = now - this_frame_start;
80
81 results.time_per_category.resize(timing_categories.size());
82 for (size_t i = 0; i < timing_categories.size(); ++i) {
83 results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
84 }
85
86 last_frame_end = now;
87}
88
89TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
90 : max_window_size(window_size), window_size(0) {
91 interframe_times.resize(window_size, Duration::zero());
92 frame_times.resize(window_size, Duration::zero());
93}
94
95void TimingResultsAggregator::Clear() {
96 window_size = cursor = 0;
97}
98
99void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
100 size_t old_size = times_per_category.size();
101 if (n == old_size)
102 return;
103
104 times_per_category.resize(n);
105
106 for (size_t i = old_size; i < n; ++i) {
107 times_per_category[i].resize(max_window_size, Duration::zero());
108 }
109}
110
111void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
112 SetNumberOfCategories(frame_result.time_per_category.size());
113
114 interframe_times[cursor] = frame_result.interframe_time;
115 frame_times[cursor] = frame_result.frame_time;
116 for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
117 times_per_category[i][cursor] = frame_result.time_per_category[i];
118 }
119
120 ++cursor;
121 if (cursor == max_window_size)
122 cursor = 0;
123 if (window_size < max_window_size)
124 ++window_size;
125}
126
127static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
128 AggregatedDuration result;
129 result.avg = Duration::zero();
130
131 result.min = result.max = (len == 0 ? Duration::zero() : v[0]);
132
133 for (size_t i = 1; i < len; ++i) {
134 Duration value = v[i];
135 result.avg += value;
136 result.min = std::min(result.min, value);
137 result.max = std::max(result.max, value);
138 }
139 if (len != 0)
140 result.avg /= len;
141
142 return result;
143}
144
145static float tof(Common::Profiling::Duration dur) {
146 using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
147 return std::chrono::duration_cast<FloatMs>(dur).count();
148}
149
150AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
151 AggregatedFrameResult result;
152
153 result.interframe_time = AggregateField(interframe_times, window_size);
154 result.frame_time = AggregateField(frame_times, window_size);
155
156 if (result.interframe_time.avg != Duration::zero()) {
157 result.fps = 1000.0f / tof(result.interframe_time.avg);
158 } else {
159 result.fps = 0.0f;
160 }
161
162 result.time_per_category.resize(times_per_category.size());
163 for (size_t i = 0; i < times_per_category.size(); ++i) {
164 result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
165 }
166
167 return result;
168}
169
170ProfilingManager& GetProfilingManager() {
171 // Takes advantage of "magic" static initialization for race-free initialization.
172 static ProfilingManager manager;
173 return manager;
174}
175
176SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
177 static SynchronizedWrapper<TimingResultsAggregator> aggregator(30);
178 return SynchronizedRef<TimingResultsAggregator>(aggregator);
179}
180
181} // namespace Profiling
182} // namespace Common
diff --git a/src/common/profiler.h b/src/common/profiler.h
new file mode 100644
index 000000000..3e967b4bc
--- /dev/null
+++ b/src/common/profiler.h
@@ -0,0 +1,152 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9
10#include "common/assert.h"
11#include "common/thread.h"
12
13namespace Common {
14namespace Profiling {
15
16// If this is defined to 0, it turns all Timers into no-ops.
17#ifndef ENABLE_PROFILING
18#define ENABLE_PROFILING 1
19#endif
20
21#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
// The std::chrono::high_resolution_clock shipped with MSVC up to 2013 is not based on
// QueryPerformanceCounter and therefore has bad precision, so a QPC-based clock is implemented
// by hand to get good results.

/// Clock type with microsecond resolution; now() is defined out of line.
struct QPCClock {
    typedef std::chrono::microseconds duration;
    typedef duration::rep rep;
    typedef duration::period period;
    typedef std::chrono::time_point<QPCClock> time_point;
    static const bool is_steady = false;

    static time_point now();
};
34
35using Clock = QPCClock;
36#else
37using Clock = std::chrono::high_resolution_clock;
38#endif
39
40using Duration = Clock::duration;
41
42/**
43 * Represents a timing category that measured time can be accounted towards. Should be declared as a
44 * global variable and passed to Timers.
45 */
46class TimingCategory final {
47public:
48 TimingCategory(const char* name, TimingCategory* parent = nullptr);
49
50 unsigned int GetCategoryId() const {
51 return category_id;
52 }
53
54 /// Adds some time to this category. Can safely be called from multiple threads at the same time.
55 void AddTime(Duration amount) {
56 std::atomic_fetch_add_explicit(
57 &accumulated_duration, amount.count(),
58 std::memory_order_relaxed);
59 }
60
61 /**
62 * Atomically retrieves the accumulated measured time for this category and resets the counter
63 * to zero. Can be safely called concurrently with AddTime.
64 */
65 Duration GetAccumulatedTime() {
66 return Duration(std::atomic_exchange_explicit(
67 &accumulated_duration, (Duration::rep)0,
68 std::memory_order_relaxed));
69 }
70
71private:
72 unsigned int category_id;
73 std::atomic<Duration::rep> accumulated_duration;
74};
75
76/**
77 * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
78 * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
79 * appropriately paired.
80 *
81 * When a Timer is started, it automatically pauses a previously running timer on the same thread,
82 * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
83 * double-accounting of time on two categories.
84 */
85class Timer {
86public:
87 Timer(TimingCategory& category) : category(category) {
88 }
89
90 void Start() {
91#if ENABLE_PROFILING
92 ASSERT(!running);
93 previous_timer = current_timer;
94 current_timer = this;
95 if (previous_timer != nullptr)
96 previous_timer->StopTiming();
97
98 StartTiming();
99#endif
100 }
101
102 void Stop() {
103#if ENABLE_PROFILING
104 ASSERT(running);
105 StopTiming();
106
107 if (previous_timer != nullptr)
108 previous_timer->StartTiming();
109 current_timer = previous_timer;
110#endif
111 }
112
113private:
114#if ENABLE_PROFILING
115 void StartTiming() {
116 start = Clock::now();
117 running = true;
118 }
119
120 void StopTiming() {
121 auto duration = Clock::now() - start;
122 running = false;
123 category.AddTime(std::chrono::duration_cast<Duration>(duration));
124 }
125
126 Clock::time_point start;
127 bool running = false;
128
129 Timer* previous_timer;
130 static thread_local Timer* current_timer;
131#endif
132
133 TimingCategory& category;
134};
135
136/**
137 * A Timer that automatically starts timing when created and stops at the end of the scope. Should
138 * be used in the majority of cases.
139 */
140class ScopeTimer : public Timer {
141public:
142 ScopeTimer(TimingCategory& category) : Timer(category) {
143 Start();
144 }
145
146 ~ScopeTimer() {
147 Stop();
148 }
149};
150
151} // namespace Profiling
152} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
new file mode 100644
index 000000000..3abb73315
--- /dev/null
+++ b/src/common/profiler_reporting.h
@@ -0,0 +1,108 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <chrono>
9#include <mutex>
10#include <utility>
11#include <vector>
12
13#include "common/profiler.h"
14#include "common/synchronized_wrapper.h"
15
16namespace Common {
17namespace Profiling {
18
19struct TimingCategoryInfo {
20 static const unsigned int NO_PARENT = -1;
21
22 TimingCategory* category;
23 const char* name;
24 unsigned int parent;
25};
26
27struct ProfilingFrameResult {
28 /// Time since the last delivered frame
29 Duration interframe_time;
30
31 /// Time spent processing a frame, excluding VSync
32 Duration frame_time;
33
34 /// Total amount of time spent inside each category in this frame. Indexed by the category id
35 std::vector<Duration> time_per_category;
36};
37
38class ProfilingManager final {
39public:
40 ProfilingManager();
41
42 unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
43 void SetTimingCategoryParent(unsigned int category, unsigned int parent);
44
45 const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
46 return timing_categories;
47 }
48
49 /// This should be called after swapping screen buffers.
50 void BeginFrame();
51 /// This should be called before swapping screen buffers.
52 void FinishFrame();
53
54 /// Get the timing results from the previous frame. This is updated when you call FinishFrame().
55 const ProfilingFrameResult& GetPreviousFrameResults() const {
56 return results;
57 }
58
59private:
60 std::vector<TimingCategoryInfo> timing_categories;
61 Clock::time_point last_frame_end;
62 Clock::time_point this_frame_start;
63
64 ProfilingFrameResult results;
65};
66
67struct AggregatedDuration {
68 Duration avg, min, max;
69};
70
71struct AggregatedFrameResult {
72 /// Time since the last delivered frame
73 AggregatedDuration interframe_time;
74
75 /// Time spent processing a frame, excluding VSync
76 AggregatedDuration frame_time;
77
78 float fps;
79
80 /// Total amount of time spent inside each category in this frame. Indexed by the category id
81 std::vector<AggregatedDuration> time_per_category;
82};
83
84class TimingResultsAggregator final {
85public:
86 TimingResultsAggregator(size_t window_size);
87
88 void Clear();
89 void SetNumberOfCategories(size_t n);
90
91 void AddFrame(const ProfilingFrameResult& frame_result);
92
93 AggregatedFrameResult GetAggregatedResults() const;
94
95 size_t max_window_size;
96 size_t window_size;
97 size_t cursor;
98
99 std::vector<Duration> interframe_times;
100 std::vector<Duration> frame_times;
101 std::vector<std::vector<Duration>> times_per_category;
102};
103
104ProfilingManager& GetProfilingManager();
105SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
106
107} // namespace Profiling
108} // namespace Common
diff --git a/src/common/synchronized_wrapper.h b/src/common/synchronized_wrapper.h
new file mode 100644
index 000000000..946252b8c
--- /dev/null
+++ b/src/common/synchronized_wrapper.h
@@ -0,0 +1,69 @@
1// Copyright 2015 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8
9namespace Common {
10
/**
 * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
 * one forgets to lock a mutex before accessing an object. To access the wrapped object construct a
 * SynchronizedRef on this wrapper. Inspired by Rust's Mutex type
 * (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
 */
template <typename T>
class SynchronizedWrapper {
public:
    /// Constructs the wrapped object in place, forwarding all arguments to T's constructor.
    template <typename... Args>
    SynchronizedWrapper(Args&&... args) :
        data(std::forward<Args>(args)...) {
    }

private:
    template <typename U>
    friend class SynchronizedRef;

    std::mutex mutex;
    T data;
};

/**
 * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
 * greatly reduces the chance that someone will access the wrapped resource without locking the
 * mutex.
 *
 * The reference is movable but not copyable. A moved-from reference holds no lock, so it must not
 * be dereferenced.
 */
template <typename T>
class SynchronizedRef {
public:
    /// Locks the wrapper's mutex; it stays locked until this reference is destroyed.
    SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) {
        wrapper.mutex.lock();
    }

    SynchronizedRef(const SynchronizedRef&) = delete;
    /// Takes over the lock held by `o`, leaving `o` empty.
    SynchronizedRef(SynchronizedRef&& o) : wrapper(o.wrapper) {
        o.wrapper = nullptr;
    }

    /// Unlocks the mutex, unless the lock was moved out of this reference.
    ~SynchronizedRef() {
        if (wrapper)
            wrapper->mutex.unlock();
    }

    SynchronizedRef& operator=(const SynchronizedRef&) = delete;
    /// Exchanges the held locks with `o`. The lock this reference held before the assignment is
    /// released when `o` is destroyed.
    SynchronizedRef& operator=(SynchronizedRef&& o) {
        std::swap(wrapper, o.wrapper);
        // Flowing off the end of a non-void function is undefined behaviour, so the reference
        // has to be returned explicitly.
        return *this;
    }

    T& operator*() { return wrapper->data; }
    const T& operator*() const { return wrapper->data; }

    T* operator->() { return &wrapper->data; }
    const T* operator->() const { return &wrapper->data; }

private:
    /// Wrapper whose mutex is currently held, or null after being moved from.
    SynchronizedWrapper<T>* wrapper;
};
68
69} // namespace Common
diff --git a/src/common/thread.h b/src/common/thread.h
index eaf1ba00c..a45728e1e 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -24,6 +24,25 @@
24#include <unistd.h> 24#include <unistd.h>
25#endif 25#endif
26 26
// Compiler support for the C++11 `thread_local` keyword was surprisingly spotty until very
// recently. Compiler-specific thread-local storage specifiers, on the other hand, have been well
// supported for a while, although only with semantics that cover POD types. Mapping the keyword
// onto those specifiers gives some amount of backwards compatibility.
// WARNING: This only works correctly with POD types.
#if defined(__clang__)
// Kept as a nested check so that __has_feature is only evaluated when compiling with clang.
#   if !__has_feature(cxx_thread_local)
#       define thread_local __thread
#   endif
#elif defined(__GNUC__)
// GCC versions before 4.8 get the __thread fallback.
#   if (__GNUC__ * 100 + __GNUC_MINOR__) < 408
#       define thread_local __thread
#   endif
#elif defined(_MSC_VER)
// MSVC versions before _MSC_VER 1900 get the __declspec(thread) fallback.
#   if _MSC_VER < 1900
#       define thread_local __declspec(thread)
#   endif
#endif
45
27namespace Common 46namespace Common
28{ 47{
29 48