summaryrefslogtreecommitdiff
path: root/src/common
diff options
context:
space:
mode:
Diffstat (limited to 'src/common')
-rw-r--r--src/common/CMakeLists.txt13
-rw-r--r--src/common/fiber.cpp226
-rw-r--r--src/common/fiber.h92
-rw-r--r--src/common/spin_lock.cpp54
-rw-r--r--src/common/spin_lock.h21
-rw-r--r--src/common/telemetry.cpp1
-rw-r--r--src/common/thread.h4
-rw-r--r--src/common/uint128.cpp26
-rw-r--r--src/common/uint128.h3
-rw-r--r--src/common/wall_clock.cpp92
-rw-r--r--src/common/wall_clock.h51
-rw-r--r--src/common/x64/cpu_detect.cpp38
-rw-r--r--src/common/x64/cpu_detect.h13
-rw-r--r--src/common/x64/native_clock.cpp95
-rw-r--r--src/common/x64/native_clock.h41
-rw-r--r--src/common/x64/xbyak_abi.h95
16 files changed, 796 insertions, 69 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..3cc17d0e9 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
32 DEPENDS 32 DEPENDS
33 # WARNING! It was too much work to try and make a common location for this list, 33 # WARNING! It was too much work to try and make a common location for this list,
34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well 34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
35 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
35 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 37 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
37 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
@@ -108,6 +110,8 @@ add_library(common STATIC
108 common_types.h 110 common_types.h
109 dynamic_library.cpp 111 dynamic_library.cpp
110 dynamic_library.h 112 dynamic_library.h
113 fiber.cpp
114 fiber.h
111 file_util.cpp 115 file_util.cpp
112 file_util.h 116 file_util.h
113 hash.h 117 hash.h
@@ -141,6 +145,8 @@ add_library(common STATIC
141 scm_rev.cpp 145 scm_rev.cpp
142 scm_rev.h 146 scm_rev.h
143 scope_exit.h 147 scope_exit.h
148 spin_lock.cpp
149 spin_lock.h
144 string_util.cpp 150 string_util.cpp
145 string_util.h 151 string_util.h
146 swap.h 152 swap.h
@@ -161,6 +167,8 @@ add_library(common STATIC
161 vector_math.h 167 vector_math.h
162 virtual_buffer.cpp 168 virtual_buffer.cpp
163 virtual_buffer.h 169 virtual_buffer.h
170 wall_clock.cpp
171 wall_clock.h
164 web_result.h 172 web_result.h
165 zstd_compression.cpp 173 zstd_compression.cpp
166 zstd_compression.h 174 zstd_compression.h
@@ -171,12 +179,15 @@ if(ARCHITECTURE_x86_64)
171 PRIVATE 179 PRIVATE
172 x64/cpu_detect.cpp 180 x64/cpu_detect.cpp
173 x64/cpu_detect.h 181 x64/cpu_detect.h
182 x64/native_clock.cpp
183 x64/native_clock.h
174 x64/xbyak_abi.h 184 x64/xbyak_abi.h
175 x64/xbyak_util.h 185 x64/xbyak_util.h
176 ) 186 )
177endif() 187endif()
178 188
179create_target_directory_groups(common) 189create_target_directory_groups(common)
190find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
180 191
181target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) 192target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
182target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) 193target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 000000000..f97ad433b
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,226 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/fiber.h"
7#if defined(_WIN32) || defined(WIN32)
8#include <windows.h>
9#else
10#include <boost/context/detail/fcontext.hpp>
11#endif
12
13namespace Common {
14
15constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
16
17#if defined(_WIN32) || defined(WIN32)
18
19struct Fiber::FiberImpl {
20 LPVOID handle = nullptr;
21 LPVOID rewind_handle = nullptr;
22};
23
24void Fiber::Start() {
25 ASSERT(previous_fiber != nullptr);
26 previous_fiber->guard.unlock();
27 previous_fiber.reset();
28 entry_point(start_parameter);
29 UNREACHABLE();
30}
31
32void Fiber::OnRewind() {
33 ASSERT(impl->handle != nullptr);
34 DeleteFiber(impl->handle);
35 impl->handle = impl->rewind_handle;
36 impl->rewind_handle = nullptr;
37 rewind_point(rewind_parameter);
38 UNREACHABLE();
39}
40
41void Fiber::FiberStartFunc(void* fiber_parameter) {
42 auto fiber = static_cast<Fiber*>(fiber_parameter);
43 fiber->Start();
44}
45
46void Fiber::RewindStartFunc(void* fiber_parameter) {
47 auto fiber = static_cast<Fiber*>(fiber_parameter);
48 fiber->OnRewind();
49}
50
51Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
52 : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
53 impl = std::make_unique<FiberImpl>();
54 impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
55}
56
57Fiber::Fiber() {
58 impl = std::make_unique<FiberImpl>();
59}
60
61Fiber::~Fiber() {
62 if (released) {
63 return;
64 }
65 // Make sure the Fiber is not being used
66 const bool locked = guard.try_lock();
67 ASSERT_MSG(locked, "Destroying a fiber that's still running");
68 if (locked) {
69 guard.unlock();
70 }
71 DeleteFiber(impl->handle);
72}
73
74void Fiber::Exit() {
75 ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
76 if (!is_thread_fiber) {
77 return;
78 }
79 ConvertFiberToThread();
80 guard.unlock();
81 released = true;
82}
83
84void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
85 rewind_point = std::move(rewind_func);
86 rewind_parameter = start_parameter;
87}
88
89void Fiber::Rewind() {
90 ASSERT(rewind_point);
91 ASSERT(impl->rewind_handle == nullptr);
92 impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
93 SwitchToFiber(impl->rewind_handle);
94}
95
96void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
97 ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
98 ASSERT_MSG(to != nullptr, "Next fiber is null!");
99 to->guard.lock();
100 to->previous_fiber = from;
101 SwitchToFiber(to->impl->handle);
102 ASSERT(from->previous_fiber != nullptr);
103 from->previous_fiber->guard.unlock();
104 from->previous_fiber.reset();
105}
106
107std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
108 std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
109 fiber->guard.lock();
110 fiber->impl->handle = ConvertThreadToFiber(nullptr);
111 fiber->is_thread_fiber = true;
112 return fiber;
113}
114
115#else
116
117struct Fiber::FiberImpl {
118 alignas(64) std::array<u8, default_stack_size> stack;
119 u8* stack_limit;
120 alignas(64) std::array<u8, default_stack_size> rewind_stack;
121 u8* rewind_stack_limit;
122 boost::context::detail::fcontext_t context;
123 boost::context::detail::fcontext_t rewind_context;
124};
125
126void Fiber::Start(boost::context::detail::transfer_t& transfer) {
127 ASSERT(previous_fiber != nullptr);
128 previous_fiber->impl->context = transfer.fctx;
129 previous_fiber->guard.unlock();
130 previous_fiber.reset();
131 entry_point(start_parameter);
132 UNREACHABLE();
133}
134
135void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
136 ASSERT(impl->context != nullptr);
137 impl->context = impl->rewind_context;
138 impl->rewind_context = nullptr;
139 u8* tmp = impl->stack_limit;
140 impl->stack_limit = impl->rewind_stack_limit;
141 impl->rewind_stack_limit = tmp;
142 rewind_point(rewind_parameter);
143 UNREACHABLE();
144}
145
146void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
147 auto fiber = static_cast<Fiber*>(transfer.data);
148 fiber->Start(transfer);
149}
150
151void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
152 auto fiber = static_cast<Fiber*>(transfer.data);
153 fiber->OnRewind(transfer);
154}
155
156Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
157 : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
158 impl = std::make_unique<FiberImpl>();
159 impl->stack_limit = impl->stack.data();
160 impl->rewind_stack_limit = impl->rewind_stack.data();
161 u8* stack_base = impl->stack_limit + default_stack_size;
162 impl->context =
163 boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
164}
165
166void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
167 rewind_point = std::move(rewind_func);
168 rewind_parameter = start_parameter;
169}
170
171Fiber::Fiber() {
172 impl = std::make_unique<FiberImpl>();
173}
174
175Fiber::~Fiber() {
176 if (released) {
177 return;
178 }
179 // Make sure the Fiber is not being used
180 const bool locked = guard.try_lock();
181 ASSERT_MSG(locked, "Destroying a fiber that's still running");
182 if (locked) {
183 guard.unlock();
184 }
185}
186
187void Fiber::Exit() {
188
189 ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
190 if (!is_thread_fiber) {
191 return;
192 }
193 guard.unlock();
194 released = true;
195}
196
197void Fiber::Rewind() {
198 ASSERT(rewind_point);
199 ASSERT(impl->rewind_context == nullptr);
200 u8* stack_base = impl->rewind_stack_limit + default_stack_size;
201 impl->rewind_context =
202 boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
203 boost::context::detail::jump_fcontext(impl->rewind_context, this);
204}
205
206void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
207 ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
208 ASSERT_MSG(to != nullptr, "Next fiber is null!");
209 to->guard.lock();
210 to->previous_fiber = from;
211 auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
212 ASSERT(from->previous_fiber != nullptr);
213 from->previous_fiber->impl->context = transfer.fctx;
214 from->previous_fiber->guard.unlock();
215 from->previous_fiber.reset();
216}
217
218std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
219 std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
220 fiber->guard.lock();
221 fiber->is_thread_fiber = true;
222 return fiber;
223}
224
225#endif
226} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 000000000..dafc1100e
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,92 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <memory>
9
10#include "common/common_types.h"
11#include "common/spin_lock.h"
12
13#if !defined(_WIN32) && !defined(WIN32)
14namespace boost::context::detail {
15struct transfer_t;
16}
17#endif
18
19namespace Common {
20
21/**
22 * Fiber class
23 * a fiber is a userspace thread with it's own context. They can be used to
24 * implement coroutines, emulated threading systems and certain asynchronous
25 * patterns.
26 *
27 * This class implements fibers at a low level, thus allowing greater freedom
28 * to implement such patterns. This fiber class is 'threadsafe' only one fiber
29 * can be running at a time and threads will be locked while trying to yield to
30 * a running fiber until it yields. WARNING exchanging two running fibers between
31 * threads will cause a deadlock. In order to prevent a deadlock, each thread should
32 * have an intermediary fiber, you switch to the intermediary fiber of the current
33 * thread and then from it switch to the expected fiber. This way you can exchange
34 * 2 fibers within 2 different threads.
35 */
36class Fiber {
37public:
38 Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
39 ~Fiber();
40
41 Fiber(const Fiber&) = delete;
42 Fiber& operator=(const Fiber&) = delete;
43
44 Fiber(Fiber&&) = default;
45 Fiber& operator=(Fiber&&) = default;
46
47 /// Yields control from Fiber 'from' to Fiber 'to'
48 /// Fiber 'from' must be the currently running fiber.
49 static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
50 static std::shared_ptr<Fiber> ThreadToFiber();
51
52 void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
53
54 void Rewind();
55
56 /// Only call from main thread's fiber
57 void Exit();
58
59 /// Changes the start parameter of the fiber. Has no effect if the fiber already started
60 void SetStartParameter(void* new_parameter) {
61 start_parameter = new_parameter;
62 }
63
64private:
65 Fiber();
66
67#if defined(_WIN32) || defined(WIN32)
68 void OnRewind();
69 void Start();
70 static void FiberStartFunc(void* fiber_parameter);
71 static void RewindStartFunc(void* fiber_parameter);
72#else
73 void OnRewind(boost::context::detail::transfer_t& transfer);
74 void Start(boost::context::detail::transfer_t& transfer);
75 static void FiberStartFunc(boost::context::detail::transfer_t transfer);
76 static void RewindStartFunc(boost::context::detail::transfer_t transfer);
77#endif
78
79 struct FiberImpl;
80
81 SpinLock guard{};
82 std::function<void(void*)> entry_point;
83 std::function<void(void*)> rewind_point;
84 void* rewind_parameter{};
85 void* start_parameter{};
86 std::shared_ptr<Fiber> previous_fiber;
87 std::unique_ptr<FiberImpl> impl;
88 bool is_thread_fiber{};
89 bool released{};
90};
91
92} // namespace Common
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 000000000..c7b46aac6
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,54 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/spin_lock.h"
6
7#if _MSC_VER
8#include <intrin.h>
9#if _M_AMD64
10#define __x86_64__ 1
11#endif
12#if _M_ARM64
13#define __aarch64__ 1
14#endif
15#else
16#if __x86_64__
17#include <xmmintrin.h>
18#endif
19#endif
20
21namespace {
22
23void thread_pause() {
24#if __x86_64__
25 _mm_pause();
26#elif __aarch64__ && _MSC_VER
27 __yield();
28#elif __aarch64__
29 asm("yield");
30#endif
31}
32
33} // namespace
34
35namespace Common {
36
37void SpinLock::lock() {
38 while (lck.test_and_set(std::memory_order_acquire)) {
39 thread_pause();
40 }
41}
42
43void SpinLock::unlock() {
44 lck.clear(std::memory_order_release);
45}
46
47bool SpinLock::try_lock() {
48 if (lck.test_and_set(std::memory_order_acquire)) {
49 return false;
50 }
51 return true;
52}
53
54} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 000000000..70282a961
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,21 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8
9namespace Common {
10
/// Minimal spin lock built on std::atomic_flag. It provides lock(), unlock() and
/// try_lock(), the member set the standard lock wrappers expect.
class SpinLock {
public:
    /// Spins until the lock is acquired.
    void lock();

    /// Attempts to acquire the lock once; returns true on success.
    bool try_lock();

    /// Releases the lock.
    void unlock();

private:
    /// Set while the lock is held.
    std::atomic_flag lck = ATOMIC_FLAG_INIT;
};
20
21} // namespace Common
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 200c6489a..16d42facd 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {
60 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); 60 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
61 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); 61 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
62 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); 62 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
63 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
63 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); 64 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
64 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); 65 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
65 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); 66 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc071685..127cc7e23 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <mutex> 10#include <mutex>
11#include <thread> 11#include <thread>
12#include "common/common_types.h"
12 13
13namespace Common { 14namespace Common {
14 15
@@ -28,8 +29,7 @@ public:
28 is_set = false; 29 is_set = false;
29 } 30 }
30 31
31 template <class Duration> 32 bool WaitFor(const std::chrono::nanoseconds& time) {
32 bool WaitFor(const std::chrono::duration<Duration>& time) {
33 std::unique_lock lk{mutex}; 33 std::unique_lock lk{mutex};
34 if (!condvar.wait_for(lk, time, [this] { return is_set; })) 34 if (!condvar.wait_for(lk, time, [this] { return is_set; }))
35 return false; 35 return false;
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730..16bf7c828 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,38 @@
6#include <intrin.h> 6#include <intrin.h>
7 7
8#pragma intrinsic(_umul128) 8#pragma intrinsic(_umul128)
9#pragma intrinsic(_udiv128)
9#endif 10#endif
10#include <cstring> 11#include <cstring>
11#include "common/uint128.h" 12#include "common/uint128.h"
12 13
13namespace Common { 14namespace Common {
14 15
16#ifdef _MSC_VER
17
18u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
19 u128 r{};
20 r[0] = _umul128(a, b, &r[1]);
21 u64 remainder;
22#if _MSC_VER < 1923
23 return udiv128(r[1], r[0], d, &remainder);
24#else
25 return _udiv128(r[1], r[0], d, &remainder);
26#endif
27}
28
29#else
30
31u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
32 const u64 diva = a / d;
33 const u64 moda = a % d;
34 const u64 divb = b / d;
35 const u64 modb = b % d;
36 return diva * b + moda * divb + moda * modb / d;
37}
38
39#endif
40
15u128 Multiply64Into128(u64 a, u64 b) { 41u128 Multiply64Into128(u64 a, u64 b) {
16 u128 result; 42 u128 result;
17#ifdef _MSC_VER 43#ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cb..503cd2d0c 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
9 9
10namespace Common { 10namespace Common {
11 11
12// This function multiplies two u64 values and divides the product by a third u64 value, returning the quotient.
13u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
14
12// This function multiplies 2 u64 values and produces a u128 value; 15// This function multiplies 2 u64 values and produces a u128 value;
13u128 Multiply64Into128(u64 a, u64 b); 16u128 Multiply64Into128(u64 a, u64 b);
14 17
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 000000000..d4d35f4e7
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,92 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/uint128.h"
6#include "common/wall_clock.h"
7
8#ifdef ARCHITECTURE_x86_64
9#include "common/x64/cpu_detect.h"
10#include "common/x64/native_clock.h"
11#endif
12
13namespace Common {
14
15using base_timer = std::chrono::steady_clock;
16using base_time_point = std::chrono::time_point<base_timer>;
17
18class StandardWallClock : public WallClock {
19public:
20 StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
21 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) {
22 start_time = base_timer::now();
23 }
24
25 std::chrono::nanoseconds GetTimeNS() override {
26 base_time_point current = base_timer::now();
27 auto elapsed = current - start_time;
28 return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
29 }
30
31 std::chrono::microseconds GetTimeUS() override {
32 base_time_point current = base_timer::now();
33 auto elapsed = current - start_time;
34 return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
35 }
36
37 std::chrono::milliseconds GetTimeMS() override {
38 base_time_point current = base_timer::now();
39 auto elapsed = current - start_time;
40 return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
41 }
42
43 u64 GetClockCycles() override {
44 std::chrono::nanoseconds time_now = GetTimeNS();
45 const u128 temporary =
46 Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
47 return Common::Divide128On32(temporary, 1000000000).first;
48 }
49
50 u64 GetCPUCycles() override {
51 std::chrono::nanoseconds time_now = GetTimeNS();
52 const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
53 return Common::Divide128On32(temporary, 1000000000).first;
54 }
55
56private:
57 base_time_point start_time;
58};
59
60#ifdef ARCHITECTURE_x86_64
61
62std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
63 u32 emulated_clock_frequency) {
64 const auto& caps = GetCPUCaps();
65 u64 rtsc_frequency = 0;
66 if (caps.invariant_tsc) {
67 if (caps.base_frequency != 0) {
68 rtsc_frequency = static_cast<u64>(caps.base_frequency) * 1000000U;
69 }
70 if (rtsc_frequency == 0) {
71 rtsc_frequency = EstimateRDTSCFrequency();
72 }
73 }
74 if (rtsc_frequency == 0) {
75 return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
76 emulated_clock_frequency);
77 } else {
78 return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
79 rtsc_frequency);
80 }
81}
82
83#else
84
85std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
86 u32 emulated_clock_frequency) {
87 return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
88}
89
90#endif
91
92} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 000000000..ed284cf50
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <memory>
9
10#include "common/common_types.h"
11
12namespace Common {
13
14class WallClock {
15public:
16 /// Returns current wall time in nanoseconds
17 virtual std::chrono::nanoseconds GetTimeNS() = 0;
18
19 /// Returns current wall time in microseconds
20 virtual std::chrono::microseconds GetTimeUS() = 0;
21
22 /// Returns current wall time in milliseconds
23 virtual std::chrono::milliseconds GetTimeMS() = 0;
24
25 /// Returns current wall time in emulated clock cycles
26 virtual u64 GetClockCycles() = 0;
27
28 /// Returns current wall time in emulated cpu cycles
29 virtual u64 GetCPUCycles() = 0;
30
31 /// Tells if the wall clock, uses the host CPU's hardware clock
32 bool IsNative() const {
33 return is_native;
34 }
35
36protected:
37 WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
38 : emulated_cpu_frequency{emulated_cpu_frequency},
39 emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
40
41 u64 emulated_cpu_frequency;
42 u64 emulated_clock_frequency;
43
44private:
45 bool is_native;
46};
47
48std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
49 u32 emulated_clock_frequency);
50
51} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); 62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); 63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); 64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
65 if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
66 caps.manufacturer = Manufacturer::Intel;
67 else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
68 caps.manufacturer = Manufacturer::AMD;
69 else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
70 caps.manufacturer = Manufacturer::Hygon;
71 else
72 caps.manufacturer = Manufacturer::Unknown;
73
74 u32 family = {};
75 u32 model = {};
65 76
66 __cpuid(cpu_id, 0x80000000); 77 __cpuid(cpu_id, 0x80000000);
67 78
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
73 // Detect family and other miscellaneous features 84 // Detect family and other miscellaneous features
74 if (max_std_fn >= 1) { 85 if (max_std_fn >= 1) {
75 __cpuid(cpu_id, 0x00000001); 86 __cpuid(cpu_id, 0x00000001);
87 family = (cpu_id[0] >> 8) & 0xf;
88 model = (cpu_id[0] >> 4) & 0xf;
89 if (family == 0xf) {
90 family += (cpu_id[0] >> 20) & 0xff;
91 }
92 if (family >= 6) {
93 model += ((cpu_id[0] >> 16) & 0xf) << 4;
94 }
76 95
77 if ((cpu_id[3] >> 25) & 1) 96 if ((cpu_id[3] >> 25) & 1)
78 caps.sse = true; 97 caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
110 caps.bmi1 = true; 129 caps.bmi1 = true;
111 if ((cpu_id[1] >> 8) & 1) 130 if ((cpu_id[1] >> 8) & 1)
112 caps.bmi2 = true; 131 caps.bmi2 = true;
132 // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
133 if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
134 (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
135 caps.avx512 = caps.avx2;
136 }
113 } 137 }
114 } 138 }
115 139
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
130 caps.fma4 = true; 154 caps.fma4 = true;
131 } 155 }
132 156
157 if (max_ex_fn >= 0x80000007) {
158 __cpuid(cpu_id, 0x80000007);
159 if (cpu_id[3] & (1 << 8)) {
160 caps.invariant_tsc = true;
161 }
162 }
163
164 if (max_std_fn >= 0x16) {
165 __cpuid(cpu_id, 0x16);
166 caps.base_frequency = cpu_id[0];
167 caps.max_frequency = cpu_id[1];
168 caps.bus_frequency = cpu_id[2];
169 }
170
133 return caps; 171 return caps;
134} 172}
135 173
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
6 6
7namespace Common { 7namespace Common {
8 8
9enum class Manufacturer : u32 {
10 Intel = 0,
11 AMD = 1,
12 Hygon = 2,
13 Unknown = 3,
14};
15
9/// x86/x64 CPU capabilities that may be detected by this module 16/// x86/x64 CPU capabilities that may be detected by this module
10struct CPUCaps { 17struct CPUCaps {
18 Manufacturer manufacturer;
11 char cpu_string[0x21]; 19 char cpu_string[0x21];
12 char brand_string[0x41]; 20 char brand_string[0x41];
13 bool sse; 21 bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
19 bool lzcnt; 27 bool lzcnt;
20 bool avx; 28 bool avx;
21 bool avx2; 29 bool avx2;
30 bool avx512;
22 bool bmi1; 31 bool bmi1;
23 bool bmi2; 32 bool bmi2;
24 bool fma; 33 bool fma;
25 bool fma4; 34 bool fma4;
26 bool aes; 35 bool aes;
36 bool invariant_tsc;
37 u32 base_frequency;
38 u32 max_frequency;
39 u32 bus_frequency;
27}; 40};
28 41
29/** 42/**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..26d4d0ba6
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <thread>
7
8#ifdef _MSC_VER
9#include <intrin.h>
10#else
11#include <x86intrin.h>
12#endif
13
14#include "common/uint128.h"
15#include "common/x64/native_clock.h"
16
17namespace Common {
18
19u64 EstimateRDTSCFrequency() {
20 const auto milli_10 = std::chrono::milliseconds{10};
21 // get current time
22 _mm_mfence();
23 const u64 tscStart = __rdtsc();
24 const auto startTime = std::chrono::high_resolution_clock::now();
25 // wait roughly 3 seconds
26 while (true) {
27 auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
28 std::chrono::high_resolution_clock::now() - startTime);
29 if (milli.count() >= 3000)
30 break;
31 std::this_thread::sleep_for(milli_10);
32 }
33 const auto endTime = std::chrono::high_resolution_clock::now();
34 _mm_mfence();
35 const u64 tscEnd = __rdtsc();
36 // calculate difference
37 const u64 timer_diff =
38 std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
39 const u64 tsc_diff = tscEnd - tscStart;
40 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
41 return tsc_freq;
42}
43
44namespace X64 {
45NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
46 u64 rtsc_frequency)
47 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
48 rtsc_frequency} {
49 _mm_mfence();
50 last_measure = __rdtsc();
51 accumulated_ticks = 0U;
52}
53
54u64 NativeClock::GetRTSC() {
55 rtsc_serialize.lock();
56 _mm_mfence();
57 const u64 current_measure = __rdtsc();
58 u64 diff = current_measure - last_measure;
59 diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
60 if (current_measure > last_measure) {
61 last_measure = current_measure;
62 }
63 accumulated_ticks += diff;
64 rtsc_serialize.unlock();
65 return accumulated_ticks;
66}
67
68std::chrono::nanoseconds NativeClock::GetTimeNS() {
69 const u64 rtsc_value = GetRTSC();
70 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
71}
72
73std::chrono::microseconds NativeClock::GetTimeUS() {
74 const u64 rtsc_value = GetRTSC();
75 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
76}
77
78std::chrono::milliseconds NativeClock::GetTimeMS() {
79 const u64 rtsc_value = GetRTSC();
80 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
81}
82
83u64 NativeClock::GetClockCycles() {
84 const u64 rtsc_value = GetRTSC();
85 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
86}
87
88u64 NativeClock::GetCPUCycles() {
89 const u64 rtsc_value = GetRTSC();
90 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
91}
92
93} // namespace X64
94
95} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..b58cf9f5a
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/spin_lock.h"
10#include "common/wall_clock.h"
11
12namespace Common {
13
14namespace X64 {
15class NativeClock : public WallClock {
16public:
17 NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);
18
19 std::chrono::nanoseconds GetTimeNS() override;
20
21 std::chrono::microseconds GetTimeUS() override;
22
23 std::chrono::milliseconds GetTimeMS() override;
24
25 u64 GetClockCycles() override;
26
27 u64 GetCPUCycles() override;
28
29private:
30 u64 GetRTSC();
31
32 SpinLock rtsc_serialize{};
33 u64 last_measure{};
34 u64 accumulated_ticks{};
35 u64 rtsc_frequency;
36};
37} // namespace X64
38
39u64 EstimateRDTSCFrequency();
40
41} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
11 11
12namespace Common::X64 { 12namespace Common::X64 {
13 13
14inline int RegToIndex(const Xbyak::Reg& reg) { 14inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind; 15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, 16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers."); 17 "RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); 19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20} 20}
21 21
// Side-by-side diff: old text on the left, new text on the right.
// Maps a register-set index to the Xbyak 64-bit general-purpose register with
// that index. Asserts that reg_index is below 16. The new version takes
// std::size_t and casts explicitly to int for the Xbyak::Reg64 constructor.
22inline Xbyak::Reg64 IndexToReg64(int reg_index) { 22inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
23 ASSERT(reg_index < 16); 23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index); 24 return Xbyak::Reg64(static_cast<int>(reg_index));
25} 25}
26 26
// Side-by-side diff: old text on the left, new text on the right.
// Maps a register-set index in [16, 32) to the Xbyak XMM register numbered
// reg_index - 16; the range is enforced by the ASSERT. The new version takes
// std::size_t and casts the adjusted index to int for the Xbyak::Xmm constructor.
27inline Xbyak::Xmm IndexToXmm(int reg_index) { 27inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32); 28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16); 29 return Xbyak::Xmm(static_cast<int>(reg_index - 16));
30} 30}
31 31
32inline Xbyak::Reg IndexToReg(int reg_index) { 32inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
33 if (reg_index < 16) { 33 if (reg_index < 16) {
34 return IndexToReg64(reg_index); 34 return IndexToReg64(reg_index);
35 } else { 35 } else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
151 151
152#endif 152#endif
153 153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, 154struct ABIFrameInfo {
155 size_t needed_frame_size, s32* out_subtraction, 155 s32 subtraction;
156 s32* out_xmm_offset) { 156 s32 xmm_offset;
157};
158
159inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
160 size_t needed_frame_size) {
157 const auto count = (regs & ABI_ALL_GPRS).count(); 161 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8; 162 rsp_alignment -= count * 8;
159 size_t subtraction = 0; 163 size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
170 rsp_alignment -= subtraction; 174 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF; 175 subtraction += rsp_alignment & 0xF;
172 176
173 *out_subtraction = (s32)subtraction; 177 return ABIFrameInfo{static_cast<s32>(subtraction),
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); 178 static_cast<s32>(subtraction - xmm_base_subtraction)};
175} 179}
176 180
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 181inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) { 182 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset; 183 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); 184
181 for (std::size_t i = 0; i < regs.size(); ++i) { 185 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) { 186 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i))); 187 code.push(IndexToReg64(i));
184 } 188 }
185 } 189 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189 190
190 for (int i = 0; i < regs.count(); i++) { 191 if (frame_info.subtraction != 0) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) { 192 code.sub(code.rsp, frame_info.subtraction);
192 code.push(IndexToReg64(i));
193 }
194 } 193 }
195 194
196 for (std::size_t i = 0; i < regs.size(); ++i) { 195 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) { 196 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); 197 code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
199 xmm_offset += 0x10; 198 frame_info.xmm_offset += 0x10;
200 } 199 }
201 } 200 }
202 201
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
205 204
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 205inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) { 206 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset; 207 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210 208
211 for (std::size_t i = 0; i < regs.size(); ++i) { 209 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) { 210 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); 211 code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
214 xmm_offset += 0x10; 212 frame_info.xmm_offset += 0x10;
215 } 213 }
216 } 214 }
217 215
218 if (subtraction != 0) { 216 if (frame_info.subtraction != 0) {
219 code.add(code.rsp, subtraction); 217 code.add(code.rsp, frame_info.subtraction);
220 } 218 }
221 219
222 // GPRs need to be popped in reverse order 220 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) { 221 for (std::size_t j = 0; j < regs.size(); ++j) {
224 if (regs[i]) { 222 const std::size_t i = regs.size() - j - 1;
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) { 223 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i)); 224 code.pop(IndexToReg64(i));
262 } 225 }
263 } 226 }