Diffstat (limited to 'src/common')
-rw-r--r--  src/common/CMakeLists.txt          8
-rw-r--r--  src/common/atomic_helpers.h      772
-rw-r--r--  src/common/bit_field.h             9
-rw-r--r--  src/common/common_funcs.h         10
-rw-r--r--  src/common/fixed_point.h         726
-rw-r--r--  src/common/input.h                29
-rw-r--r--  src/common/logging/filter.cpp      2
-rw-r--r--  src/common/logging/types.h         2
-rw-r--r--  src/common/reader_writer_queue.h 941
-rw-r--r--  src/common/settings.cpp            4
-rw-r--r--  src/common/settings.h             74
-rw-r--r--  src/common/wall_clock.cpp          2
-rw-r--r--  src/common/x64/cpu_detect.cpp     16
-rw-r--r--  src/common/x64/cpu_detect.h        5
14 files changed, 2552 insertions, 48 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 73bf626d4..d574e4b79 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -43,6 +43,7 @@ add_library(common STATIC
     alignment.h
     assert.cpp
     assert.h
+    atomic_helpers.h
     atomic_ops.h
     detached_tasks.cpp
     detached_tasks.h
@@ -64,6 +65,7 @@ add_library(common STATIC
     expected.h
     fiber.cpp
     fiber.h
+    fixed_point.h
     fs/file.cpp
     fs/file.h
     fs/fs.cpp
@@ -109,6 +111,7 @@ add_library(common STATIC
     parent_of_member.h
     point.h
     quaternion.h
+    reader_writer_queue.h
     ring_buffer.h
     scm_rev.cpp
     scm_rev.h
@@ -182,8 +185,9 @@ create_target_directory_groups(common)

 target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile Threads::Threads)
 target_link_libraries(common PRIVATE lz4::lz4 xbyak)
-if (MSVC)
+if (TARGET zstd::zstd)
     target_link_libraries(common PRIVATE zstd::zstd)
 else()
-    target_link_libraries(common PRIVATE zstd)
+    target_link_libraries(common PRIVATE
+        $<IF:$<TARGET_EXISTS:zstd::libzstd_shared>,zstd::libzstd_shared,zstd::libzstd_static>)
 endif()
diff --git a/src/common/atomic_helpers.h b/src/common/atomic_helpers.h
new file mode 100644
index 000000000..6d912b52e
--- /dev/null
+++ b/src/common/atomic_helpers.h
@@ -0,0 +1,772 @@
1// ©2013-2016 Cameron Desrochers.
2// Distributed under the simplified BSD license (see the license file that
3// should have come with this header).
4// Uses Jeff Preshing's semaphore implementation (under the terms of its
5// separate zlib license, embedded below).
6
7#pragma once
8
9// Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant)
10// implementation of low-level memory barriers, plus a few semi-portable utility macros (for
11// inlining and alignment). Also has a basic atomic type (limited to hardware-supported atomics with
12// no memory ordering guarantees). Uses the AE_* prefix for macros (historical reasons), and the
13// "moodycamel" namespace for symbols.
14
15#include <cassert>
16#include <cerrno>
17#include <cstdint>
18#include <ctime>
19#include <type_traits>
20
21// Platform detection
22#if defined(__INTEL_COMPILER)
23#define AE_ICC
24#elif defined(_MSC_VER)
25#define AE_VCPP
26#elif defined(__GNUC__)
27#define AE_GCC
28#endif
29
30#if defined(_M_IA64) || defined(__ia64__)
31#define AE_ARCH_IA64
32#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
33#define AE_ARCH_X64
34#elif defined(_M_IX86) || defined(__i386__)
35#define AE_ARCH_X86
36#elif defined(_M_PPC) || defined(__powerpc__)
37#define AE_ARCH_PPC
38#else
39#define AE_ARCH_UNKNOWN
40#endif
41
42// AE_UNUSED
43#define AE_UNUSED(x) ((void)x)
44
45// AE_NO_TSAN/AE_TSAN_ANNOTATE_*
46#if defined(__has_feature)
47#if __has_feature(thread_sanitizer)
48#if __cplusplus >= 201703L // inline variables require C++17
49namespace Common {
50inline int ae_tsan_global;
51}
52#define AE_TSAN_ANNOTATE_RELEASE() \
53 AnnotateHappensBefore(__FILE__, __LINE__, (void*)(&::Common::ae_tsan_global))
54#define AE_TSAN_ANNOTATE_ACQUIRE() \
55 AnnotateHappensAfter(__FILE__, __LINE__, (void*)(&::Common::ae_tsan_global))
56extern "C" void AnnotateHappensBefore(const char*, int, void*);
57extern "C" void AnnotateHappensAfter(const char*, int, void*);
58#else // when we can't work with tsan, attempt to disable its warnings
59#define AE_NO_TSAN __attribute__((no_sanitize("thread")))
60#endif
61#endif
62#endif
63#ifndef AE_NO_TSAN
64#define AE_NO_TSAN
65#endif
66#ifndef AE_TSAN_ANNOTATE_RELEASE
67#define AE_TSAN_ANNOTATE_RELEASE()
68#define AE_TSAN_ANNOTATE_ACQUIRE()
69#endif
70
71// AE_FORCEINLINE
72#if defined(AE_VCPP) || defined(AE_ICC)
73#define AE_FORCEINLINE __forceinline
74#elif defined(AE_GCC)
75//#define AE_FORCEINLINE __attribute__((always_inline))
76#define AE_FORCEINLINE inline
77#else
78#define AE_FORCEINLINE inline
79#endif
80
81// AE_ALIGN
82#if defined(AE_VCPP) || defined(AE_ICC)
83#define AE_ALIGN(x) __declspec(align(x))
84#elif defined(AE_GCC)
85#define AE_ALIGN(x) __attribute__((aligned(x)))
86#else
87// Assume GCC compliant syntax...
88#define AE_ALIGN(x) __attribute__((aligned(x)))
89#endif
90
91// Portable atomic fences implemented below:
92
93namespace Common {
94
95enum memory_order {
96 memory_order_relaxed,
97 memory_order_acquire,
98 memory_order_release,
99 memory_order_acq_rel,
100 memory_order_seq_cst,
101
102 // memory_order_sync: Forces a full sync:
103 // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
104 memory_order_sync = memory_order_seq_cst
105};
106
107} // namespace Common
108
109#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || \
110 (defined(AE_ICC) && __INTEL_COMPILER < 1600)
111// VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences
112
113#include <intrin.h>
114
115#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
116#define AeFullSync _mm_mfence
117#define AeLiteSync _mm_mfence
118#elif defined(AE_ARCH_IA64)
119#define AeFullSync __mf
120#define AeLiteSync __mf
121#elif defined(AE_ARCH_PPC)
122#include <ppcintrinsics.h>
123#define AeFullSync __sync
124#define AeLiteSync __lwsync
125#endif
126
127#ifdef AE_VCPP
128#pragma warning(push)
129#pragma warning(disable : 4365) // Disable erroneous 'conversion from long to unsigned int,
130 // signed/unsigned mismatch' error when using `assert`
131#ifdef __cplusplus_cli
132#pragma managed(push, off)
133#endif
134#endif
135
136namespace Common {
137
138AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN {
139 switch (order) {
140 case memory_order_relaxed:
141 break;
142 case memory_order_acquire:
143 _ReadBarrier();
144 break;
145 case memory_order_release:
146 _WriteBarrier();
147 break;
148 case memory_order_acq_rel:
149 _ReadWriteBarrier();
150 break;
151 case memory_order_seq_cst:
152 _ReadWriteBarrier();
153 break;
154 default:
155 assert(false);
156 }
157}
158
159// x86/x64 have a strong memory model -- all loads and stores have
160// acquire and release semantics automatically (so only need compiler
161// barriers for those).
162#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
163AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
164 switch (order) {
165 case memory_order_relaxed:
166 break;
167 case memory_order_acquire:
168 _ReadBarrier();
169 break;
170 case memory_order_release:
171 _WriteBarrier();
172 break;
173 case memory_order_acq_rel:
174 _ReadWriteBarrier();
175 break;
176 case memory_order_seq_cst:
177 _ReadWriteBarrier();
178 AeFullSync();
179 _ReadWriteBarrier();
180 break;
181 default:
182 assert(false);
183 }
184}
185#else
186AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
187 // Non-specialized arch, use heavier memory barriers everywhere just in case :-(
188 switch (order) {
189 case memory_order_relaxed:
190 break;
191 case memory_order_acquire:
192 _ReadBarrier();
193 AeLiteSync();
194 _ReadBarrier();
195 break;
196 case memory_order_release:
197 _WriteBarrier();
198 AeLiteSync();
199 _WriteBarrier();
200 break;
201 case memory_order_acq_rel:
202 _ReadWriteBarrier();
203 AeLiteSync();
204 _ReadWriteBarrier();
205 break;
206 case memory_order_seq_cst:
207 _ReadWriteBarrier();
208 AeFullSync();
209 _ReadWriteBarrier();
210 break;
211 default:
212 assert(false);
213 }
214}
215#endif
216} // namespace Common
217#else
218// Use the standard library's atomics
219#include <atomic>
220
221namespace Common {
222
223AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN {
224 switch (order) {
225 case memory_order_relaxed:
226 break;
227 case memory_order_acquire:
228 std::atomic_signal_fence(std::memory_order_acquire);
229 break;
230 case memory_order_release:
231 std::atomic_signal_fence(std::memory_order_release);
232 break;
233 case memory_order_acq_rel:
234 std::atomic_signal_fence(std::memory_order_acq_rel);
235 break;
236 case memory_order_seq_cst:
237 std::atomic_signal_fence(std::memory_order_seq_cst);
238 break;
239 default:
240 assert(false);
241 }
242}
243
244AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
245 switch (order) {
246 case memory_order_relaxed:
247 break;
248 case memory_order_acquire:
249 AE_TSAN_ANNOTATE_ACQUIRE();
250 std::atomic_thread_fence(std::memory_order_acquire);
251 break;
252 case memory_order_release:
253 AE_TSAN_ANNOTATE_RELEASE();
254 std::atomic_thread_fence(std::memory_order_release);
255 break;
256 case memory_order_acq_rel:
257 AE_TSAN_ANNOTATE_ACQUIRE();
258 AE_TSAN_ANNOTATE_RELEASE();
259 std::atomic_thread_fence(std::memory_order_acq_rel);
260 break;
261 case memory_order_seq_cst:
262 AE_TSAN_ANNOTATE_ACQUIRE();
263 AE_TSAN_ANNOTATE_RELEASE();
264 std::atomic_thread_fence(std::memory_order_seq_cst);
265 break;
266 default:
267 assert(false);
268 }
269}
270
271} // namespace Common
272
273#endif
274
275#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
276#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
277#endif
278
279#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
280#include <atomic>
281#endif
282#include <utility>
283
284// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
285// Provides basic support for atomic variables -- no memory ordering guarantees are provided.
286// The guarantee of atomicity is only made for types that already have atomic load and store
287// guarantees at the hardware level -- on most platforms this generally means aligned pointers and
288// integers (only).
289namespace Common {
290template <typename T>
291class weak_atomic {
292public:
293 AE_NO_TSAN weak_atomic() : value() {}
294#ifdef AE_VCPP
295#pragma warning(push)
296#pragma warning(disable : 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning
297#endif
298 template <typename U>
299 AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) {}
300#ifdef __cplusplus_cli
301 // Work around bug with universal reference/nullptr combination that only appears when /clr is
302 // on
303 AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {}
304#endif
305 AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) {}
306 AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) {}
307#ifdef AE_VCPP
308#pragma warning(pop)
309#endif
310
311 AE_FORCEINLINE operator T() const AE_NO_TSAN {
312 return load();
313 }
314
315#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
316 template <typename U>
317 AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN {
318 value = std::forward<U>(x);
319 return *this;
320 }
321 AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN {
322 value = other.value;
323 return *this;
324 }
325
326 AE_FORCEINLINE T load() const AE_NO_TSAN {
327 return value;
328 }
329
330 AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN {
331#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
332 if (sizeof(T) == 4)
333 return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
334#if defined(_M_AMD64)
335 else if (sizeof(T) == 8)
336 return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
337#endif
338#else
339#error Unsupported platform
340#endif
341 assert(false && "T must be either a 32 or 64 bit type");
342 return value;
343 }
344
345 AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN {
346#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
347 if (sizeof(T) == 4)
348 return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
349#if defined(_M_AMD64)
350 else if (sizeof(T) == 8)
351 return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
352#endif
353#else
354#error Unsupported platform
355#endif
356 assert(false && "T must be either a 32 or 64 bit type");
357 return value;
358 }
359#else
360 template <typename U>
361 AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN {
362 value.store(std::forward<U>(x), std::memory_order_relaxed);
363 return *this;
364 }
365
366 AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN {
367 value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
368 return *this;
369 }
370
371 AE_FORCEINLINE T load() const AE_NO_TSAN {
372 return value.load(std::memory_order_relaxed);
373 }
374
375 AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN {
376 return value.fetch_add(increment, std::memory_order_acquire);
377 }
378
379 AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN {
380 return value.fetch_add(increment, std::memory_order_release);
381 }
382#endif
383
384private:
385#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
386 // No std::atomic support, but still need to circumvent compiler optimizations.
387 // `volatile` will make memory access slow, but is guaranteed to be reliable.
388 volatile T value;
389#else
390 std::atomic<T> value;
391#endif
392};
393
394} // namespace Common
395
396// Portable single-producer, single-consumer semaphore below:
397
398#if defined(_WIN32)
399// Avoid including windows.h in a header; we only need a handful of
400// items, so we'll redeclare them here (this is relatively safe since
401// the API generally has to remain stable between Windows versions).
402// I know this is an ugly hack but it still beats polluting the global
403// namespace with thousands of generic names or adding a .cpp for nothing.
404extern "C" {
405struct _SECURITY_ATTRIBUTES;
406__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes,
407 long lInitialCount, long lMaximumCount,
408 const wchar_t* lpName);
409__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
410__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle,
411 unsigned long dwMilliseconds);
412__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount,
413 long* lpPreviousCount);
414}
415#elif defined(__MACH__)
416#include <mach/mach.h>
417#elif defined(__unix__)
418#include <semaphore.h>
419#elif defined(FREERTOS)
420#include <FreeRTOS.h>
421#include <semphr.h>
422#include <task.h>
423#endif
424
425namespace Common {
426// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
427// portable + lightweight semaphore implementations, originally from
428// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
429// LICENSE:
430// Copyright (c) 2015 Jeff Preshing
431//
432// This software is provided 'as-is', without any express or implied
433// warranty. In no event will the authors be held liable for any damages
434// arising from the use of this software.
435//
436// Permission is granted to anyone to use this software for any purpose,
437// including commercial applications, and to alter it and redistribute it
438// freely, subject to the following restrictions:
439//
440// 1. The origin of this software must not be misrepresented; you must not
441// claim that you wrote the original software. If you use this software
442// in a product, an acknowledgement in the product documentation would be
443// appreciated but is not required.
444// 2. Altered source versions must be plainly marked as such, and must not be
445// misrepresented as being the original software.
446// 3. This notice may not be removed or altered from any source distribution.
447namespace spsc_sema {
448#if defined(_WIN32)
449class Semaphore {
450private:
451 void* m_hSema;
452
453 Semaphore(const Semaphore& other);
454 Semaphore& operator=(const Semaphore& other);
455
456public:
457 AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema() {
458 assert(initialCount >= 0);
459 const long maxLong = 0x7fffffff;
460 m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
461 assert(m_hSema);
462 }
463
464 AE_NO_TSAN ~Semaphore() {
465 CloseHandle(m_hSema);
466 }
467
468 bool wait() AE_NO_TSAN {
469 const unsigned long infinite = 0xffffffff;
470 return WaitForSingleObject(m_hSema, infinite) == 0;
471 }
472
473 bool try_wait() AE_NO_TSAN {
474 return WaitForSingleObject(m_hSema, 0) == 0;
475 }
476
477 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
478 return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
479 }
480
481 void signal(int count = 1) AE_NO_TSAN {
482 while (!ReleaseSemaphore(m_hSema, count, nullptr))
483 ;
484 }
485};
486#elif defined(__MACH__)
487//---------------------------------------------------------
488// Semaphore (Apple iOS and OSX)
489// Can't use POSIX semaphores due to
490// http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
491//---------------------------------------------------------
492class Semaphore {
493private:
494 semaphore_t m_sema;
495
496 Semaphore(const Semaphore& other);
497 Semaphore& operator=(const Semaphore& other);
498
499public:
500 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
501 assert(initialCount >= 0);
502 kern_return_t rc =
503 semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
504 assert(rc == KERN_SUCCESS);
505 AE_UNUSED(rc);
506 }
507
508 AE_NO_TSAN ~Semaphore() {
509 semaphore_destroy(mach_task_self(), m_sema);
510 }
511
512 bool wait() AE_NO_TSAN {
513 return semaphore_wait(m_sema) == KERN_SUCCESS;
514 }
515
516 bool try_wait() AE_NO_TSAN {
517 return timed_wait(0);
518 }
519
520 bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN {
521 mach_timespec_t ts;
522 ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
523 ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
524
525 // added in OSX 10.10:
526 // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
527 kern_return_t rc = semaphore_timedwait(m_sema, ts);
528 return rc == KERN_SUCCESS;
529 }
530
531 void signal() AE_NO_TSAN {
532 while (semaphore_signal(m_sema) != KERN_SUCCESS)
533 ;
534 }
535
536 void signal(int count) AE_NO_TSAN {
537 while (count-- > 0) {
538 while (semaphore_signal(m_sema) != KERN_SUCCESS)
539 ;
540 }
541 }
542};
543#elif defined(__unix__)
544//---------------------------------------------------------
545// Semaphore (POSIX, Linux)
546//---------------------------------------------------------
547class Semaphore {
548private:
549 sem_t m_sema;
550
551 Semaphore(const Semaphore& other);
552 Semaphore& operator=(const Semaphore& other);
553
554public:
555 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
556 assert(initialCount >= 0);
557 int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
558 assert(rc == 0);
559 AE_UNUSED(rc);
560 }
561
562 AE_NO_TSAN ~Semaphore() {
563 sem_destroy(&m_sema);
564 }
565
566 bool wait() AE_NO_TSAN {
567 // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
568 int rc;
569 do {
570 rc = sem_wait(&m_sema);
571 } while (rc == -1 && errno == EINTR);
572 return rc == 0;
573 }
574
575 bool try_wait() AE_NO_TSAN {
576 int rc;
577 do {
578 rc = sem_trywait(&m_sema);
579 } while (rc == -1 && errno == EINTR);
580 return rc == 0;
581 }
582
583 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
584 struct timespec ts;
585 const int usecs_in_1_sec = 1000000;
586 const int nsecs_in_1_sec = 1000000000;
587 clock_gettime(CLOCK_REALTIME, &ts);
588 ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
589 ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
590 // sem_timedwait bombs if you have more than 1e9 in tv_nsec
591 // so we have to clean things up before passing it in
592 if (ts.tv_nsec >= nsecs_in_1_sec) {
593 ts.tv_nsec -= nsecs_in_1_sec;
594 ++ts.tv_sec;
595 }
596
597 int rc;
598 do {
599 rc = sem_timedwait(&m_sema, &ts);
600 } while (rc == -1 && errno == EINTR);
601 return rc == 0;
602 }
603
604 void signal() AE_NO_TSAN {
605 while (sem_post(&m_sema) == -1)
606 ;
607 }
608
609 void signal(int count) AE_NO_TSAN {
610 while (count-- > 0) {
611 while (sem_post(&m_sema) == -1)
612 ;
613 }
614 }
615};
616#elif defined(FREERTOS)
617//---------------------------------------------------------
618// Semaphore (FreeRTOS)
619//---------------------------------------------------------
620class Semaphore {
621private:
622 SemaphoreHandle_t m_sema;
623
624 Semaphore(const Semaphore& other);
625 Semaphore& operator=(const Semaphore& other);
626
627public:
628 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
629 assert(initialCount >= 0);
630 m_sema = xSemaphoreCreateCounting(static_cast<UBaseType_t>(~0ull),
631 static_cast<UBaseType_t>(initialCount));
632 assert(m_sema);
633 }
634
635 AE_NO_TSAN ~Semaphore() {
636 vSemaphoreDelete(m_sema);
637 }
638
639 bool wait() AE_NO_TSAN {
640 return xSemaphoreTake(m_sema, portMAX_DELAY) == pdTRUE;
641 }
642
643 bool try_wait() AE_NO_TSAN {
644 // Note: In an ISR context, if this causes a task to unblock,
645 // the caller won't know about it
646 if (xPortIsInsideInterrupt())
647 return xSemaphoreTakeFromISR(m_sema, NULL) == pdTRUE;
648 return xSemaphoreTake(m_sema, 0) == pdTRUE;
649 }
650
651 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
652 std::uint64_t msecs = usecs / 1000;
653 TickType_t ticks = static_cast<TickType_t>(msecs / portTICK_PERIOD_MS);
654 if (ticks == 0)
655 return try_wait();
656 return xSemaphoreTake(m_sema, ticks) == pdTRUE;
657 }
658
659 void signal() AE_NO_TSAN {
660 // Note: In an ISR context, if this causes a task to unblock,
661 // the caller won't know about it
662 BaseType_t rc;
663 if (xPortIsInsideInterrupt())
664 rc = xSemaphoreGiveFromISR(m_sema, NULL);
665 else
666 rc = xSemaphoreGive(m_sema);
667 assert(rc == pdTRUE);
668 AE_UNUSED(rc);
669 }
670
671 void signal(int count) AE_NO_TSAN {
672 while (count-- > 0)
673 signal();
674 }
675};
676#else
677#error Unsupported platform! (No semaphore wrapper available)
678#endif
679
680//---------------------------------------------------------
681// LightweightSemaphore
682//---------------------------------------------------------
683class LightweightSemaphore {
684public:
685 typedef std::make_signed<std::size_t>::type ssize_t;
686
687private:
688 weak_atomic<ssize_t> m_count;
689 Semaphore m_sema;
690
691 bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) AE_NO_TSAN {
692 ssize_t oldCount;
693 // Is there a better way to set the initial spin count?
694 // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
695 // as threads start hitting the kernel semaphore.
696 int spin = 1024;
697 while (--spin >= 0) {
698 if (m_count.load() > 0) {
699 m_count.fetch_add_acquire(-1);
700 return true;
701 }
702 compiler_fence(memory_order_acquire); // Prevent the compiler from collapsing the loop.
703 }
704 oldCount = m_count.fetch_add_acquire(-1);
705 if (oldCount > 0)
706 return true;
707 if (timeout_usecs < 0) {
708 if (m_sema.wait())
709 return true;
710 }
711 if (timeout_usecs > 0 && m_sema.timed_wait(static_cast<uint64_t>(timeout_usecs)))
712 return true;
713 // At this point, we've timed out waiting for the semaphore, but the
714 // count is still decremented indicating we may still be waiting on
715 // it. So we have to re-adjust the count, but only if the semaphore
716 // wasn't signaled enough times for us in the meantime. If it was, we
717 // need to release the semaphore too.
718 while (true) {
719 oldCount = m_count.fetch_add_release(1);
720 if (oldCount < 0)
721 return false; // successfully restored things to the way they were
722 // Oh, the producer thread just signaled the semaphore after all. Try again:
723 oldCount = m_count.fetch_add_acquire(-1);
724 if (oldCount > 0 && m_sema.try_wait())
725 return true;
726 }
727 }
728
729public:
730 AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() {
731 assert(initialCount >= 0);
732 }
733
734 bool tryWait() AE_NO_TSAN {
735 if (m_count.load() > 0) {
736 m_count.fetch_add_acquire(-1);
737 return true;
738 }
739 return false;
740 }
741
742 bool wait() AE_NO_TSAN {
743 return tryWait() || waitWithPartialSpinning();
744 }
745
746 bool wait(std::int64_t timeout_usecs) AE_NO_TSAN {
747 return tryWait() || waitWithPartialSpinning(timeout_usecs);
748 }
749
750 void signal(ssize_t count = 1) AE_NO_TSAN {
751 assert(count >= 0);
752 ssize_t oldCount = m_count.fetch_add_release(count);
753 assert(oldCount >= -1);
754 if (oldCount < 0) {
755 m_sema.signal(1);
756 }
757 }
758
759 std::size_t availableApprox() const AE_NO_TSAN {
760 ssize_t count = m_count.load();
761 return count > 0 ? static_cast<std::size_t>(count) : 0;
762 }
763};
764} // namespace spsc_sema
765} // namespace Common
766
767#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
768#pragma warning(pop)
769#ifdef __cplusplus_cli
770#pragma managed(pop)
771#endif
772#endif
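A minimal usage sketch of what this header provides, not part of the commit: a producer publishes a value through a weak_atomic and wakes a consumer with the SPSC LightweightSemaphore. weak_atomic guarantees atomicity only, not ordering; visibility here comes from the semaphore's internal release/acquire pairing. The thread function names below are invented.

    #include <thread>
    #include "common/atomic_helpers.h"

    static Common::weak_atomic<int> g_value{0};
    static Common::spsc_sema::LightweightSemaphore g_ready;

    void Producer() {
        g_value = 42;     // relaxed store: atomic, but unordered on its own
        g_ready.signal(); // fetch_add_release publishes the store
    }

    void Consumer() {
        g_ready.wait();               // fetch_add_acquire pairs with signal()
        const int v = g_value.load(); // observes 42
        (void)v;
    }

    int main() {
        std::thread consumer(Consumer);
        std::thread producer(Producer);
        producer.join();
        consumer.join();
        return 0;
    }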
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 16d805694..7e1df62b1 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -146,7 +146,16 @@ public:
     }

     constexpr void Assign(const T& value) {
+#ifdef _MSC_VER
         storage = static_cast<StorageType>((storage & ~mask) | FormatValue(value));
+#else
+        // Explicitly reload with memcpy to avoid compiler aliasing quirks
+        // regarding optimization: GCC/Clang clobber chained stores to
+        // different bitfields in the same struct with the last value.
+        StorageTypeWithEndian storage_;
+        std::memcpy(&storage_, &storage, sizeof(storage_));
+        storage = static_cast<StorageType>((storage_ & ~mask) | FormatValue(value));
+#endif
     }

     [[nodiscard]] constexpr T Value() const {
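The failure mode the new non-MSVC path works around is easiest to see with two chained Assign() calls through members of the same union. A sketch under assumed names (ExampleReg and its layout are invented; BitField is the class patched above):

    #include "common/bit_field.h"
    #include "common/common_types.h"

    // Hypothetical packed register: two fields sharing one u32.
    union ExampleReg {
        u32 raw;
        BitField<0, 8, u32> low;  // bits 0-7
        BitField<8, 8, u32> high; // bits 8-15
    };

    void Configure(ExampleReg& reg) {
        reg.raw = 0;
        // Back-to-back stores through different union members. Without the
        // explicit memcpy reload of `storage`, GCC/Clang may treat the two
        // Assign() bodies as independent read-modify-writes of the same
        // object and fold them, losing the `low` update.
        reg.low.Assign(0x12);
        reg.high.Assign(0x34); // reg.raw should now be 0x3412
    }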
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index adc31c758..e1e2a90fc 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -18,14 +18,16 @@
 /// Helper macros to insert unused bytes or words to properly align structs. These values will be
 /// zero-initialized.
 #define INSERT_PADDING_BYTES(num_bytes) \
-    std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
+    [[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
 #define INSERT_PADDING_WORDS(num_words) \
-    std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
+    [[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__) {}

 /// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
 /// This keeps the structure trivial to construct.
-#define INSERT_PADDING_BYTES_NOINIT(num_bytes) std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
-#define INSERT_PADDING_WORDS_NOINIT(num_words) std::array<u32, num_words> CONCAT2(pad, __LINE__)
+#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \
+    [[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
+#define INSERT_PADDING_WORDS_NOINIT(num_words) \
+    [[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)

 #ifndef _MSC_VER
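For context, these macros reserve unnamed gaps in guest-facing structures; the padding member is never read, which is what the added [[maybe_unused]] silences (for example Clang's -Wunused-private-field when the macro expands inside a private section). A sketch with an invented layout:

    #include "common/common_funcs.h"
    #include "common/common_types.h"

    // Hypothetical 0x10-byte request: bytes 0x4-0xB are reserved.
    struct ExampleRequest {
        u32 command;
        INSERT_PADDING_BYTES(0x8); // zero-initialized, never referenced
        u32 checksum;
    };
    static_assert(sizeof(ExampleRequest) == 0x10);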
diff --git a/src/common/fixed_point.h b/src/common/fixed_point.h
new file mode 100644
index 000000000..1d45e51b3
--- /dev/null
+++ b/src/common/fixed_point.h
@@ -0,0 +1,726 @@
1// From: https://github.com/eteran/cpp-utilities/blob/master/fixed/include/cpp-utilities/fixed.h
2// See also: http://stackoverflow.com/questions/79677/whats-the-best-way-to-do-fixed-point-math
3/*
4 * The MIT License (MIT)
5 *
6 * Copyright (c) 2015 Evan Teran
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in all
16 * copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 * SOFTWARE.
25 */
26
27#ifndef FIXED_H_
28#define FIXED_H_
29
30#if __cplusplus >= 201402L
31#define CONSTEXPR14 constexpr
32#else
33#define CONSTEXPR14
34#endif
35
36#include <cstddef> // for size_t
37#include <cstdint>
38#include <exception>
39#include <ostream>
40#include <type_traits>
41
42namespace Common {
43
44template <size_t I, size_t F>
45class FixedPoint;
46
47namespace detail {
48
49// helper templates to make magic with types :)
50// these allow us to determine reasonable types from
51// a desired size, they also let us infer the next largest type
52// from a type which is nice for the division op
53template <size_t T>
54struct type_from_size {
55 using value_type = void;
56 using unsigned_type = void;
57 using signed_type = void;
58 static constexpr bool is_specialized = false;
59};
60
61#if defined(__GNUC__) && defined(__x86_64__) && !defined(__STRICT_ANSI__)
62template <>
63struct type_from_size<128> {
64 static constexpr bool is_specialized = true;
65 static constexpr size_t size = 128;
66
67 using value_type = __int128;
68 using unsigned_type = unsigned __int128;
69 using signed_type = __int128;
70 using next_size = type_from_size<256>;
71};
72#endif
73
74template <>
75struct type_from_size<64> {
76 static constexpr bool is_specialized = true;
77 static constexpr size_t size = 64;
78
79 using value_type = int64_t;
80 using unsigned_type = std::make_unsigned<value_type>::type;
81 using signed_type = std::make_signed<value_type>::type;
82 using next_size = type_from_size<128>;
83};
84
85template <>
86struct type_from_size<32> {
87 static constexpr bool is_specialized = true;
88 static constexpr size_t size = 32;
89
90 using value_type = int32_t;
91 using unsigned_type = std::make_unsigned<value_type>::type;
92 using signed_type = std::make_signed<value_type>::type;
93 using next_size = type_from_size<64>;
94};
95
96template <>
97struct type_from_size<16> {
98 static constexpr bool is_specialized = true;
99 static constexpr size_t size = 16;
100
101 using value_type = int16_t;
102 using unsigned_type = std::make_unsigned<value_type>::type;
103 using signed_type = std::make_signed<value_type>::type;
104 using next_size = type_from_size<32>;
105};
106
107template <>
108struct type_from_size<8> {
109 static constexpr bool is_specialized = true;
110 static constexpr size_t size = 8;
111
112 using value_type = int8_t;
113 using unsigned_type = std::make_unsigned<value_type>::type;
114 using signed_type = std::make_signed<value_type>::type;
115 using next_size = type_from_size<16>;
116};
117
118// this is to assist in adding support for non-native base
119// types (for adding big-int support), this should be fine
120// unless your big-int class doesn't nicely support casting
121template <class B, class N>
122constexpr B next_to_base(N rhs) {
123 return static_cast<B>(rhs);
124}
125
126struct divide_by_zero : std::exception {};
127
128template <size_t I, size_t F>
129CONSTEXPR14 FixedPoint<I, F> divide(
130 FixedPoint<I, F> numerator, FixedPoint<I, F> denominator, FixedPoint<I, F>& remainder,
131 typename std::enable_if<type_from_size<I + F>::next_size::is_specialized>::type* = nullptr) {
132
133 using next_type = typename FixedPoint<I, F>::next_type;
134 using base_type = typename FixedPoint<I, F>::base_type;
135 constexpr size_t fractional_bits = FixedPoint<I, F>::fractional_bits;
136
137 next_type t(numerator.to_raw());
138 t <<= fractional_bits;
139
140 FixedPoint<I, F> quotient;
141
142 quotient = FixedPoint<I, F>::from_base(next_to_base<base_type>(t / denominator.to_raw()));
143 remainder = FixedPoint<I, F>::from_base(next_to_base<base_type>(t % denominator.to_raw()));
144
145 return quotient;
146}
147
148template <size_t I, size_t F>
149CONSTEXPR14 FixedPoint<I, F> divide(
150 FixedPoint<I, F> numerator, FixedPoint<I, F> denominator, FixedPoint<I, F>& remainder,
151 typename std::enable_if<!type_from_size<I + F>::next_size::is_specialized>::type* = nullptr) {
152
153 using unsigned_type = typename FixedPoint<I, F>::unsigned_type;
154
155 constexpr int bits = FixedPoint<I, F>::total_bits;
156
157 if (denominator == 0) {
158 throw divide_by_zero();
159 } else {
160
161 int sign = 0;
162
163 FixedPoint<I, F> quotient;
164
165 if (numerator < 0) {
166 sign ^= 1;
167 numerator = -numerator;
168 }
169
170 if (denominator < 0) {
171 sign ^= 1;
172 denominator = -denominator;
173 }
174
175 unsigned_type n = numerator.to_raw();
176 unsigned_type d = denominator.to_raw();
177 unsigned_type x = 1;
178 unsigned_type answer = 0;
179
180 // egyptian division algorithm
181 while ((n >= d) && (((d >> (bits - 1)) & 1) == 0)) {
182 x <<= 1;
183 d <<= 1;
184 }
185
186 while (x != 0) {
187 if (n >= d) {
188 n -= d;
189 answer += x;
190 }
191
192 x >>= 1;
193 d >>= 1;
194 }
195
196 unsigned_type l1 = n;
197 unsigned_type l2 = denominator.to_raw();
198
199 // calculate the lower bits (needs to be unsigned)
200 while (l1 >> (bits - F) > 0) {
201 l1 >>= 1;
202 l2 >>= 1;
203 }
204 const unsigned_type lo = (l1 << F) / l2;
205
206 quotient = FixedPoint<I, F>::from_base((answer << F) | lo);
207 remainder = n;
208
209 if (sign) {
210 quotient = -quotient;
211 }
212
213 return quotient;
214 }
215}
216
217// this is the usual implementation of multiplication
218template <size_t I, size_t F>
219CONSTEXPR14 FixedPoint<I, F> multiply(
220 FixedPoint<I, F> lhs, FixedPoint<I, F> rhs,
221 typename std::enable_if<type_from_size<I + F>::next_size::is_specialized>::type* = nullptr) {
222
223 using next_type = typename FixedPoint<I, F>::next_type;
224 using base_type = typename FixedPoint<I, F>::base_type;
225
226 constexpr size_t fractional_bits = FixedPoint<I, F>::fractional_bits;
227
228 next_type t(static_cast<next_type>(lhs.to_raw()) * static_cast<next_type>(rhs.to_raw()));
229 t >>= fractional_bits;
230
231 return FixedPoint<I, F>::from_base(next_to_base<base_type>(t));
232}
233
234// this is the fallback version we use when we don't have a next size;
235// it is slightly slower, but is more robust since it doesn't
236// require an upgraded type
237template <size_t I, size_t F>
238CONSTEXPR14 FixedPoint<I, F> multiply(
239 FixedPoint<I, F> lhs, FixedPoint<I, F> rhs,
240 typename std::enable_if<!type_from_size<I + F>::next_size::is_specialized>::type* = nullptr) {
241
242 using base_type = typename FixedPoint<I, F>::base_type;
243
244 constexpr size_t fractional_bits = FixedPoint<I, F>::fractional_bits;
245 constexpr base_type integer_mask = FixedPoint<I, F>::integer_mask;
246 constexpr base_type fractional_mask = FixedPoint<I, F>::fractional_mask;
247
248 // more costly but doesn't need a larger type
249 const base_type a_hi = (lhs.to_raw() & integer_mask) >> fractional_bits;
250 const base_type b_hi = (rhs.to_raw() & integer_mask) >> fractional_bits;
251 const base_type a_lo = (lhs.to_raw() & fractional_mask);
252 const base_type b_lo = (rhs.to_raw() & fractional_mask);
253
254 const base_type x1 = a_hi * b_hi;
255 const base_type x2 = a_hi * b_lo;
256 const base_type x3 = a_lo * b_hi;
257 const base_type x4 = a_lo * b_lo;
258
259 return FixedPoint<I, F>::from_base((x1 << fractional_bits) + (x3 + x2) +
260 (x4 >> fractional_bits));
261}
262} // namespace detail
263
264template <size_t I, size_t F>
265class FixedPoint {
266 static_assert(detail::type_from_size<I + F>::is_specialized, "invalid combination of sizes");
267
268public:
269 static constexpr size_t fractional_bits = F;
270 static constexpr size_t integer_bits = I;
271 static constexpr size_t total_bits = I + F;
272
273 using base_type_info = detail::type_from_size<total_bits>;
274
275 using base_type = typename base_type_info::value_type;
276 using next_type = typename base_type_info::next_size::value_type;
277 using unsigned_type = typename base_type_info::unsigned_type;
278
279public:
280#ifdef __GNUC__
281#pragma GCC diagnostic push
282#pragma GCC diagnostic ignored "-Woverflow"
283#endif
284 static constexpr base_type fractional_mask =
285 ~(static_cast<unsigned_type>(~base_type(0)) << fractional_bits);
286 static constexpr base_type integer_mask = ~fractional_mask;
287#ifdef __GNUC__
288#pragma GCC diagnostic pop
289#endif
290
291public:
292 static constexpr base_type one = base_type(1) << fractional_bits;
293
294public: // constructors
295 FixedPoint() = default;
296 FixedPoint(const FixedPoint&) = default;
297 FixedPoint(FixedPoint&&) = default;
298 FixedPoint& operator=(const FixedPoint&) = default;
299
300 template <class Number>
301 constexpr FixedPoint(
302 Number n, typename std::enable_if<std::is_arithmetic<Number>::value>::type* = nullptr)
303 : data_(static_cast<base_type>(n * one)) {}
304
305public: // conversion
306 template <size_t I2, size_t F2>
307 CONSTEXPR14 explicit FixedPoint(FixedPoint<I2, F2> other) {
308 static_assert(I2 <= I && F2 <= F, "Scaling conversion can only upgrade types");
309 using T = FixedPoint<I2, F2>;
310
311 const base_type fractional = (other.data_ & T::fractional_mask);
312 const base_type integer = (other.data_ & T::integer_mask) >> T::fractional_bits;
313 data_ =
314 (integer << fractional_bits) | (fractional << (fractional_bits - T::fractional_bits));
315 }
316
317private:
318 // this makes it simpler to create a FixedPoint object from
319 // a native type without scaling
320 // use "FixedPoint::from_base" in order to perform this.
321 struct NoScale {};
322
323 constexpr FixedPoint(base_type n, const NoScale&) : data_(n) {}
324
325public:
326 static constexpr FixedPoint from_base(base_type n) {
327 return FixedPoint(n, NoScale());
328 }
329
330public: // comparison operators
331 constexpr bool operator==(FixedPoint rhs) const {
332 return data_ == rhs.data_;
333 }
334
335 constexpr bool operator!=(FixedPoint rhs) const {
336 return data_ != rhs.data_;
337 }
338
339 constexpr bool operator<(FixedPoint rhs) const {
340 return data_ < rhs.data_;
341 }
342
343 constexpr bool operator>(FixedPoint rhs) const {
344 return data_ > rhs.data_;
345 }
346
347 constexpr bool operator<=(FixedPoint rhs) const {
348 return data_ <= rhs.data_;
349 }
350
351 constexpr bool operator>=(FixedPoint rhs) const {
352 return data_ >= rhs.data_;
353 }
354
355public: // unary operators
356 constexpr bool operator!() const {
357 return !data_;
358 }
359
360 constexpr FixedPoint operator~() const {
361 // NOTE(eteran): this will often appear to "just negate" the value;
362 // that is not an error, it is because -x == (~x+1)
363 // and that "+1" is adding an infinitesimally small fraction to the
364 // complemented value
365 return FixedPoint::from_base(~data_);
366 }
367
368 constexpr FixedPoint operator-() const {
369 return FixedPoint::from_base(-data_);
370 }
371
372 constexpr FixedPoint operator+() const {
373 return FixedPoint::from_base(+data_);
374 }
375
376 CONSTEXPR14 FixedPoint& operator++() {
377 data_ += one;
378 return *this;
379 }
380
381 CONSTEXPR14 FixedPoint& operator--() {
382 data_ -= one;
383 return *this;
384 }
385
386 CONSTEXPR14 FixedPoint operator++(int) {
387 FixedPoint tmp(*this);
388 data_ += one;
389 return tmp;
390 }
391
392 CONSTEXPR14 FixedPoint operator--(int) {
393 FixedPoint tmp(*this);
394 data_ -= one;
395 return tmp;
396 }
397
398public: // basic math operators
399 CONSTEXPR14 FixedPoint& operator+=(FixedPoint n) {
400 data_ += n.data_;
401 return *this;
402 }
403
404 CONSTEXPR14 FixedPoint& operator-=(FixedPoint n) {
405 data_ -= n.data_;
406 return *this;
407 }
408
409 CONSTEXPR14 FixedPoint& operator*=(FixedPoint n) {
410 return assign(detail::multiply(*this, n));
411 }
412
413 CONSTEXPR14 FixedPoint& operator/=(FixedPoint n) {
414 FixedPoint temp;
415 return assign(detail::divide(*this, n, temp));
416 }
417
418private:
419 CONSTEXPR14 FixedPoint& assign(FixedPoint rhs) {
420 data_ = rhs.data_;
421 return *this;
422 }
423
424public: // binary math operators; these affect the underlying bit pattern, since they
425 // don't typically make sense for non-integer values
426 CONSTEXPR14 FixedPoint& operator&=(FixedPoint n) {
427 data_ &= n.data_;
428 return *this;
429 }
430
431 CONSTEXPR14 FixedPoint& operator|=(FixedPoint n) {
432 data_ |= n.data_;
433 return *this;
434 }
435
436 CONSTEXPR14 FixedPoint& operator^=(FixedPoint n) {
437 data_ ^= n.data_;
438 return *this;
439 }
440
441 template <class Integer,
442 class = typename std::enable_if<std::is_integral<Integer>::value>::type>
443 CONSTEXPR14 FixedPoint& operator>>=(Integer n) {
444 data_ >>= n;
445 return *this;
446 }
447
448 template <class Integer,
449 class = typename std::enable_if<std::is_integral<Integer>::value>::type>
450 CONSTEXPR14 FixedPoint& operator<<=(Integer n) {
451 data_ <<= n;
452 return *this;
453 }
454
455public: // conversion to basic types
456 constexpr void round_up() {
457 data_ += (data_ & fractional_mask) >> 1;
458 }
459
460 constexpr int to_int() {
461 round_up();
462 return static_cast<int>((data_ & integer_mask) >> fractional_bits);
463 }
464
465 constexpr unsigned int to_uint() {
466 round_up();
467 return static_cast<unsigned int>((data_ & integer_mask) >> fractional_bits);
468 }
469
470 constexpr int64_t to_long() {
471 round_up();
472 return static_cast<int64_t>((data_ & integer_mask) >> fractional_bits);
473 }
474
475 constexpr int to_int_floor() const {
476 return static_cast<int>((data_ & integer_mask) >> fractional_bits);
477 }
478
479 constexpr int64_t to_long_floor() {
480 return static_cast<int64_t>((data_ & integer_mask) >> fractional_bits);
481 }
482
483 constexpr unsigned int to_uint_floor() const {
484 return static_cast<unsigned int>((data_ & integer_mask) >> fractional_bits);
485 }
486
487 constexpr float to_float() const {
488 return static_cast<float>(data_) / FixedPoint::one;
489 }
490
491 constexpr double to_double() const {
492 return static_cast<double>(data_) / FixedPoint::one;
493 }
494
495 constexpr base_type to_raw() const {
496 return data_;
497 }
498
499 constexpr void clear_int() {
500 data_ &= fractional_mask;
501 }
502
503 constexpr base_type get_frac() const {
504 return data_ & fractional_mask;
505 }
506
507public:
508 CONSTEXPR14 void swap(FixedPoint& rhs) {
509 using std::swap;
510 swap(data_, rhs.data_);
511 }
512
513public:
514 base_type data_;
515};
516
517// if we have the same fractional portion, but differing integer portions, we trivially upgrade the
518// smaller type
519template <size_t I1, size_t I2, size_t F>
520CONSTEXPR14 typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type
521operator+(FixedPoint<I1, F> lhs, FixedPoint<I2, F> rhs) {
522
523 using T = typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type;
524
525 const T l = T::from_base(lhs.to_raw());
526 const T r = T::from_base(rhs.to_raw());
527 return l + r;
528}
529
530template <size_t I1, size_t I2, size_t F>
531CONSTEXPR14 typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type
532operator-(FixedPoint<I1, F> lhs, FixedPoint<I2, F> rhs) {
533
534 using T = typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type;
535
536 const T l = T::from_base(lhs.to_raw());
537 const T r = T::from_base(rhs.to_raw());
538 return l - r;
539}
540
541template <size_t I1, size_t I2, size_t F>
542CONSTEXPR14 typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type
543operator*(FixedPoint<I1, F> lhs, FixedPoint<I2, F> rhs) {
544
545 using T = typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type;
546
547 const T l = T::from_base(lhs.to_raw());
548 const T r = T::from_base(rhs.to_raw());
549 return l * r;
550}
551
552template <size_t I1, size_t I2, size_t F>
553CONSTEXPR14 typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type
554operator/(FixedPoint<I1, F> lhs, FixedPoint<I2, F> rhs) {
555
556 using T = typename std::conditional<I1 >= I2, FixedPoint<I1, F>, FixedPoint<I2, F>>::type;
557
558 const T l = T::from_base(lhs.to_raw());
559 const T r = T::from_base(rhs.to_raw());
560 return l / r;
561}
562
563template <size_t I, size_t F>
564std::ostream& operator<<(std::ostream& os, FixedPoint<I, F> f) {
565 os << f.to_double();
566 return os;
567}
568
569// basic math operators
570template <size_t I, size_t F>
571CONSTEXPR14 FixedPoint<I, F> operator+(FixedPoint<I, F> lhs, FixedPoint<I, F> rhs) {
572 lhs += rhs;
573 return lhs;
574}
575template <size_t I, size_t F>
576CONSTEXPR14 FixedPoint<I, F> operator-(FixedPoint<I, F> lhs, FixedPoint<I, F> rhs) {
577 lhs -= rhs;
578 return lhs;
579}
580template <size_t I, size_t F>
581CONSTEXPR14 FixedPoint<I, F> operator*(FixedPoint<I, F> lhs, FixedPoint<I, F> rhs) {
582 lhs *= rhs;
583 return lhs;
584}
585template <size_t I, size_t F>
586CONSTEXPR14 FixedPoint<I, F> operator/(FixedPoint<I, F> lhs, FixedPoint<I, F> rhs) {
587 lhs /= rhs;
588 return lhs;
589}
590
591template <size_t I, size_t F, class Number,
592 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
593CONSTEXPR14 FixedPoint<I, F> operator+(FixedPoint<I, F> lhs, Number rhs) {
594 lhs += FixedPoint<I, F>(rhs);
595 return lhs;
596}
597template <size_t I, size_t F, class Number,
598 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
599CONSTEXPR14 FixedPoint<I, F> operator-(FixedPoint<I, F> lhs, Number rhs) {
600 lhs -= FixedPoint<I, F>(rhs);
601 return lhs;
602}
603template <size_t I, size_t F, class Number,
604 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
605CONSTEXPR14 FixedPoint<I, F> operator*(FixedPoint<I, F> lhs, Number rhs) {
606 lhs *= FixedPoint<I, F>(rhs);
607 return lhs;
608}
609template <size_t I, size_t F, class Number,
610 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
611CONSTEXPR14 FixedPoint<I, F> operator/(FixedPoint<I, F> lhs, Number rhs) {
612 lhs /= FixedPoint<I, F>(rhs);
613 return lhs;
614}
615
616template <size_t I, size_t F, class Number,
617 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
618CONSTEXPR14 FixedPoint<I, F> operator+(Number lhs, FixedPoint<I, F> rhs) {
619 FixedPoint<I, F> tmp(lhs);
620 tmp += rhs;
621 return tmp;
622}
623template <size_t I, size_t F, class Number,
624 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
625CONSTEXPR14 FixedPoint<I, F> operator-(Number lhs, FixedPoint<I, F> rhs) {
626 FixedPoint<I, F> tmp(lhs);
627 tmp -= rhs;
628 return tmp;
629}
630template <size_t I, size_t F, class Number,
631 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
632CONSTEXPR14 FixedPoint<I, F> operator*(Number lhs, FixedPoint<I, F> rhs) {
633 FixedPoint<I, F> tmp(lhs);
634 tmp *= rhs;
635 return tmp;
636}
637template <size_t I, size_t F, class Number,
638 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
639CONSTEXPR14 FixedPoint<I, F> operator/(Number lhs, FixedPoint<I, F> rhs) {
640 FixedPoint<I, F> tmp(lhs);
641 tmp /= rhs;
642 return tmp;
643}
644
645// shift operators
646template <size_t I, size_t F, class Integer,
647 class = typename std::enable_if<std::is_integral<Integer>::value>::type>
648CONSTEXPR14 FixedPoint<I, F> operator<<(FixedPoint<I, F> lhs, Integer rhs) {
649 lhs <<= rhs;
650 return lhs;
651}
652template <size_t I, size_t F, class Integer,
653 class = typename std::enable_if<std::is_integral<Integer>::value>::type>
654CONSTEXPR14 FixedPoint<I, F> operator>>(FixedPoint<I, F> lhs, Integer rhs) {
655 lhs >>= rhs;
656 return lhs;
657}
658
659// comparison operators
660template <size_t I, size_t F, class Number,
661 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
662constexpr bool operator>(FixedPoint<I, F> lhs, Number rhs) {
663 return lhs > FixedPoint<I, F>(rhs);
664}
665template <size_t I, size_t F, class Number,
666 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
667constexpr bool operator<(FixedPoint<I, F> lhs, Number rhs) {
668 return lhs < FixedPoint<I, F>(rhs);
669}
670template <size_t I, size_t F, class Number,
671 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
672constexpr bool operator>=(FixedPoint<I, F> lhs, Number rhs) {
673 return lhs >= FixedPoint<I, F>(rhs);
674}
675template <size_t I, size_t F, class Number,
676 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
677constexpr bool operator<=(FixedPoint<I, F> lhs, Number rhs) {
678 return lhs <= FixedPoint<I, F>(rhs);
679}
680template <size_t I, size_t F, class Number,
681 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
682constexpr bool operator==(FixedPoint<I, F> lhs, Number rhs) {
683 return lhs == FixedPoint<I, F>(rhs);
684}
685template <size_t I, size_t F, class Number,
686 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
687constexpr bool operator!=(FixedPoint<I, F> lhs, Number rhs) {
688 return lhs != FixedPoint<I, F>(rhs);
689}
690
691template <size_t I, size_t F, class Number,
692 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
693constexpr bool operator>(Number lhs, FixedPoint<I, F> rhs) {
694 return FixedPoint<I, F>(lhs) > rhs;
695}
696template <size_t I, size_t F, class Number,
697 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
698constexpr bool operator<(Number lhs, FixedPoint<I, F> rhs) {
699 return FixedPoint<I, F>(lhs) < rhs;
700}
701template <size_t I, size_t F, class Number,
702 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
703constexpr bool operator>=(Number lhs, FixedPoint<I, F> rhs) {
704 return FixedPoint<I, F>(lhs) >= rhs;
705}
706template <size_t I, size_t F, class Number,
707 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
708constexpr bool operator<=(Number lhs, FixedPoint<I, F> rhs) {
709 return FixedPoint<I, F>(lhs) <= rhs;
710}
711template <size_t I, size_t F, class Number,
712 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
713constexpr bool operator==(Number lhs, FixedPoint<I, F> rhs) {
714 return FixedPoint<I, F>(lhs) == rhs;
715}
716template <size_t I, size_t F, class Number,
717 class = typename std::enable_if<std::is_arithmetic<Number>::value>::type>
718constexpr bool operator!=(Number lhs, FixedPoint<I, F> rhs) {
719 return FixedPoint<I, F>(lhs) != rhs;
720}
721
722} // namespace Common
723
724#undef CONSTEXPR14
725
726#endif
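A quick sanity sketch of the arithmetic above, not from the commit (the 50/14 split is just an arbitrary pair summing to 64): values are stored as raw integers scaled by one = 2^F, and multiply/divide route through next_type (__int128 on GCC x86_64) or the portable fallbacks otherwise.

    #include <cassert>
    #include "common/fixed_point.h"

    int main() {
        using Fixed = Common::FixedPoint<50, 14>; // 64-bit base type

        const Fixed a = 1.5; // raw: 1.5 * 2^14 = 24576
        const Fixed b = 2;   // raw: 2 * 2^14 = 32768
        const Fixed c = a * b;

        assert(c.to_raw() == 3 * (1 << 14)); // (24576 * 32768) >> 14
        assert(c.to_int_floor() == 3);
        assert(c.to_double() == 3.0);
        return 0;
    }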
diff --git a/src/common/input.h b/src/common/input.h
index bb42aaacc..995c35d9d 100644
--- a/src/common/input.h
+++ b/src/common/input.h
@@ -28,7 +28,7 @@ enum class InputType {
     Color,
     Vibration,
     Nfc,
-    Ir,
+    IrSensor,
 };

 // Internal battery charge level
@@ -53,6 +53,15 @@ enum class PollingMode {
     IR,
 };

+enum class CameraFormat {
+    Size320x240,
+    Size160x120,
+    Size80x60,
+    Size40x30,
+    Size20x15,
+    None,
+};
+
 // Vibration reply from the controller
 enum class VibrationError {
     None,
@@ -68,6 +77,13 @@ enum class PollingError {
     Unknown,
 };

+// Ir camera reply from the controller
+enum class CameraError {
+    None,
+    NotSupported,
+    Unknown,
+};
+
 // Hint for amplification curve to be used
 enum class VibrationAmplificationType {
     Linear,
@@ -176,6 +192,12 @@ struct LedStatus {
     bool led_4{};
 };

+// Raw data from camera
+struct CameraStatus {
+    CameraFormat format{CameraFormat::None};
+    std::vector<u8> data{};
+};
+
 // List of buttons to be passed to Qt that can be translated
 enum class ButtonNames {
     Undefined,
@@ -233,6 +255,7 @@ struct CallbackStatus {
     BodyColorStatus color_status{};
     BatteryStatus battery_status{};
     VibrationStatus vibration_status{};
+    CameraStatus camera_status{};
 };

 // Triggered once every input change
@@ -281,6 +304,10 @@ public:
     virtual PollingError SetPollingMode([[maybe_unused]] PollingMode polling_mode) {
         return PollingError::NotSupported;
     }
+
+    virtual CameraError SetCameraFormat([[maybe_unused]] CameraFormat camera_format) {
+        return CameraError::NotSupported;
+    }
 };

 /// An abstract class template for a factory that can create input devices.
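A sketch of how a backend might use the new camera hooks (FakeIrCamera and OnFrame are invented; InputDevice, CallbackStatus, and TriggerOnChange are the types from this header, assuming the usual yuzu input-device callback flow):

    #include <utility>
    #include <vector>
    #include "common/input.h"

    class FakeIrCamera final : public Common::Input::InputDevice {
    public:
        Common::Input::CameraError SetCameraFormat(
            Common::Input::CameraFormat camera_format) override {
            format = camera_format;
            return Common::Input::CameraError::None;
        }

        // Invented entry point: forwards a captured frame to the frontend
        // through the camera_status field added to CallbackStatus above.
        void OnFrame(std::vector<u8> data) {
            Common::Input::CallbackStatus status{};
            status.type = Common::Input::InputType::IrSensor;
            status.camera_status.format = format;
            status.camera_status.data = std::move(data);
            TriggerOnChange(status);
        }

    private:
        Common::Input::CameraFormat format{Common::Input::CameraFormat::None};
    };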
diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp
index 4acbff649..6de9bacbf 100644
--- a/src/common/logging/filter.cpp
+++ b/src/common/logging/filter.cpp
@@ -128,7 +128,7 @@ bool ParseFilterRule(Filter& instance, Iterator begin, Iterator end) {
     SUB(Service, PM) \
     SUB(Service, PREPO) \
     SUB(Service, PSC) \
-    SUB(Service, PSM) \
+    SUB(Service, PTM) \
     SUB(Service, SET) \
     SUB(Service, SM) \
     SUB(Service, SPL) \
diff --git a/src/common/logging/types.h b/src/common/logging/types.h
index cabb4db8e..595c15ada 100644
--- a/src/common/logging/types.h
+++ b/src/common/logging/types.h
@@ -95,7 +95,7 @@ enum class Class : u8 {
     Service_PM,    ///< The PM service
     Service_PREPO, ///< The PREPO (Play report) service
     Service_PSC,   ///< The PSC service
-    Service_PSM,   ///< The PSM service
+    Service_PTM,   ///< The PTM service
     Service_SET,   ///< The SET (Settings) service
     Service_SM,    ///< The SM (Service manager) service
     Service_SPL,   ///< The SPL service
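Call sites pick up the rename mechanically; a hedged sketch (the function is invented, LOG_DEBUG is yuzu's logging macro from common/logging/log.h):

    #include "common/logging/log.h"

    void ReportChargeLevel(int percent) {
        // Filterable at runtime with a rule such as "Service.PTM:Debug",
        // using the subclass list patched in filter.cpp above.
        LOG_DEBUG(Service_PTM, "battery charge is {}%", percent);
    }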
diff --git a/src/common/reader_writer_queue.h b/src/common/reader_writer_queue.h
new file mode 100644
index 000000000..8d2c9408c
--- /dev/null
+++ b/src/common/reader_writer_queue.h
@@ -0,0 +1,941 @@
1// ©2013-2020 Cameron Desrochers.
2// Distributed under the simplified BSD license (see the license file that
3// should have come with this header).
4
5#pragma once
6
7#include <cassert>
8#include <cstdint>
9#include <cstdlib> // For malloc/free/abort & size_t
10#include <memory>
11#include <new>
12#include <stdexcept>
13#include <type_traits>
14#include <utility>
15
16#include "common/atomic_helpers.h"
17
18#if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
19#include <chrono>
20#endif
21
22// A lock-free queue for a single-consumer, single-producer architecture.
23// The queue is also wait-free in the common path (except if more memory
24// needs to be allocated, in which case malloc is called).
25// Allocates memory sparingly, and only once if the original maximum size
26// estimate is never exceeded.
27// Tested on x86/x64 processors, but semantics should be correct for all
28// architectures (given the right implementations in atomic_helpers.h), provided
29// that aligned integer and pointer accesses are naturally atomic.
30// Note that there should only be one consumer thread and one producer thread;
31// switching roles of the threads, or using multiple consecutive threads for
32// one role, is not safe unless properly synchronized.
33// Using the queue exclusively from one thread is fine, though a bit silly.
34
35#ifndef MOODYCAMEL_CACHE_LINE_SIZE
36#define MOODYCAMEL_CACHE_LINE_SIZE 64
37#endif
38
39#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
40#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || \
41 (!defined(_MSC_VER) && !defined(__GNUC__))
42#define MOODYCAMEL_EXCEPTIONS_ENABLED
43#endif
44#endif
45
46#ifndef MOODYCAMEL_HAS_EMPLACE
47#if !defined(_MSC_VER) || \
48 _MSC_VER >= 1800 // variadic templates: either a non-MS compiler or VS >= 2013
49#define MOODYCAMEL_HAS_EMPLACE 1
50#endif
51#endif
52
53#ifndef MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE
54#if defined(__APPLE__) && defined(__MACH__) && __cplusplus >= 201703L
55// This is required to find out what deployment target we are using
56#include <CoreFoundation/CoreFoundation.h>
57#if !defined(MAC_OS_X_VERSION_MIN_REQUIRED) || \
58 MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_14
59// C++17 new(size_t, align_val_t) is not backwards-compatible with older versions of macOS, so we
60// can't support over-alignment in this case
61#define MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE
62#endif
63#endif
64#endif
65
66#ifndef MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE
67#define MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE AE_ALIGN(MOODYCAMEL_CACHE_LINE_SIZE)
68#endif
69
70#ifdef AE_VCPP
71#pragma warning(push)
72#pragma warning(disable : 4324) // structure was padded due to __declspec(align())
73#pragma warning(disable : 4820) // padding was added
74#pragma warning(disable : 4127) // conditional expression is constant
75#endif
76
77namespace Common {
78
79template <typename T, size_t MAX_BLOCK_SIZE = 512>
80class MOODYCAMEL_MAYBE_ALIGN_TO_CACHELINE ReaderWriterQueue {
81 // Design: Based on a queue-of-queues. The low-level queues are just
82 // circular buffers with front and tail indices indicating where the
83 // next element to dequeue is and where the next element can be enqueued,
84 // respectively. Each low-level queue is called a "block". Each block
85 // wastes exactly one element's worth of space to keep the design simple
86 // (if front == tail then the queue is empty, and can't be full).
87 // The high-level queue is a circular linked list of blocks; again there
88 // is a front and tail, but this time they are pointers to the blocks.
89 // The front block is where the next element to be dequeued is, provided
90 // the block is not empty. The back block is where elements are to be
91 // enqueued, provided the block is not full.
92 // The producer thread owns all the tail indices/pointers. The consumer
93 // thread owns all the front indices/pointers. Both threads read each
94 // other's variables, but only the owning thread updates them. E.g. After
95 // the consumer reads the producer's tail, the tail may change before the
96 // consumer is done dequeuing an object, but the consumer knows the tail
97 // will never go backwards, only forwards.
98 // If there is no room to enqueue an object, an additional block (of
99 // equal size to the last block) is added. Blocks are never removed.
100
101public:
102 typedef T value_type;
103
104 // Constructs a queue that can hold at least `size` elements without further
105 // allocations. If more than MAX_BLOCK_SIZE elements are requested,
106 // then several blocks of MAX_BLOCK_SIZE each are reserved (including
107 // at least one extra buffer block).
108 AE_NO_TSAN explicit ReaderWriterQueue(size_t size = 15)
109#ifndef NDEBUG
110 : enqueuing(false), dequeuing(false)
111#endif
112 {
113 assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) &&
114 "MAX_BLOCK_SIZE must be a power of 2");
115 assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
116
117 Block* firstBlock = nullptr;
118
119 largestBlockSize =
120 ceilToPow2(size + 1); // We need a spare slot to fit size elements in the block
121 if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
122 // We need a spare block in case the producer is writing to a different block than
123 // the one the consumer is reading from, and wants to enqueue the maximum number of
124 // elements. We also need a spare element in each block to avoid the ambiguity between
125 // front == tail meaning "empty" and "full". So the effective number of slots that are
126 // guaranteed to be usable at any time is (block size - 1) * (number of blocks - 1).
127 // Solving for size and applying a ceiling to the division gives us (after simplifying):
128 size_t initialBlockCount = (size + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
129 largestBlockSize = MAX_BLOCK_SIZE;
130 Block* lastBlock = nullptr;
131 for (size_t i = 0; i != initialBlockCount; ++i) {
132 auto block = make_block(largestBlockSize);
133 if (block == nullptr) {
134#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
135 throw std::bad_alloc();
136#else
137 abort();
138#endif
139 }
140 if (firstBlock == nullptr) {
141 firstBlock = block;
142 } else {
143 lastBlock->next = block;
144 }
145 lastBlock = block;
146 block->next = firstBlock;
147 }
148 } else {
149 firstBlock = make_block(largestBlockSize);
150 if (firstBlock == nullptr) {
151#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
152 throw std::bad_alloc();
153#else
154 abort();
155#endif
156 }
157 firstBlock->next = firstBlock;
158 }
159 frontBlock = firstBlock;
160 tailBlock = firstBlock;
161
162 // Make sure the reader/writer threads will have the initialized memory setup above:
163 fence(memory_order_sync);
164 }
165
166 // Note: The queue should not be accessed concurrently while it's
167 // being moved. It's up to the user to synchronize this.
168 AE_NO_TSAN ReaderWriterQueue(ReaderWriterQueue&& other)
169 : frontBlock(other.frontBlock.load()), tailBlock(other.tailBlock.load()),
170 largestBlockSize(other.largestBlockSize)
171#ifndef NDEBUG
172 ,
173 enqueuing(false), dequeuing(false)
174#endif
175 {
176 other.largestBlockSize = 32;
177 Block* b = other.make_block(other.largestBlockSize);
178 if (b == nullptr) {
179#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
180 throw std::bad_alloc();
181#else
182 abort();
183#endif
184 }
185 b->next = b;
186 other.frontBlock = b;
187 other.tailBlock = b;
188 }
189
190 // Note: The queue should not be accessed concurrently while it's
191 // being moved. It's up to the user to synchronize this.
192 ReaderWriterQueue& operator=(ReaderWriterQueue&& other) AE_NO_TSAN {
193 Block* b = frontBlock.load();
194 frontBlock = other.frontBlock.load();
195 other.frontBlock = b;
196 b = tailBlock.load();
197 tailBlock = other.tailBlock.load();
198 other.tailBlock = b;
199 std::swap(largestBlockSize, other.largestBlockSize);
200 return *this;
201 }
202
203 // Note: The queue should not be accessed concurrently while it's
204 // being deleted. It's up to the user to synchronize this.
205 AE_NO_TSAN ~ReaderWriterQueue() {
206 // Make sure we get the latest version of all variables from other CPUs:
207 fence(memory_order_sync);
208
209 // Destroy any remaining objects in queue and free memory
210 Block* frontBlock_ = frontBlock;
211 Block* block = frontBlock_;
212 do {
213 Block* nextBlock = block->next;
214 size_t blockFront = block->front;
215 size_t blockTail = block->tail;
216
217 for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
218 auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
219 element->~T();
220 (void)element;
221 }
222
223 auto rawBlock = block->rawThis;
224 block->~Block();
225 std::free(rawBlock);
226 block = nextBlock;
227 } while (block != frontBlock_);
228 }
229
230 // Enqueues a copy of element if there is room in the queue.
231 // Returns true if the element was enqueued, false otherwise.
232 // Does not allocate memory.
233 AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN {
234 return inner_enqueue<CannotAlloc>(element);
235 }
236
237 // Enqueues a moved copy of element if there is room in the queue.
238 // Returns true if the element was enqueued, false otherwise.
239 // Does not allocate memory.
240 AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN {
241 return inner_enqueue<CannotAlloc>(std::forward<T>(element));
242 }
243
244#if MOODYCAMEL_HAS_EMPLACE
245 // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
246 template <typename... Args>
247 AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN {
248 return inner_enqueue<CannotAlloc>(std::forward<Args>(args)...);
249 }
250#endif
251
252 // Enqueues a copy of element on the queue.
253 // Allocates an additional block of memory if needed.
254 // Only fails (returns false) if memory allocation fails.
255 AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN {
256 return inner_enqueue<CanAlloc>(element);
257 }
258
259 // Enqueues a moved copy of element on the queue.
260 // Allocates an additional block of memory if needed.
261 // Only fails (returns false) if memory allocation fails.
262 AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN {
263 return inner_enqueue<CanAlloc>(std::forward<T>(element));
264 }
265
266#if MOODYCAMEL_HAS_EMPLACE
267 // Like enqueue() but with emplace semantics (i.e. construct-in-place).
268 template <typename... Args>
269 AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN {
270 return inner_enqueue<CanAlloc>(std::forward<Args>(args)...);
271 }
272#endif
273
274 // Attempts to dequeue an element; if the queue is empty,
275 // returns false instead. If the queue has at least one element,
276 // moves front to result using operator=, then returns true.
277 template <typename U>
278 bool try_dequeue(U& result) AE_NO_TSAN {
279#ifndef NDEBUG
280 ReentrantGuard guard(this->dequeuing);
281#endif
282
283 // High-level pseudocode:
284 // Remember where the tail block is
285 // If the front block has an element in it, dequeue it
286 // Else
287 // If front block was the tail block when we entered the function, return false
288 // Else advance to next block and dequeue the item there
289
290 // Note that we have to use the value of the tail block from before we check if the front
291 // block is full or not, in case the front block is empty and then, before we check if the
292 // tail block is at the front block or not, the producer fills up the front block *and
293 // moves on*, which would make us skip a filled block. Seems unlikely, but was consistently
294 // reproducible in practice.
295 // In order to avoid overhead in the common case, though, we do a double-checked pattern
296 // where we have the fast path if the front block is not empty, then read the tail block,
297 // then re-read the front block and check if it's not empty again, then check if the tail
298 // block has advanced.
299
300 Block* frontBlock_ = frontBlock.load();
301 size_t blockTail = frontBlock_->localTail;
302 size_t blockFront = frontBlock_->front.load();
303
304 if (blockFront != blockTail ||
305 blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
306 fence(memory_order_acquire);
307
308 non_empty_front_block:
309 // Front block not empty, dequeue from here
310 auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
311 result = std::move(*element);
312 element->~T();
313
314 blockFront = (blockFront + 1) & frontBlock_->sizeMask;
315
316 fence(memory_order_release);
317 frontBlock_->front = blockFront;
318 } else if (frontBlock_ != tailBlock.load()) {
319 fence(memory_order_acquire);
320
321 frontBlock_ = frontBlock.load();
322 blockTail = frontBlock_->localTail = frontBlock_->tail.load();
323 blockFront = frontBlock_->front.load();
324 fence(memory_order_acquire);
325
326 if (blockFront != blockTail) {
327 // Oh look, the front block isn't empty after all
328 goto non_empty_front_block;
329 }
330
331 // Front block is empty but there's another block ahead, advance to it
332 Block* nextBlock = frontBlock_->next;
333 // Don't need an acquire fence here since next can only ever be set on the tailBlock,
334 // and we're not the tailBlock, and we did an acquire earlier after reading tailBlock
335 // which ensures next is up-to-date on this CPU in case we recently were at tailBlock.
336
337 size_t nextBlockFront = nextBlock->front.load();
338 size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
339 fence(memory_order_acquire);
340
341 // Since the tailBlock is only ever advanced after being written to,
342 // we know there's for sure an element to dequeue on it
343 assert(nextBlockFront != nextBlockTail);
344 AE_UNUSED(nextBlockTail);
345
346 // We're done with this block, let the producer use it if it needs
347 fence(memory_order_release); // Expose possibly pending changes to frontBlock->front
348 // from last dequeue
349 frontBlock = frontBlock_ = nextBlock;
350
351 compiler_fence(memory_order_release); // Not strictly needed
352
353 auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
354
355 result = std::move(*element);
356 element->~T();
357
358 nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
359
360 fence(memory_order_release);
361 frontBlock_->front = nextBlockFront;
362 } else {
363 // No elements in current block and no other block to advance to
364 return false;
365 }
366
367 return true;
368 }
369
370 // Returns a pointer to the front element in the queue (the one that
371 // would be removed next by a call to `try_dequeue` or `pop`). If the
372 // queue appears empty at the time the method is called, nullptr is
373 // returned instead.
374 // Must be called only from the consumer thread.
375 T* peek() const AE_NO_TSAN {
376#ifndef NDEBUG
377 ReentrantGuard guard(this->dequeuing);
378#endif
379 // See try_dequeue() for reasoning
380
381 Block* frontBlock_ = frontBlock.load();
382 size_t blockTail = frontBlock_->localTail;
383 size_t blockFront = frontBlock_->front.load();
384
385 if (blockFront != blockTail ||
386 blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
387 fence(memory_order_acquire);
388 non_empty_front_block:
389 return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
390 } else if (frontBlock_ != tailBlock.load()) {
391 fence(memory_order_acquire);
392 frontBlock_ = frontBlock.load();
393 blockTail = frontBlock_->localTail = frontBlock_->tail.load();
394 blockFront = frontBlock_->front.load();
395 fence(memory_order_acquire);
396
397 if (blockFront != blockTail) {
398 goto non_empty_front_block;
399 }
400
401 Block* nextBlock = frontBlock_->next;
402
403 size_t nextBlockFront = nextBlock->front.load();
404 fence(memory_order_acquire);
405
406 assert(nextBlockFront != nextBlock->tail.load());
407 return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
408 }
409
410 return nullptr;
411 }
412
413 // Removes the front element from the queue, if any, without returning it.
414 // Returns true on success, or false if the queue appeared empty at the time
415 // `pop` was called.
416 bool pop() AE_NO_TSAN {
417#ifndef NDEBUG
418 ReentrantGuard guard(this->dequeuing);
419#endif
420 // See try_dequeue() for reasoning
421
422 Block* frontBlock_ = frontBlock.load();
423 size_t blockTail = frontBlock_->localTail;
424 size_t blockFront = frontBlock_->front.load();
425
426 if (blockFront != blockTail ||
427 blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
428 fence(memory_order_acquire);
429
430 non_empty_front_block:
431 auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
432 element->~T();
433
434 blockFront = (blockFront + 1) & frontBlock_->sizeMask;
435
436 fence(memory_order_release);
437 frontBlock_->front = blockFront;
438 } else if (frontBlock_ != tailBlock.load()) {
439 fence(memory_order_acquire);
440 frontBlock_ = frontBlock.load();
441 blockTail = frontBlock_->localTail = frontBlock_->tail.load();
442 blockFront = frontBlock_->front.load();
443 fence(memory_order_acquire);
444
445 if (blockFront != blockTail) {
446 goto non_empty_front_block;
447 }
448
449 // Front block is empty but there's another block ahead, advance to it
450 Block* nextBlock = frontBlock_->next;
451
452 size_t nextBlockFront = nextBlock->front.load();
453 size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
454 fence(memory_order_acquire);
455
456 assert(nextBlockFront != nextBlockTail);
457 AE_UNUSED(nextBlockTail);
458
459 fence(memory_order_release);
460 frontBlock = frontBlock_ = nextBlock;
461
462 compiler_fence(memory_order_release);
463
464 auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
465 element->~T();
466
467 nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
468
469 fence(memory_order_release);
470 frontBlock_->front = nextBlockFront;
471 } else {
472 // No elements in current block and no other block to advance to
473 return false;
474 }
475
476 return true;
477 }
478
479 // Returns the approximate number of items currently in the queue.
480 // Safe to call from both the producer and consumer threads.
481 inline size_t size_approx() const AE_NO_TSAN {
482 size_t result = 0;
483 Block* frontBlock_ = frontBlock.load();
484 Block* block = frontBlock_;
485 do {
486 fence(memory_order_acquire);
487 size_t blockFront = block->front.load();
488 size_t blockTail = block->tail.load();
489 result += (blockTail - blockFront) & block->sizeMask;
490 block = block->next.load();
491 } while (block != frontBlock_);
492 return result;
493 }
494
495 // Returns the total number of items that could be enqueued without incurring
496 // an allocation when this queue is empty.
497 // Safe to call from both the producer and consumer threads.
498 //
499 // NOTE: The actual capacity during usage may be different depending on the consumer.
500 // If the consumer is removing elements concurrently, the producer cannot add to
501 // the block the consumer is removing from until it's completely empty, except in
502 // the case where the producer was writing to the same block the consumer was
503 // reading from the whole time.
504 inline size_t max_capacity() const {
505 size_t result = 0;
506 Block* frontBlock_ = frontBlock.load();
507 Block* block = frontBlock_;
508 do {
509 fence(memory_order_acquire);
510 result += block->sizeMask;
511 block = block->next.load();
512 } while (block != frontBlock_);
513 return result;
514 }
515
516private:
517 enum AllocationMode { CanAlloc, CannotAlloc };
518
519#if MOODYCAMEL_HAS_EMPLACE
520 template <AllocationMode canAlloc, typename... Args>
521 bool inner_enqueue(Args&&... args) AE_NO_TSAN
522#else
523 template <AllocationMode canAlloc, typename U>
524 bool inner_enqueue(U&& element) AE_NO_TSAN
525#endif
526 {
527#ifndef NDEBUG
528 ReentrantGuard guard(this->enqueuing);
529#endif
530
531 // High-level pseudocode (assuming we're allowed to alloc a new block):
532 // If room in tail block, add to tail
533 // Else check next block
534 // If next block is not the head block, enqueue on next block
535 // Else create a new block and enqueue there
536 // Advance tail to the block we just enqueued to
537
538 Block* tailBlock_ = tailBlock.load();
539 size_t blockFront = tailBlock_->localFront;
540 size_t blockTail = tailBlock_->tail.load();
541
542 size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
543 if (nextBlockTail != blockFront ||
544 nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
545 fence(memory_order_acquire);
546 // This block has room for at least one more element
547 char* location = tailBlock_->data + blockTail * sizeof(T);
548#if MOODYCAMEL_HAS_EMPLACE
549 new (location) T(std::forward<Args>(args)...);
550#else
551 new (location) T(std::forward<U>(element));
552#endif
553
554 fence(memory_order_release);
555 tailBlock_->tail = nextBlockTail;
556 } else {
557 fence(memory_order_acquire);
558 if (tailBlock_->next.load() != frontBlock) {
559 // Note that the reason we can't advance to the frontBlock and start adding new
560 // entries there is because if we did, then dequeue would stay in that block,
561 // eventually reading the new values, instead of advancing to the next full block
562 // (whose values were enqueued first and so should be consumed first).
563
564 fence(memory_order_acquire); // Ensure we get latest writes if we got the latest
565 // frontBlock
566
567 // tailBlock is full, but there's a free block ahead, use it
568 Block* tailBlockNext = tailBlock_->next.load();
569 size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
570 nextBlockTail = tailBlockNext->tail.load();
571 fence(memory_order_acquire);
572
573 // This block must be empty since it's not the head block and we
574 // go through the blocks in a circle
575 assert(nextBlockFront == nextBlockTail);
576 tailBlockNext->localFront = nextBlockFront;
577
578 char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
579#if MOODYCAMEL_HAS_EMPLACE
580 new (location) T(std::forward<Args>(args)...);
581#else
582 new (location) T(std::forward<U>(element));
583#endif
584
585 tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
586
587 fence(memory_order_release);
588 tailBlock = tailBlockNext;
589 } else if (canAlloc == CanAlloc) {
590 // tailBlock is full and there's no free block ahead; create a new block
591 auto newBlockSize =
592 largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
593 auto newBlock = make_block(newBlockSize);
594 if (newBlock == nullptr) {
595 // Could not allocate a block!
596 return false;
597 }
598 largestBlockSize = newBlockSize;
599
600#if MOODYCAMEL_HAS_EMPLACE
601 new (newBlock->data) T(std::forward<Args>(args)...);
602#else
603 new (newBlock->data) T(std::forward<U>(element));
604#endif
605 assert(newBlock->front == 0);
606 newBlock->tail = newBlock->localTail = 1;
607
608 newBlock->next = tailBlock_->next.load();
609 tailBlock_->next = newBlock;
610
611 // Might be possible for the dequeue thread to see the new tailBlock->next
612 // *without* seeing the new tailBlock value, but this is OK since it can't
613 // advance to the next block until tailBlock is set anyway (because the only
614 // case where it could try to read the next is if it's already at the tailBlock,
615 // and it won't advance past tailBlock in any circumstance).
616
617 fence(memory_order_release);
618 tailBlock = newBlock;
619 } else if (canAlloc == CannotAlloc) {
620 // Would have had to allocate a new block to enqueue, but not allowed
621 return false;
622 } else {
623 assert(false && "Should be unreachable code");
624 return false;
625 }
626 }
627
628 return true;
629 }
630
631 // Disable copying
632 ReaderWriterQueue(ReaderWriterQueue const&) {}
633
634 // Disable assignment
635 ReaderWriterQueue& operator=(ReaderWriterQueue const&) {}
636
637 AE_FORCEINLINE static size_t ceilToPow2(size_t x) {
638 // From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
639 --x;
640 x |= x >> 1;
641 x |= x >> 2;
642 x |= x >> 4;
643 for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
644 x |= x >> (i << 3);
645 }
646 ++x;
647 return x;
648 }
649
650 template <typename U>
651 static AE_FORCEINLINE char* align_for(char* ptr) AE_NO_TSAN {
652 const std::size_t alignment = std::alignment_of<U>::value;
653 return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
654 }
655
656private:
657#ifndef NDEBUG
658 struct ReentrantGuard {
659 AE_NO_TSAN ReentrantGuard(weak_atomic<bool>& _inSection) : inSection(_inSection) {
660 assert(!inSection &&
661 "Concurrent (or re-entrant) enqueue or dequeue operation detected (only one "
662 "thread at a time may hold the producer or consumer role)");
663 inSection = true;
664 }
665
666 AE_NO_TSAN ~ReentrantGuard() {
667 inSection = false;
668 }
669
670 private:
671 ReentrantGuard& operator=(ReentrantGuard const&);
672
673 private:
674 weak_atomic<bool>& inSection;
675 };
676#endif
677
678 struct Block {
679 // Avoid false-sharing by putting highly contended variables on their own cache lines
680 weak_atomic<size_t> front; // (Atomic) Elements are read from here
681 size_t localTail; // An uncontended shadow copy of tail, owned by the consumer
682
683 char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) -
684 sizeof(size_t)];
685 weak_atomic<size_t> tail; // (Atomic) Elements are enqueued here
686 size_t localFront;
687
688 char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) -
689 sizeof(size_t)]; // next isn't very contended, but we don't want it on
690 // the same cache line as tail (which is)
691 weak_atomic<Block*> next; // (Atomic)
692
693 char* data; // Contents (on heap) are aligned to T's alignment
694
695 const size_t sizeMask;
696
697 // size must be a power of two (and greater than 0)
698 AE_NO_TSAN Block(size_t const& _size, char* _rawThis, char* _data)
699 : front(0UL), localTail(0), tail(0UL), localFront(0), next(nullptr), data(_data),
700 sizeMask(_size - 1), rawThis(_rawThis) {}
701
702 private:
703 // C4512 - Assignment operator could not be generated
704 Block& operator=(Block const&);
705
706 public:
707 char* rawThis;
708 };
709
710 static Block* make_block(size_t capacity) AE_NO_TSAN {
711 // Allocate enough memory for the block itself, as well as all the elements it will contain
712 auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
713 size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
714 auto newBlockRaw = static_cast<char*>(std::malloc(size));
715 if (newBlockRaw == nullptr) {
716 return nullptr;
717 }
718
719 auto newBlockAligned = align_for<Block>(newBlockRaw);
720 auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
721 return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
722 }
723
724private:
725 weak_atomic<Block*> frontBlock; // (Atomic) Elements are dequeued from this block
726
727 char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
728 weak_atomic<Block*> tailBlock; // (Atomic) Elements are enqueued to this block
729
730 size_t largestBlockSize;
731
732#ifndef NDEBUG
733 weak_atomic<bool> enqueuing;
734 mutable weak_atomic<bool> dequeuing;
735#endif
736};
737
738// Like ReaderWriterQueue, but also provides blocking operations
739template <typename T, size_t MAX_BLOCK_SIZE = 512>
740class BlockingReaderWriterQueue {
741private:
742 typedef ::Common::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
743
744public:
745 explicit BlockingReaderWriterQueue(size_t size = 15) AE_NO_TSAN
746 : inner(size),
747 sema(new spsc_sema::LightweightSemaphore()) {}
748
749 BlockingReaderWriterQueue(BlockingReaderWriterQueue&& other) AE_NO_TSAN
750 : inner(std::move(other.inner)),
751 sema(std::move(other.sema)) {}
752
753 BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue&& other) AE_NO_TSAN {
754 std::swap(sema, other.sema);
755 std::swap(inner, other.inner);
756 return *this;
757 }
758
759 // Enqueues a copy of element if there is room in the queue.
760 // Returns true if the element was enqueued, false otherwise.
761 // Does not allocate memory.
762 AE_FORCEINLINE bool try_enqueue(T const& element) AE_NO_TSAN {
763 if (inner.try_enqueue(element)) {
764 sema->signal();
765 return true;
766 }
767 return false;
768 }
769
770 // Enqueues a moved copy of element if there is room in the queue.
771 // Returns true if the element was enqueued, false otherwise.
772 // Does not allocate memory.
773 AE_FORCEINLINE bool try_enqueue(T&& element) AE_NO_TSAN {
774 if (inner.try_enqueue(std::forward<T>(element))) {
775 sema->signal();
776 return true;
777 }
778 return false;
779 }
780
781#if MOODYCAMEL_HAS_EMPLACE
782 // Like try_enqueue() but with emplace semantics (i.e. construct-in-place).
783 template <typename... Args>
784 AE_FORCEINLINE bool try_emplace(Args&&... args) AE_NO_TSAN {
785 if (inner.try_emplace(std::forward<Args>(args)...)) {
786 sema->signal();
787 return true;
788 }
789 return false;
790 }
791#endif
792
793 // Enqueues a copy of element on the queue.
794 // Allocates an additional block of memory if needed.
795 // Only fails (returns false) if memory allocation fails.
796 AE_FORCEINLINE bool enqueue(T const& element) AE_NO_TSAN {
797 if (inner.enqueue(element)) {
798 sema->signal();
799 return true;
800 }
801 return false;
802 }
803
804 // Enqueues a moved copy of element on the queue.
805 // Allocates an additional block of memory if needed.
806 // Only fails (returns false) if memory allocation fails.
807 AE_FORCEINLINE bool enqueue(T&& element) AE_NO_TSAN {
808 if (inner.enqueue(std::forward<T>(element))) {
809 sema->signal();
810 return true;
811 }
812 return false;
813 }
814
815#if MOODYCAMEL_HAS_EMPLACE
816 // Like enqueue() but with emplace semantics (i.e. construct-in-place).
817 template <typename... Args>
818 AE_FORCEINLINE bool emplace(Args&&... args) AE_NO_TSAN {
819 if (inner.emplace(std::forward<Args>(args)...)) {
820 sema->signal();
821 return true;
822 }
823 return false;
824 }
825#endif
826
827 // Attempts to dequeue an element; if the queue is empty,
828 // returns false instead. If the queue has at least one element,
829 // moves front to result using operator=, then returns true.
830 template <typename U>
831 bool try_dequeue(U& result) AE_NO_TSAN {
832 if (sema->tryWait()) {
833 bool success = inner.try_dequeue(result);
834 assert(success);
835 AE_UNUSED(success);
836 return true;
837 }
838 return false;
839 }
840
841 // Attempts to dequeue an element; if the queue is empty,
842 // waits until an element is available, then dequeues it.
843 template <typename U>
844 void wait_dequeue(U& result) AE_NO_TSAN {
845 while (!sema->wait())
846 ;
847 bool success = inner.try_dequeue(result);
848 AE_UNUSED(result);
849 assert(success);
850 AE_UNUSED(success);
851 }
852
853 // Attempts to dequeue an element; if the queue is empty,
854 // waits until an element is available up to the specified timeout,
855 // then dequeues it and returns true, or returns false if the timeout
856 // expires before an element can be dequeued.
857 // Using a negative timeout indicates an indefinite timeout,
858 // and is thus functionally equivalent to calling wait_dequeue.
859 template <typename U>
860 bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs) AE_NO_TSAN {
861 if (!sema->wait(timeout_usecs)) {
862 return false;
863 }
864 bool success = inner.try_dequeue(result);
865 AE_UNUSED(result);
866 assert(success);
867 AE_UNUSED(success);
868 return true;
869 }
870
871#if __cplusplus > 199711L || _MSC_VER >= 1700
872 // Attempts to dequeue an element; if the queue is empty,
873 // waits until an element is available up to the specified timeout,
874 // then dequeues it and returns true, or returns false if the timeout
875 // expires before an element can be dequeued.
876 // Using a negative timeout indicates an indefinite timeout,
877 // and is thus functionally equivalent to calling wait_dequeue.
878 template <typename U, typename Rep, typename Period>
879 inline bool wait_dequeue_timed(U& result,
880 std::chrono::duration<Rep, Period> const& timeout) AE_NO_TSAN {
881 return wait_dequeue_timed(
882 result, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
883 }
884#endif
885
886 // Returns a pointer to the front element in the queue (the one that
887 // would be removed next by a call to `try_dequeue` or `pop`). If the
888 // queue appears empty at the time the method is called, nullptr is
889 // returned instead.
890 // Must be called only from the consumer thread.
891 AE_FORCEINLINE T* peek() const AE_NO_TSAN {
892 return inner.peek();
893 }
894
895 // Removes the front element from the queue, if any, without returning it.
896 // Returns true on success, or false if the queue appeared empty at the time
897 // `pop` was called.
898 AE_FORCEINLINE bool pop() AE_NO_TSAN {
899 if (sema->tryWait()) {
900 bool result = inner.pop();
901 assert(result);
902 AE_UNUSED(result);
903 return true;
904 }
905 return false;
906 }
907
908 // Returns the approximate number of items currently in the queue.
909 // Safe to call from both the producer and consumer threads.
910 AE_FORCEINLINE size_t size_approx() const AE_NO_TSAN {
911 return sema->availableApprox();
912 }
913
914 // Returns the total number of items that could be enqueued without incurring
915 // an allocation when this queue is empty.
916 // Safe to call from both the producer and consumer threads.
917 //
918 // NOTE: The actual capacity during usage may be different depending on the consumer.
919 // If the consumer is removing elements concurrently, the producer cannot add to
920 // the block the consumer is removing from until it's completely empty, except in
921 // the case where the producer was writing to the same block the consumer was
922 // reading from the whole time.
923 AE_FORCEINLINE size_t max_capacity() const {
924 return inner.max_capacity();
925 }
926
927private:
928 // Disable copying & assignment
929 BlockingReaderWriterQueue(BlockingReaderWriterQueue const&) {}
930 BlockingReaderWriterQueue& operator=(BlockingReaderWriterQueue const&) {}
931
932private:
933 ReaderWriterQueue inner;
934 std::unique_ptr<spsc_sema::LightweightSemaphore> sema;
935};
936
937} // namespace Common
938
939#ifdef AE_VCPP
940#pragma warning(pop)
941#endif
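
As a usage reference for the vendored queue, here is a minimal sketch (not part of the diff) of
the intended single-producer/single-consumer pattern, using the blocking variant so the consumer
can sleep on the internal semaphore instead of polling:

    #include <thread>

    #include "common/reader_writer_queue.h"

    int main() {
        // Exactly one producer thread and one consumer thread, per the SPSC contract.
        Common::BlockingReaderWriterQueue<int> queue{64};

        std::thread producer([&] {
            for (int i = 0; i < 1000; ++i) {
                queue.enqueue(i); // only allocates a new block when the current one is full
            }
        });

        std::thread consumer([&] {
            int value{};
            for (int i = 0; i < 1000; ++i) {
                queue.wait_dequeue(value); // blocks until the producer signals an element
            }
        });

        producer.join();
        consumer.join();
    }

The non-blocking Common::ReaderWriterQueue exposes the same enqueue()/try_dequeue() surface
without the semaphore, for consumers that prefer to poll.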
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 751549583..1c7b6dfae 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -62,7 +62,8 @@ void LogSettings() {
62 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); 62 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); 63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
64 log_setting("Audio_OutputEngine", values.sink_id.GetValue()); 64 log_setting("Audio_OutputEngine", values.sink_id.GetValue());
65 log_setting("Audio_OutputDevice", values.audio_device_id.GetValue()); 65 log_setting("Audio_OutputDevice", values.audio_output_device_id.GetValue());
66 log_setting("Audio_InputDevice", values.audio_input_device_id.GetValue());
66 log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd.GetValue()); 67 log_setting("DataStorage_UseVirtualSd", values.use_virtual_sd.GetValue());
67 log_path("DataStorage_CacheDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir)); 68 log_path("DataStorage_CacheDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::CacheDir));
68 log_path("DataStorage_ConfigDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir)); 69 log_path("DataStorage_ConfigDir", Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir));
@@ -185,7 +186,6 @@ void RestoreGlobalState(bool is_powered_on) {
185 values.max_anisotropy.SetGlobal(true); 186 values.max_anisotropy.SetGlobal(true);
186 values.use_speed_limit.SetGlobal(true); 187 values.use_speed_limit.SetGlobal(true);
187 values.speed_limit.SetGlobal(true); 188 values.speed_limit.SetGlobal(true);
188 values.fps_cap.SetGlobal(true);
189 values.use_disk_shader_cache.SetGlobal(true); 189 values.use_disk_shader_cache.SetGlobal(true);
190 values.gpu_accuracy.SetGlobal(true); 190 values.gpu_accuracy.SetGlobal(true);
191 values.use_asynchronous_gpu_emulation.SetGlobal(true); 191 values.use_asynchronous_gpu_emulation.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 3583a2e70..1079cf8cb 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -106,7 +106,7 @@ struct ResolutionScalingInfo {
106 * configurations. Specifying a default value and label is required. A minimum and maximum range can 106 * configurations. Specifying a default value and label is required. A minimum and maximum range can
107 * be specified for sanitization. 107 * be specified for sanitization.
108 */ 108 */
109template <typename Type> 109template <typename Type, bool ranged = false>
110class Setting { 110class Setting {
111protected: 111protected:
112 Setting() = default; 112 Setting() = default;
@@ -126,8 +126,8 @@ public:
126 * @param default_val Initial value of the setting, and default value of the setting 126 * @param default_val Initial value of the setting, and default value of the setting
127 * @param name Label for the setting 127 * @param name Label for the setting
128 */ 128 */
129 explicit Setting(const Type& default_val, const std::string& name) 129 explicit Setting(const Type& default_val, const std::string& name) requires(!ranged)
130 : value{default_val}, default_value{default_val}, ranged{false}, label{name} {} 130 : value{default_val}, default_value{default_val}, label{name} {}
131 virtual ~Setting() = default; 131 virtual ~Setting() = default;
132 132
133 /** 133 /**
@@ -139,9 +139,9 @@ public:
139 * @param name Label for the setting 139 * @param name Label for the setting
140 */ 140 */
141 explicit Setting(const Type& default_val, const Type& min_val, const Type& max_val, 141 explicit Setting(const Type& default_val, const Type& min_val, const Type& max_val,
142 const std::string& name) 142 const std::string& name) requires(ranged)
143 : value{default_val}, default_value{default_val}, maximum{max_val}, minimum{min_val}, 143 : value{default_val},
144 ranged{true}, label{name} {} 144 default_value{default_val}, maximum{max_val}, minimum{min_val}, label{name} {}
145 145
146 /** 146 /**
147 * Returns a reference to the setting's value. 147 * Returns a reference to the setting's value.
@@ -158,7 +158,7 @@ public:
158 * @param val The desired value 158 * @param val The desired value
159 */ 159 */
160 virtual void SetValue(const Type& val) { 160 virtual void SetValue(const Type& val) {
161 Type temp{(ranged) ? std::clamp(val, minimum, maximum) : val}; 161 Type temp{ranged ? std::clamp(val, minimum, maximum) : val};
162 std::swap(value, temp); 162 std::swap(value, temp);
163 } 163 }
164 164
@@ -188,7 +188,7 @@ public:
188 * @returns A reference to the setting 188 * @returns A reference to the setting
189 */ 189 */
190 virtual const Type& operator=(const Type& val) { 190 virtual const Type& operator=(const Type& val) {
191 Type temp{(ranged) ? std::clamp(val, minimum, maximum) : val}; 191 Type temp{ranged ? std::clamp(val, minimum, maximum) : val};
192 std::swap(value, temp); 192 std::swap(value, temp);
193 return value; 193 return value;
194 } 194 }
@@ -207,7 +207,6 @@ protected:
207 const Type default_value{}; ///< The default value 207 const Type default_value{}; ///< The default value
208 const Type maximum{}; ///< Maximum allowed value of the setting 208 const Type maximum{}; ///< Maximum allowed value of the setting
209 const Type minimum{}; ///< Minimum allowed value of the setting 209 const Type minimum{}; ///< Minimum allowed value of the setting
210 const bool ranged; ///< The setting has sanitization ranges
211 const std::string label{}; ///< The setting's label 210 const std::string label{}; ///< The setting's label
212}; 211};
213 212
@@ -219,8 +218,8 @@ protected:
219 * 218 *
220 * By default, the global setting is used. 219 * By default, the global setting is used.
221 */ 220 */
222template <typename Type> 221template <typename Type, bool ranged = false>
223class SwitchableSetting : virtual public Setting<Type> { 222class SwitchableSetting : virtual public Setting<Type, ranged> {
224public: 223public:
225 /** 224 /**
226 * Sets a default value, label, and setting value. 225 * Sets a default value, label, and setting value.
@@ -228,7 +227,7 @@ public:
228 * @param default_val Initial value of the setting, and default value of the setting 227 * @param default_val Initial value of the setting, and default value of the setting
229 * @param name Label for the setting 228 * @param name Label for the setting
230 */ 229 */
231 explicit SwitchableSetting(const Type& default_val, const std::string& name) 230 explicit SwitchableSetting(const Type& default_val, const std::string& name) requires(!ranged)
232 : Setting<Type>{default_val, name} {} 231 : Setting<Type>{default_val, name} {}
233 virtual ~SwitchableSetting() = default; 232 virtual ~SwitchableSetting() = default;
234 233
@@ -241,8 +240,8 @@ public:
241 * @param name Label for the setting 240 * @param name Label for the setting
242 */ 241 */
243 explicit SwitchableSetting(const Type& default_val, const Type& min_val, const Type& max_val, 242 explicit SwitchableSetting(const Type& default_val, const Type& min_val, const Type& max_val,
244 const std::string& name) 243 const std::string& name) requires(ranged)
245 : Setting<Type>{default_val, min_val, max_val, name} {} 244 : Setting<Type, true>{default_val, min_val, max_val, name} {}
246 245
247 /** 246 /**
248 * Tells this setting to represent either the global or custom setting when other member 247 * Tells this setting to represent either the global or custom setting when other member
@@ -290,7 +289,7 @@ public:
290 * @param val The new value 289 * @param val The new value
291 */ 290 */
292 void SetValue(const Type& val) override { 291 void SetValue(const Type& val) override {
293 Type temp{(this->ranged) ? std::clamp(val, this->minimum, this->maximum) : val}; 292 Type temp{ranged ? std::clamp(val, this->minimum, this->maximum) : val};
294 if (use_global) { 293 if (use_global) {
295 std::swap(this->value, temp); 294 std::swap(this->value, temp);
296 } else { 295 } else {
@@ -306,7 +305,7 @@ public:
306 * @returns A reference to the current setting value 305 * @returns A reference to the current setting value
307 */ 306 */
308 const Type& operator=(const Type& val) override { 307 const Type& operator=(const Type& val) override {
309 Type temp{(this->ranged) ? std::clamp(val, this->minimum, this->maximum) : val}; 308 Type temp{ranged ? std::clamp(val, this->minimum, this->maximum) : val};
310 if (use_global) { 309 if (use_global) {
311 std::swap(this->value, temp); 310 std::swap(this->value, temp);
312 return this->value; 311 return this->value;
@@ -371,18 +370,20 @@ struct TouchFromButtonMap {
371 370
372struct Values { 371struct Values {
373 // Audio 372 // Audio
374 Setting<std::string> audio_device_id{"auto", "output_device"};
375 Setting<std::string> sink_id{"auto", "output_engine"}; 373 Setting<std::string> sink_id{"auto", "output_engine"};
374 Setting<std::string> audio_output_device_id{"auto", "output_device"};
375 Setting<std::string> audio_input_device_id{"auto", "input_device"};
376 Setting<bool> audio_muted{false, "audio_muted"}; 376 Setting<bool> audio_muted{false, "audio_muted"};
377 SwitchableSetting<u8> volume{100, 0, 100, "volume"}; 377 SwitchableSetting<u8, true> volume{100, 0, 100, "volume"};
378 Setting<bool> dump_audio_commands{false, "dump_audio_commands"};
378 379
379 // Core 380 // Core
380 SwitchableSetting<bool> use_multi_core{true, "use_multi_core"}; 381 SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
381 SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"}; 382 SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"};
382 383
383 // Cpu 384 // Cpu
384 SwitchableSetting<CPUAccuracy> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, 385 SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
385 CPUAccuracy::Paranoid, "cpu_accuracy"}; 386 CPUAccuracy::Paranoid, "cpu_accuracy"};
386 // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021 387 // TODO: remove cpu_accuracy_first_time, migration setting added 8 July 2021
387 Setting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"}; 388 Setting<bool> cpu_accuracy_first_time{true, "cpu_accuracy_first_time"};
388 Setting<bool> cpu_debug_mode{false, "cpu_debug_mode"}; 389 Setting<bool> cpu_debug_mode{false, "cpu_debug_mode"};
@@ -409,7 +410,7 @@ struct Values {
409 true, "cpuopt_unsafe_ignore_global_monitor"}; 410 true, "cpuopt_unsafe_ignore_global_monitor"};
410 411
411 // Renderer 412 // Renderer
412 SwitchableSetting<RendererBackend> renderer_backend{ 413 SwitchableSetting<RendererBackend, true> renderer_backend{
413 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Vulkan, "backend"}; 414 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Vulkan, "backend"};
414 Setting<bool> renderer_debug{false, "debug"}; 415 Setting<bool> renderer_debug{false, "debug"};
415 Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; 416 Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
@@ -423,28 +424,26 @@ struct Values {
423 SwitchableSetting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"}; 424 SwitchableSetting<AntiAliasing> anti_aliasing{AntiAliasing::None, "anti_aliasing"};
424 // *nix platforms may have issues with the borderless windowed fullscreen mode. 425 // *nix platforms may have issues with the borderless windowed fullscreen mode.
425 // Default to exclusive fullscreen on these platforms for now. 426 // Default to exclusive fullscreen on these platforms for now.
426 SwitchableSetting<FullscreenMode> fullscreen_mode{ 427 SwitchableSetting<FullscreenMode, true> fullscreen_mode{
427#ifdef _WIN32 428#ifdef _WIN32
428 FullscreenMode::Borderless, 429 FullscreenMode::Borderless,
429#else 430#else
430 FullscreenMode::Exclusive, 431 FullscreenMode::Exclusive,
431#endif 432#endif
432 FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"}; 433 FullscreenMode::Borderless, FullscreenMode::Exclusive, "fullscreen_mode"};
433 SwitchableSetting<int> aspect_ratio{0, 0, 3, "aspect_ratio"}; 434 SwitchableSetting<int, true> aspect_ratio{0, 0, 3, "aspect_ratio"};
434 SwitchableSetting<int> max_anisotropy{0, 0, 5, "max_anisotropy"}; 435 SwitchableSetting<int, true> max_anisotropy{0, 0, 5, "max_anisotropy"};
435 SwitchableSetting<bool> use_speed_limit{true, "use_speed_limit"}; 436 SwitchableSetting<bool> use_speed_limit{true, "use_speed_limit"};
436 SwitchableSetting<u16> speed_limit{100, 0, 9999, "speed_limit"}; 437 SwitchableSetting<u16, true> speed_limit{100, 0, 9999, "speed_limit"};
437 SwitchableSetting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"}; 438 SwitchableSetting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
438 SwitchableSetting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal, 439 SwitchableSetting<GPUAccuracy, true> gpu_accuracy{GPUAccuracy::High, GPUAccuracy::Normal,
439 GPUAccuracy::Extreme, "gpu_accuracy"}; 440 GPUAccuracy::Extreme, "gpu_accuracy"};
440 SwitchableSetting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; 441 SwitchableSetting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
441 SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; 442 SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
442 SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"}; 443 SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
443 SwitchableSetting<bool> use_vsync{true, "use_vsync"}; 444 SwitchableSetting<bool> use_vsync{true, "use_vsync"};
444 SwitchableSetting<u16> fps_cap{1000, 1, 1000, "fps_cap"}; 445 SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL,
445 Setting<bool> disable_fps_limit{false, "disable_fps_limit"}; 446 ShaderBackend::SPIRV, "shader_backend"};
446 SwitchableSetting<ShaderBackend> shader_backend{ShaderBackend::GLASM, ShaderBackend::GLSL,
447 ShaderBackend::SPIRV, "shader_backend"};
448 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; 447 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
449 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; 448 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
450 449
@@ -460,10 +459,10 @@ struct Values {
460 s64 custom_rtc_differential; 459 s64 custom_rtc_differential;
461 460
462 Setting<s32> current_user{0, "current_user"}; 461 Setting<s32> current_user{0, "current_user"};
463 SwitchableSetting<s32> language_index{1, 0, 17, "language_index"}; 462 SwitchableSetting<s32, true> language_index{1, 0, 17, "language_index"};
464 SwitchableSetting<s32> region_index{1, 0, 6, "region_index"}; 463 SwitchableSetting<s32, true> region_index{1, 0, 6, "region_index"};
465 SwitchableSetting<s32> time_zone_index{0, 0, 45, "time_zone_index"}; 464 SwitchableSetting<s32, true> time_zone_index{0, 0, 45, "time_zone_index"};
466 SwitchableSetting<s32> sound_index{1, 0, 2, "sound_index"}; 465 SwitchableSetting<s32, true> sound_index{1, 0, 2, "sound_index"};
467 466
468 // Controls 467 // Controls
469 InputSetting<std::array<PlayerInput, 10>> players; 468 InputSetting<std::array<PlayerInput, 10>> players;
@@ -485,7 +484,7 @@ struct Values {
485 Setting<bool> tas_loop{false, "tas_loop"}; 484 Setting<bool> tas_loop{false, "tas_loop"};
486 485
487 Setting<bool> mouse_panning{false, "mouse_panning"}; 486 Setting<bool> mouse_panning{false, "mouse_panning"};
488 Setting<u8> mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"}; 487 Setting<u8, true> mouse_panning_sensitivity{10, 1, 100, "mouse_panning_sensitivity"};
489 Setting<bool> mouse_enabled{false, "mouse_enabled"}; 488 Setting<bool> mouse_enabled{false, "mouse_enabled"};
490 489
491 Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"}; 490 Setting<bool> emulate_analog_keyboard{false, "emulate_analog_keyboard"};
@@ -504,6 +503,9 @@ struct Values {
504 Setting<bool> enable_ring_controller{true, "enable_ring_controller"}; 503 Setting<bool> enable_ring_controller{true, "enable_ring_controller"};
505 RingconRaw ringcon_analogs; 504 RingconRaw ringcon_analogs;
506 505
506 Setting<bool> enable_ir_sensor{false, "enable_ir_sensor"};
507 Setting<std::string> ir_sensor_device{"auto", "ir_sensor_device"};
508
507 // Data Storage 509 // Data Storage
508 Setting<bool> use_virtual_sd{true, "use_virtual_sd"}; 510 Setting<bool> use_virtual_sd{true, "use_virtual_sd"};
509 Setting<bool> gamecard_inserted{false, "gamecard_inserted"}; 511 Setting<bool> gamecard_inserted{false, "gamecard_inserted"};
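
The effect of the new compile-time ranged flag can be summarized with a short sketch (the setting
names are made up, and the surrounding Settings namespace is assumed): the requires-clauses select
which constructor exists, so a ranged setting must supply bounds and an unranged one cannot.

    // Ranged: min/max are mandatory, and assignments are clamped via std::clamp.
    Settings::Setting<int, true> brightness{50, 0, 100, "brightness"};
    brightness = 250; // stored value becomes 100

    // Unranged (the default): only a default value and a label are accepted.
    Settings::Setting<bool> fast_boot{true, "fast_boot"};

    // Settings::Setting<int, true> bad{50, "bad"};        // ill-formed: requires(!ranged)
    // Settings::Setting<int> also_bad{50, 0, 100, "bad"}; // ill-formed: requires(ranged)

Compared with the old runtime ranged member, mixing up the two constructors is now a compile
error rather than a latent runtime state.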
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
index b4fb3a59f..ae07f2811 100644
--- a/src/common/wall_clock.cpp
+++ b/src/common/wall_clock.cpp
@@ -67,7 +67,7 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency,
67 const auto& caps = GetCPUCaps(); 67 const auto& caps = GetCPUCaps();
68 u64 rtsc_frequency = 0; 68 u64 rtsc_frequency = 0;
69 if (caps.invariant_tsc) { 69 if (caps.invariant_tsc) {
70 rtsc_frequency = EstimateRDTSCFrequency(); 70 rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency();
71 } 71 }
72 72
73 // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: 73 // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than:
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 322aa1f08..1a27532d4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -161,6 +161,22 @@ static CPUCaps Detect() {
161 caps.invariant_tsc = Common::Bit<8>(cpu_id[3]); 161 caps.invariant_tsc = Common::Bit<8>(cpu_id[3]);
162 } 162 }
163 163
164 if (max_std_fn >= 0x15) {
165 __cpuid(cpu_id, 0x15);
166 caps.tsc_crystal_ratio_denominator = cpu_id[0];
167 caps.tsc_crystal_ratio_numerator = cpu_id[1];
168 caps.crystal_frequency = cpu_id[2];
169 // Some CPU models might not return a crystal frequency.
170 // The CPU model could be matched against the crystal frequencies known to turbostat:
171 // https://github.com/torvalds/linux/blob/master/tools/power/x86/turbostat/turbostat.c#L5569
172 // but it's easier to just estimate the TSC tick rate for these cases.
173 if (caps.tsc_crystal_ratio_denominator) {
174 caps.tsc_frequency = static_cast<u64>(caps.crystal_frequency) *
175 caps.tsc_crystal_ratio_numerator /
176 caps.tsc_crystal_ratio_denominator;
177 }
178 }
179
164 if (max_std_fn >= 0x16) { 180 if (max_std_fn >= 0x16) {
165 __cpuid(cpu_id, 0x16); 181 __cpuid(cpu_id, 0x16);
166 caps.base_frequency = cpu_id[0]; 182 caps.base_frequency = cpu_id[0];
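
A worked example of the derivation above (the values are illustrative, not taken from the diff):
CPUID leaf 0x15 returns the ratio denominator in EAX, the numerator in EBX, and the crystal clock
in ECX, so a part reporting a 24 MHz crystal with a 336/2 ratio has a 4.032 GHz TSC.

    #include <cstdint>

    constexpr std::uint32_t crystal_frequency = 24'000'000; // ECX: core crystal clock, in Hz
    constexpr std::uint32_t ratio_numerator = 336;          // EBX: TSC/crystal ratio numerator
    constexpr std::uint32_t ratio_denominator = 2;          // EAX: TSC/crystal ratio denominator
    constexpr std::uint64_t tsc_frequency =
        static_cast<std::uint64_t>(crystal_frequency) * ratio_numerator / ratio_denominator;
    static_assert(tsc_frequency == 4'032'000'000ULL); // 4.032 GHz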
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 9bdc9dbfa..6830f3795 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -30,6 +30,11 @@ struct CPUCaps {
30 u32 max_frequency; 30 u32 max_frequency;
31 u32 bus_frequency; 31 u32 bus_frequency;
32 32
33 u32 tsc_crystal_ratio_denominator;
34 u32 tsc_crystal_ratio_numerator;
35 u32 crystal_frequency;
36 u64 tsc_frequency; // Derived from the above three values
37
33 bool sse : 1; 38 bool sse : 1;
34 bool sse2 : 1; 39 bool sse2 : 1;
35 bool sse3 : 1; 40 bool sse3 : 1;