author     Kelebek1  2022-07-16 23:48:45 +0100
committer  Kelebek1  2022-07-22 01:11:32 +0100
commit     458da8a94877677f086f06cdeecf959ec4283a33 (patch)
tree       583166d77602ad90a0d552f37de8729ad80fd6c1  /src/common/atomic_helpers.h
parent     Merge pull request #8598 from Link4565/recv-dontwait (diff)
Project Andio
Diffstat
-rw-r--r--  src/common/atomic_helpers.h  772
1 file changed, 772 insertions(+), 0 deletions(-)
diff --git a/src/common/atomic_helpers.h b/src/common/atomic_helpers.h
new file mode 100644
index 000000000..6d912b52e
--- /dev/null
+++ b/src/common/atomic_helpers.h
@@ -0,0 +1,772 @@
1// ©2013-2016 Cameron Desrochers.
2// Distributed under the simplified BSD license (see the license file that
3// should have come with this header).
4// Uses Jeff Preshing's semaphore implementation (under the terms of its
5// separate zlib license, embedded below).
6
7#pragma once
8
9// Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant)
10// implementation of low-level memory barriers, plus a few semi-portable utility macros (for
11// inlining and alignment). Also has a basic atomic type (limited to hardware-supported atomics with
12// no memory ordering guarantees). Uses the AE_* prefix for macros (historical reasons), and the
13// "Common" namespace for symbols.
14
15#include <cassert>
16#include <cerrno>
17#include <cstdint>
18#include <ctime>
19#include <type_traits>
20
21// Platform detection
22#if defined(__INTEL_COMPILER)
23#define AE_ICC
24#elif defined(_MSC_VER)
25#define AE_VCPP
26#elif defined(__GNUC__)
27#define AE_GCC
28#endif
29
30#if defined(_M_IA64) || defined(__ia64__)
31#define AE_ARCH_IA64
32#elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
33#define AE_ARCH_X64
34#elif defined(_M_IX86) || defined(__i386__)
35#define AE_ARCH_X86
36#elif defined(_M_PPC) || defined(__powerpc__)
37#define AE_ARCH_PPC
38#else
39#define AE_ARCH_UNKNOWN
40#endif
41
42// AE_UNUSED
43#define AE_UNUSED(x) ((void)x)
44
45// AE_NO_TSAN/AE_TSAN_ANNOTATE_*
46#if defined(__has_feature)
47#if __has_feature(thread_sanitizer)
48#if __cplusplus >= 201703L // inline variables require C++17
49namespace Common {
50inline int ae_tsan_global;
51}
52#define AE_TSAN_ANNOTATE_RELEASE() \
53 AnnotateHappensBefore(__FILE__, __LINE__, (void*)(&::Common::ae_tsan_global))
54#define AE_TSAN_ANNOTATE_ACQUIRE() \
55 AnnotateHappensAfter(__FILE__, __LINE__, (void*)(&::Common::ae_tsan_global))
56extern "C" void AnnotateHappensBefore(const char*, int, void*);
57extern "C" void AnnotateHappensAfter(const char*, int, void*);
58#else // when we can't work with tsan, attempt to disable its warnings
59#define AE_NO_TSAN __attribute__((no_sanitize("thread")))
60#endif
61#endif
62#endif
63#ifndef AE_NO_TSAN
64#define AE_NO_TSAN
65#endif
66#ifndef AE_TSAN_ANNOTATE_RELEASE
67#define AE_TSAN_ANNOTATE_RELEASE()
68#define AE_TSAN_ANNOTATE_ACQUIRE()
69#endif
70
71// AE_FORCEINLINE
72#if defined(AE_VCPP) || defined(AE_ICC)
73#define AE_FORCEINLINE __forceinline
74#elif defined(AE_GCC)
75//#define AE_FORCEINLINE __attribute__((always_inline))
76#define AE_FORCEINLINE inline
77#else
78#define AE_FORCEINLINE inline
79#endif
80
81// AE_ALIGN
82#if defined(AE_VCPP) || defined(AE_ICC)
83#define AE_ALIGN(x) __declspec(align(x))
84#elif defined(AE_GCC)
85#define AE_ALIGN(x) __attribute__((aligned(x)))
86#else
87// Assume GCC compliant syntax...
88#define AE_ALIGN(x) __attribute__((aligned(x)))
89#endif
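
// Usage sketch: how the portability macros above are meant to be used.
// ExampleAlignedCounter and example_min are hypothetical names, for
// illustration only.
struct AE_ALIGN(64) ExampleAlignedCounter { // aligned to a typical cache-line size
    int value;
};

AE_FORCEINLINE int example_min(int a, int b) { // small, hot helper worth force-inlining
    return a < b ? a : b;
}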
90
91// Portable atomic fences implemented below:
92
93namespace Common {
94
95enum memory_order {
96 memory_order_relaxed,
97 memory_order_acquire,
98 memory_order_release,
99 memory_order_acq_rel,
100 memory_order_seq_cst,
101
102 // memory_order_sync: Forces a full sync:
103 // #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
104 memory_order_sync = memory_order_seq_cst
105};
106
107} // namespace Common
108
109#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || \
110 (defined(AE_ICC) && __INTEL_COMPILER < 1600)
111// VS2010 and ICC13 don't support std::atomic_*_fence, so we implement our own fences
112
113#include <intrin.h>
114
115#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
116#define AeFullSync _mm_mfence
117#define AeLiteSync _mm_mfence
118#elif defined(AE_ARCH_IA64)
119#define AeFullSync __mf
120#define AeLiteSync __mf
121#elif defined(AE_ARCH_PPC)
122#include <ppcintrinsics.h>
123#define AeFullSync __sync
124#define AeLiteSync __lwsync
125#endif
126
127#ifdef AE_VCPP
128#pragma warning(push)
129#pragma warning(disable : 4365) // Disable erroneous 'conversion from long to unsigned int,
130 // signed/unsigned mismatch' error when using `assert`
131#ifdef __cplusplus_cli
132#pragma managed(push, off)
133#endif
134#endif
135
136namespace Common {
137
138AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN {
139 switch (order) {
140 case memory_order_relaxed:
141 break;
142 case memory_order_acquire:
143 _ReadBarrier();
144 break;
145 case memory_order_release:
146 _WriteBarrier();
147 break;
148 case memory_order_acq_rel:
149 _ReadWriteBarrier();
150 break;
151 case memory_order_seq_cst:
152 _ReadWriteBarrier();
153 break;
154 default:
155 assert(false);
156 }
157}
158
159// x86/x64 have a strong memory model -- all loads and stores have
160// acquire and release semantics automatically (so we only need compiler
161// barriers for those orderings).
162#if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
163AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
164 switch (order) {
165 case memory_order_relaxed:
166 break;
167 case memory_order_acquire:
168 _ReadBarrier();
169 break;
170 case memory_order_release:
171 _WriteBarrier();
172 break;
173 case memory_order_acq_rel:
174 _ReadWriteBarrier();
175 break;
176 case memory_order_seq_cst:
177 _ReadWriteBarrier();
178 AeFullSync();
179 _ReadWriteBarrier();
180 break;
181 default:
182 assert(false);
183 }
184}
185#else
186AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
187 // Non-specialized arch, use heavier memory barriers everywhere just in case :-(
188 switch (order) {
189 case memory_order_relaxed:
190 break;
191 case memory_order_acquire:
192 _ReadBarrier();
193 AeLiteSync();
194 _ReadBarrier();
195 break;
196 case memory_order_release:
197 _WriteBarrier();
198 AeLiteSync();
199 _WriteBarrier();
200 break;
201 case memory_order_acq_rel:
202 _ReadWriteBarrier();
203 AeLiteSync();
204 _ReadWriteBarrier();
205 break;
206 case memory_order_seq_cst:
207 _ReadWriteBarrier();
208 AeFullSync();
209 _ReadWriteBarrier();
210 break;
211 default:
212 assert(false);
213 }
214}
215#endif
216} // namespace Common
217#else
218// Use the standard library's atomic fences
219#include <atomic>
220
221namespace Common {
222
223AE_FORCEINLINE void compiler_fence(memory_order order) AE_NO_TSAN {
224 switch (order) {
225 case memory_order_relaxed:
226 break;
227 case memory_order_acquire:
228 std::atomic_signal_fence(std::memory_order_acquire);
229 break;
230 case memory_order_release:
231 std::atomic_signal_fence(std::memory_order_release);
232 break;
233 case memory_order_acq_rel:
234 std::atomic_signal_fence(std::memory_order_acq_rel);
235 break;
236 case memory_order_seq_cst:
237 std::atomic_signal_fence(std::memory_order_seq_cst);
238 break;
239 default:
240 assert(false);
241 }
242}
243
244AE_FORCEINLINE void fence(memory_order order) AE_NO_TSAN {
245 switch (order) {
246 case memory_order_relaxed:
247 break;
248 case memory_order_acquire:
249 AE_TSAN_ANNOTATE_ACQUIRE();
250 std::atomic_thread_fence(std::memory_order_acquire);
251 break;
252 case memory_order_release:
253 AE_TSAN_ANNOTATE_RELEASE();
254 std::atomic_thread_fence(std::memory_order_release);
255 break;
256 case memory_order_acq_rel:
257 AE_TSAN_ANNOTATE_ACQUIRE();
258 AE_TSAN_ANNOTATE_RELEASE();
259 std::atomic_thread_fence(std::memory_order_acq_rel);
260 break;
261 case memory_order_seq_cst:
262 AE_TSAN_ANNOTATE_ACQUIRE();
263 AE_TSAN_ANNOTATE_RELEASE();
264 std::atomic_thread_fence(std::memory_order_seq_cst);
265 break;
266 default:
267 assert(false);
268 }
269}
270
271} // namespace Common
272
273#endif
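
// Usage sketch: memory_order_sync is an alias for memory_order_seq_cst, so
// either name requests the full barrier described above. example_full_barrier
// is a hypothetical helper, for illustration only.
static_assert(Common::memory_order_sync == Common::memory_order_seq_cst,
              "memory_order_sync must alias memory_order_seq_cst");

AE_FORCEINLINE void example_full_barrier() AE_NO_TSAN {
    Common::fence(Common::memory_order_sync); // #LoadLoad, #LoadStore, #StoreStore and #StoreLoad
}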
274
275#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
276#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
277#endif
278
279#ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
280#include <atomic>
281#endif
282#include <utility>
283
284// WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
285// Provides basic support for atomic variables -- no memory ordering guarantees are provided.
286// The guarantee of atomicity is only made for types that already have atomic load and store
287// guarantees at the hardware level -- on most platforms this generally means aligned pointers and
288// integers (only).
289namespace Common {
290template <typename T>
291class weak_atomic {
292public:
293 AE_NO_TSAN weak_atomic() : value() {}
294#ifdef AE_VCPP
295#pragma warning(push)
296#pragma warning(disable : 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning
297#endif
298 template <typename U>
299 AE_NO_TSAN weak_atomic(U&& x) : value(std::forward<U>(x)) {}
300#ifdef __cplusplus_cli
301 // Work around bug with universal reference/nullptr combination that only appears when /clr is
302 // on
303 AE_NO_TSAN weak_atomic(nullptr_t) : value(nullptr) {}
304#endif
305 AE_NO_TSAN weak_atomic(weak_atomic const& other) : value(other.load()) {}
306 AE_NO_TSAN weak_atomic(weak_atomic&& other) : value(std::move(other.load())) {}
307#ifdef AE_VCPP
308#pragma warning(pop)
309#endif
310
311 AE_FORCEINLINE operator T() const AE_NO_TSAN {
312 return load();
313 }
314
315#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
316 template <typename U>
317 AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN {
318 value = std::forward<U>(x);
319 return *this;
320 }
321 AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN {
322 value = other.value;
323 return *this;
324 }
325
326 AE_FORCEINLINE T load() const AE_NO_TSAN {
327 return value;
328 }
329
330 AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN {
331#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
332 if (sizeof(T) == 4)
333 return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
334#if defined(_M_AMD64)
335 else if (sizeof(T) == 8)
336 return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
337#endif
338#else
339#error Unsupported platform
340#endif
341 assert(false && "T must be either a 32 or 64 bit type");
342 return value;
343 }
344
345 AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN {
346#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
347 if (sizeof(T) == 4)
348 return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
349#if defined(_M_AMD64)
350 else if (sizeof(T) == 8)
351 return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
352#endif
353#else
354#error Unsupported platform
355#endif
356 assert(false && "T must be either a 32 or 64 bit type");
357 return value;
358 }
359#else
360 template <typename U>
361 AE_FORCEINLINE weak_atomic const& operator=(U&& x) AE_NO_TSAN {
362 value.store(std::forward<U>(x), std::memory_order_relaxed);
363 return *this;
364 }
365
366 AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) AE_NO_TSAN {
367 value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
368 return *this;
369 }
370
371 AE_FORCEINLINE T load() const AE_NO_TSAN {
372 return value.load(std::memory_order_relaxed);
373 }
374
375 AE_FORCEINLINE T fetch_add_acquire(T increment) AE_NO_TSAN {
376 return value.fetch_add(increment, std::memory_order_acquire);
377 }
378
379 AE_FORCEINLINE T fetch_add_release(T increment) AE_NO_TSAN {
380 return value.fetch_add(increment, std::memory_order_release);
381 }
382#endif
383
384private:
385#ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
386 // No std::atomic support, but we still need to keep the compiler from caching,
387 // reordering, or eliding these accesses. `volatile` is slow, but does exactly that.
388 volatile T value;
389#else
390 std::atomic<T> value;
391#endif
392};
393
394} // namespace Common
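
// Usage sketch: weak_atomic only guarantees atomicity, so ordering between a
// plain payload and the flag that publishes it comes from explicit fence()
// calls, as in the pattern below. ExamplePublisher and its members are
// hypothetical names, for illustration only.
struct ExamplePublisher {
    int payload;                    // plain data, written before the flag is set
    Common::weak_atomic<int> ready; // 0 = empty, 1 = published (atomic, but unordered by itself)

    AE_NO_TSAN ExamplePublisher() : payload(0), ready(0) {}

    void publish(int v) AE_NO_TSAN {
        payload = v;
        Common::fence(Common::memory_order_release); // make the payload visible before the flag
        ready = 1;
    }

    bool try_consume(int& out) AE_NO_TSAN {
        if (ready.load() == 0)
            return false;
        Common::fence(Common::memory_order_acquire); // order the flag read before the payload read
        out = payload;
        return true;
    }
};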
395
396// Portable single-producer, single-consumer semaphore below:
397
398#if defined(_WIN32)
399// Avoid including windows.h in a header; we only need a handful of
400// items, so we'll redeclare them here (this is relatively safe since
401// the API generally has to remain stable between Windows versions).
402// I know this is an ugly hack but it still beats polluting the global
403// namespace with thousands of generic names or adding a .cpp for nothing.
404extern "C" {
405struct _SECURITY_ATTRIBUTES;
406__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes,
407 long lInitialCount, long lMaximumCount,
408 const wchar_t* lpName);
409__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
410__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle,
411 unsigned long dwMilliseconds);
412__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount,
413 long* lpPreviousCount);
414}
415#elif defined(__MACH__)
416#include <mach/mach.h>
417#elif defined(__unix__)
418#include <semaphore.h>
419#elif defined(FREERTOS)
420#include <FreeRTOS.h>
421#include <semphr.h>
422#include <task.h>
423#endif
424
425namespace Common {
426// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
427// portable + lightweight semaphore implementations, originally from
428// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
429// LICENSE:
430// Copyright (c) 2015 Jeff Preshing
431//
432// This software is provided 'as-is', without any express or implied
433// warranty. In no event will the authors be held liable for any damages
434// arising from the use of this software.
435//
436// Permission is granted to anyone to use this software for any purpose,
437// including commercial applications, and to alter it and redistribute it
438// freely, subject to the following restrictions:
439//
440// 1. The origin of this software must not be misrepresented; you must not
441// claim that you wrote the original software. If you use this software
442// in a product, an acknowledgement in the product documentation would be
443// appreciated but is not required.
444// 2. Altered source versions must be plainly marked as such, and must not be
445// misrepresented as being the original software.
446// 3. This notice may not be removed or altered from any source distribution.
447namespace spsc_sema {
448#if defined(_WIN32)
449class Semaphore {
450private:
451 void* m_hSema;
452
453 Semaphore(const Semaphore& other);
454 Semaphore& operator=(const Semaphore& other);
455
456public:
457 AE_NO_TSAN Semaphore(int initialCount = 0) : m_hSema() {
458 assert(initialCount >= 0);
459 const long maxLong = 0x7fffffff;
460 m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
461 assert(m_hSema);
462 }
463
464 AE_NO_TSAN ~Semaphore() {
465 CloseHandle(m_hSema);
466 }
467
468 bool wait() AE_NO_TSAN {
469 const unsigned long infinite = 0xffffffff;
470 return WaitForSingleObject(m_hSema, infinite) == 0;
471 }
472
473 bool try_wait() AE_NO_TSAN {
474 return WaitForSingleObject(m_hSema, 0) == 0;
475 }
476
477 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
478 return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) == 0;
479 }
480
481 void signal(int count = 1) AE_NO_TSAN {
482 while (!ReleaseSemaphore(m_hSema, count, nullptr))
483 ;
484 }
485};
486#elif defined(__MACH__)
487//---------------------------------------------------------
488// Semaphore (Apple iOS and OSX)
489// Can't use POSIX semaphores due to
490// http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
491//---------------------------------------------------------
492class Semaphore {
493private:
494 semaphore_t m_sema;
495
496 Semaphore(const Semaphore& other);
497 Semaphore& operator=(const Semaphore& other);
498
499public:
500 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
501 assert(initialCount >= 0);
502 kern_return_t rc =
503 semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
504 assert(rc == KERN_SUCCESS);
505 AE_UNUSED(rc);
506 }
507
508 AE_NO_TSAN ~Semaphore() {
509 semaphore_destroy(mach_task_self(), m_sema);
510 }
511
512 bool wait() AE_NO_TSAN {
513 return semaphore_wait(m_sema) == KERN_SUCCESS;
514 }
515
516 bool try_wait() AE_NO_TSAN {
517 return timed_wait(0);
518 }
519
520 bool timed_wait(std::uint64_t timeout_usecs) AE_NO_TSAN {
521 mach_timespec_t ts;
522 ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
523 ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
524
525 // added in OSX 10.10:
526 // https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
527 kern_return_t rc = semaphore_timedwait(m_sema, ts);
528 return rc == KERN_SUCCESS;
529 }
530
531 void signal() AE_NO_TSAN {
532 while (semaphore_signal(m_sema) != KERN_SUCCESS)
533 ;
534 }
535
536 void signal(int count) AE_NO_TSAN {
537 while (count-- > 0) {
538 while (semaphore_signal(m_sema) != KERN_SUCCESS)
539 ;
540 }
541 }
542};
543#elif defined(__unix__)
544//---------------------------------------------------------
545// Semaphore (POSIX, Linux)
546//---------------------------------------------------------
547class Semaphore {
548private:
549 sem_t m_sema;
550
551 Semaphore(const Semaphore& other);
552 Semaphore& operator=(const Semaphore& other);
553
554public:
555 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
556 assert(initialCount >= 0);
557 int rc = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
558 assert(rc == 0);
559 AE_UNUSED(rc);
560 }
561
562 AE_NO_TSAN ~Semaphore() {
563 sem_destroy(&m_sema);
564 }
565
566 bool wait() AE_NO_TSAN {
567 // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
568 int rc;
569 do {
570 rc = sem_wait(&m_sema);
571 } while (rc == -1 && errno == EINTR);
572 return rc == 0;
573 }
574
575 bool try_wait() AE_NO_TSAN {
576 int rc;
577 do {
578 rc = sem_trywait(&m_sema);
579 } while (rc == -1 && errno == EINTR);
580 return rc == 0;
581 }
582
583 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
584 struct timespec ts;
585 const int usecs_in_1_sec = 1000000;
586 const int nsecs_in_1_sec = 1000000000;
587 clock_gettime(CLOCK_REALTIME, &ts);
588 ts.tv_sec += static_cast<time_t>(usecs / usecs_in_1_sec);
589 ts.tv_nsec += static_cast<long>(usecs % usecs_in_1_sec) * 1000;
590 // sem_timedwait bombs if you have more than 1e9 in tv_nsec
591 // so we have to clean things up before passing it in
592 if (ts.tv_nsec >= nsecs_in_1_sec) {
593 ts.tv_nsec -= nsecs_in_1_sec;
594 ++ts.tv_sec;
595 }
596
597 int rc;
598 do {
599 rc = sem_timedwait(&m_sema, &ts);
600 } while (rc == -1 && errno == EINTR);
601 return rc == 0;
602 }
603
604 void signal() AE_NO_TSAN {
605 while (sem_post(&m_sema) == -1)
606 ;
607 }
608
609 void signal(int count) AE_NO_TSAN {
610 while (count-- > 0) {
611 while (sem_post(&m_sema) == -1)
612 ;
613 }
614 }
615};
616#elif defined(FREERTOS)
617//---------------------------------------------------------
618// Semaphore (FreeRTOS)
619//---------------------------------------------------------
620class Semaphore {
621private:
622 SemaphoreHandle_t m_sema;
623
624 Semaphore(const Semaphore& other);
625 Semaphore& operator=(const Semaphore& other);
626
627public:
628 AE_NO_TSAN Semaphore(int initialCount = 0) : m_sema() {
629 assert(initialCount >= 0);
630 m_sema = xSemaphoreCreateCounting(static_cast<UBaseType_t>(~0ull),
631 static_cast<UBaseType_t>(initialCount));
632 assert(m_sema);
633 }
634
635 AE_NO_TSAN ~Semaphore() {
636 vSemaphoreDelete(m_sema);
637 }
638
639 bool wait() AE_NO_TSAN {
640 return xSemaphoreTake(m_sema, portMAX_DELAY) == pdTRUE;
641 }
642
643 bool try_wait() AE_NO_TSAN {
644 // Note: In an ISR context, if this causes a task to unblock,
645 // the caller won't know about it
646 if (xPortIsInsideInterrupt())
647 return xSemaphoreTakeFromISR(m_sema, NULL) == pdTRUE;
648 return xSemaphoreTake(m_sema, 0) == pdTRUE;
649 }
650
651 bool timed_wait(std::uint64_t usecs) AE_NO_TSAN {
652 std::uint64_t msecs = usecs / 1000;
653 TickType_t ticks = static_cast<TickType_t>(msecs / portTICK_PERIOD_MS);
654 if (ticks == 0)
655 return try_wait();
656 return xSemaphoreTake(m_sema, ticks) == pdTRUE;
657 }
658
659 void signal() AE_NO_TSAN {
660 // Note: In an ISR context, if this causes a task to unblock,
661 // the caller won't know about it
662 BaseType_t rc;
663 if (xPortIsInsideInterrupt())
664 rc = xSemaphoreGiveFromISR(m_sema, NULL);
665 else
666 rc = xSemaphoreGive(m_sema);
667 assert(rc == pdTRUE);
668 AE_UNUSED(rc);
669 }
670
671 void signal(int count) AE_NO_TSAN {
672 while (count-- > 0)
673 signal();
674 }
675};
676#else
677#error Unsupported platform! (No semaphore wrapper available)
678#endif
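
// Usage sketch: every platform branch above exposes the same minimal
// interface (wait / try_wait / timed_wait / signal), so code built on top of
// it can be written once. example_wait_briefly is a hypothetical helper, for
// illustration only.
inline bool example_wait_briefly(Semaphore& sema) AE_NO_TSAN {
    if (sema.try_wait())          // non-blocking fast path
        return true;
    return sema.timed_wait(1000); // otherwise block for up to 1000 microseconds
}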
679
680//---------------------------------------------------------
681// LightweightSemaphore
682//---------------------------------------------------------
683class LightweightSemaphore {
684public:
685 typedef std::make_signed<std::size_t>::type ssize_t;
686
687private:
688 weak_atomic<ssize_t> m_count;
689 Semaphore m_sema;
690
691 bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1) AE_NO_TSAN {
692 ssize_t oldCount;
693 // Is there a better way to set the initial spin count?
694 // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
695 // as threads start hitting the kernel semaphore.
696 int spin = 1024;
697 while (--spin >= 0) {
698 if (m_count.load() > 0) {
699 m_count.fetch_add_acquire(-1);
700 return true;
701 }
702 compiler_fence(memory_order_acquire); // Prevent the compiler from collapsing the loop.
703 }
704 oldCount = m_count.fetch_add_acquire(-1);
705 if (oldCount > 0)
706 return true;
707 if (timeout_usecs < 0) {
708 if (m_sema.wait())
709 return true;
710 }
711 if (timeout_usecs > 0 && m_sema.timed_wait(static_cast<uint64_t>(timeout_usecs)))
712 return true;
713 // At this point, we've timed out waiting for the semaphore, but the
714 // count is still decremented indicating we may still be waiting on
715 // it. So we have to re-adjust the count, but only if the semaphore
716 // wasn't signaled enough times for us in the meantime. If it was, we
717 // need to consume that semaphore signal as well.
718 while (true) {
719 oldCount = m_count.fetch_add_release(1);
720 if (oldCount < 0)
721 return false; // successfully restored things to the way they were
722 // Oh, the producer thread just signaled the semaphore after all. Try again:
723 oldCount = m_count.fetch_add_acquire(-1);
724 if (oldCount > 0 && m_sema.try_wait())
725 return true;
726 }
727 }
728
729public:
730 AE_NO_TSAN LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount), m_sema() {
731 assert(initialCount >= 0);
732 }
733
734 bool tryWait() AE_NO_TSAN {
735 if (m_count.load() > 0) {
736 m_count.fetch_add_acquire(-1);
737 return true;
738 }
739 return false;
740 }
741
742 bool wait() AE_NO_TSAN {
743 return tryWait() || waitWithPartialSpinning();
744 }
745
746 bool wait(std::int64_t timeout_usecs) AE_NO_TSAN {
747 return tryWait() || waitWithPartialSpinning(timeout_usecs);
748 }
749
750 void signal(ssize_t count = 1) AE_NO_TSAN {
751 assert(count >= 0);
752 ssize_t oldCount = m_count.fetch_add_release(count);
753 assert(oldCount >= -1);
754 if (oldCount < 0) {
755 m_sema.signal(1);
756 }
757 }
758
759 std::size_t availableApprox() const AE_NO_TSAN {
760 ssize_t count = m_count.load();
761 return count > 0 ? static_cast<std::size_t>(count) : 0;
762 }
763};
764} // namespace spsc_sema
765} // namespace Common
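
// Usage sketch: the intended single-producer/single-consumer handoff with
// LightweightSemaphore -- the consumer spins briefly in user space (see
// waitWithPartialSpinning above) and only falls back to the kernel semaphore
// when nothing arrives in time. The functions below are hypothetical, for
// illustration only; the actual SPSC data structure is elided.
inline void example_producer_publish(Common::spsc_sema::LightweightSemaphore& items) AE_NO_TSAN {
    // ...push one element into the SPSC structure first...
    items.signal(); // then make exactly one item claimable by the consumer
}

inline bool example_consumer_take(Common::spsc_sema::LightweightSemaphore& items,
                                  std::int64_t timeout_usecs) AE_NO_TSAN {
    // Returns true once an item has been claimed and may be popped,
    // false if the timeout expires first.
    return items.wait(timeout_usecs);
}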
766
767#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
768#pragma warning(pop)
769#ifdef __cplusplus_cli
770#pragma managed(pop)
771#endif
772#endif