diff options
Diffstat (limited to 'src/common/bounded_threadsafe_queue.h')
| -rw-r--r-- | src/common/bounded_threadsafe_queue.h | 319 |
1 files changed, 205 insertions, 114 deletions
diff --git a/src/common/bounded_threadsafe_queue.h b/src/common/bounded_threadsafe_queue.h index 21217801e..bd87aa09b 100644 --- a/src/common/bounded_threadsafe_queue.h +++ b/src/common/bounded_threadsafe_queue.h | |||
| @@ -1,158 +1,249 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright (c) 2020 Erik Rigtorp <erik@rigtorp.se> | 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: MIT | 2 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <atomic> | 6 | #include <atomic> |
| 7 | #include <bit> | ||
| 8 | #include <condition_variable> | 7 | #include <condition_variable> |
| 9 | #include <memory> | 8 | #include <cstddef> |
| 10 | #include <mutex> | 9 | #include <mutex> |
| 11 | #include <new> | 10 | #include <new> |
| 12 | #include <stop_token> | 11 | |
| 13 | #include <type_traits> | 12 | #include "common/polyfill_thread.h" |
| 14 | #include <utility> | ||
| 15 | 13 | ||
| 16 | namespace Common { | 14 | namespace Common { |
| 17 | 15 | ||
| 18 | #if defined(__cpp_lib_hardware_interference_size) | 16 | namespace detail { |
| 19 | constexpr size_t hardware_interference_size = std::hardware_destructive_interference_size; | 17 | constexpr size_t DefaultCapacity = 0x1000; |
| 20 | #else | 18 | } // namespace detail |
| 21 | constexpr size_t hardware_interference_size = 64; | 19 | |
| 22 | #endif | 20 | template <typename T, size_t Capacity = detail::DefaultCapacity> |
| 21 | class SPSCQueue { | ||
| 22 | static_assert((Capacity & (Capacity - 1)) == 0, "Capacity must be a power of two."); | ||
| 23 | 23 | ||
| 24 | template <typename T, size_t capacity = 0x400> | ||
| 25 | class MPSCQueue { | ||
| 26 | public: | 24 | public: |
| 27 | explicit MPSCQueue() : allocator{std::allocator<Slot<T>>()} { | 25 | template <typename... Args> |
| 28 | // Allocate one extra slot to prevent false sharing on the last slot | 26 | bool TryEmplace(Args&&... args) { |
| 29 | slots = allocator.allocate(capacity + 1); | 27 | return Emplace<PushMode::Try>(std::forward<Args>(args)...); |
| 30 | // Allocators are not required to honor alignment for over-aligned types | ||
| 31 | // (see http://eel.is/c++draft/allocator.requirements#10) so we verify | ||
| 32 | // alignment here | ||
| 33 | if (reinterpret_cast<uintptr_t>(slots) % alignof(Slot<T>) != 0) { | ||
| 34 | allocator.deallocate(slots, capacity + 1); | ||
| 35 | throw std::bad_alloc(); | ||
| 36 | } | ||
| 37 | for (size_t i = 0; i < capacity; ++i) { | ||
| 38 | std::construct_at(&slots[i]); | ||
| 39 | } | ||
| 40 | static_assert(std::has_single_bit(capacity), "capacity must be an integer power of 2"); | ||
| 41 | static_assert(alignof(Slot<T>) == hardware_interference_size, | ||
| 42 | "Slot must be aligned to cache line boundary to prevent false sharing"); | ||
| 43 | static_assert(sizeof(Slot<T>) % hardware_interference_size == 0, | ||
| 44 | "Slot size must be a multiple of cache line size to prevent " | ||
| 45 | "false sharing between adjacent slots"); | ||
| 46 | static_assert(sizeof(MPSCQueue) % hardware_interference_size == 0, | ||
| 47 | "Queue size must be a multiple of cache line size to " | ||
| 48 | "prevent false sharing between adjacent queues"); | ||
| 49 | } | ||
| 50 | |||
| 51 | ~MPSCQueue() noexcept { | ||
| 52 | for (size_t i = 0; i < capacity; ++i) { | ||
| 53 | std::destroy_at(&slots[i]); | ||
| 54 | } | ||
| 55 | allocator.deallocate(slots, capacity + 1); | ||
| 56 | } | 28 | } |
| 57 | 29 | ||
| 58 | // The queue must be both non-copyable and non-movable | 30 | template <typename... Args> |
| 59 | MPSCQueue(const MPSCQueue&) = delete; | 31 | void EmplaceWait(Args&&... args) { |
| 60 | MPSCQueue& operator=(const MPSCQueue&) = delete; | 32 | Emplace<PushMode::Wait>(std::forward<Args>(args)...); |
| 33 | } | ||
| 61 | 34 | ||
| 62 | MPSCQueue(MPSCQueue&&) = delete; | 35 | bool TryPop(T& t) { |
| 63 | MPSCQueue& operator=(MPSCQueue&&) = delete; | 36 | return Pop<PopMode::Try>(t); |
| 37 | } | ||
| 64 | 38 | ||
| 65 | void Push(const T& v) noexcept { | 39 | void PopWait(T& t) { |
| 66 | static_assert(std::is_nothrow_copy_constructible_v<T>, | 40 | Pop<PopMode::Wait>(t); |
| 67 | "T must be nothrow copy constructible"); | ||
| 68 | emplace(v); | ||
| 69 | } | 41 | } |
| 70 | 42 | ||
| 71 | template <typename P, typename = std::enable_if_t<std::is_nothrow_constructible_v<T, P&&>>> | 43 | void PopWait(T& t, std::stop_token stop_token) { |
| 72 | void Push(P&& v) noexcept { | 44 | Pop<PopMode::WaitWithStopToken>(t, stop_token); |
| 73 | emplace(std::forward<P>(v)); | ||
| 74 | } | 45 | } |
| 75 | 46 | ||
| 76 | void Pop(T& v, std::stop_token stop) noexcept { | 47 | T PopWait() { |
| 77 | auto const tail = tail_.fetch_add(1); | 48 | T t; |
| 78 | auto& slot = slots[idx(tail)]; | 49 | Pop<PopMode::Wait>(t); |
| 79 | if (!slot.turn.test()) { | 50 | return t; |
| 80 | std::unique_lock lock{cv_mutex}; | 51 | } |
| 81 | cv.wait(lock, stop, [&slot] { return slot.turn.test(); }); | 52 | |
| 82 | } | 53 | T PopWait(std::stop_token stop_token) { |
| 83 | v = slot.move(); | 54 | T t; |
| 84 | slot.destroy(); | 55 | Pop<PopMode::WaitWithStopToken>(t, stop_token); |
| 85 | slot.turn.clear(); | 56 | return t; |
| 86 | slot.turn.notify_one(); | ||
| 87 | } | 57 | } |
| 88 | 58 | ||
| 89 | private: | 59 | private: |
| 90 | template <typename U = T> | 60 | enum class PushMode { |
| 91 | struct Slot { | 61 | Try, |
| 92 | ~Slot() noexcept { | 62 | Wait, |
| 93 | if (turn.test()) { | 63 | Count, |
| 94 | destroy(); | 64 | }; |
| 65 | |||
| 66 | enum class PopMode { | ||
| 67 | Try, | ||
| 68 | Wait, | ||
| 69 | WaitWithStopToken, | ||
| 70 | Count, | ||
| 71 | }; | ||
| 72 | |||
| 73 | template <PushMode Mode, typename... Args> | ||
| 74 | bool Emplace(Args&&... args) { | ||
| 75 | const size_t write_index = m_write_index.load(std::memory_order::relaxed); | ||
| 76 | |||
| 77 | if constexpr (Mode == PushMode::Try) { | ||
| 78 | // Check if we have free slots to write to. | ||
| 79 | if ((write_index - m_read_index.load(std::memory_order::acquire)) == Capacity) { | ||
| 80 | return false; | ||
| 95 | } | 81 | } |
| 82 | } else if constexpr (Mode == PushMode::Wait) { | ||
| 83 | // Wait until we have free slots to write to. | ||
| 84 | std::unique_lock lock{producer_cv_mutex}; | ||
| 85 | producer_cv.wait(lock, [this, write_index] { | ||
| 86 | return (write_index - m_read_index.load(std::memory_order::acquire)) < Capacity; | ||
| 87 | }); | ||
| 88 | } else { | ||
| 89 | static_assert(Mode < PushMode::Count, "Invalid PushMode."); | ||
| 96 | } | 90 | } |
| 97 | 91 | ||
| 98 | template <typename... Args> | 92 | // Determine the position to write to. |
| 99 | void construct(Args&&... args) noexcept { | 93 | const size_t pos = write_index % Capacity; |
| 100 | static_assert(std::is_nothrow_constructible_v<U, Args&&...>, | ||
| 101 | "T must be nothrow constructible with Args&&..."); | ||
| 102 | std::construct_at(reinterpret_cast<U*>(&storage), std::forward<Args>(args)...); | ||
| 103 | } | ||
| 104 | 94 | ||
| 105 | void destroy() noexcept { | 95 | // Emplace into the queue. |
| 106 | static_assert(std::is_nothrow_destructible_v<U>, "T must be nothrow destructible"); | 96 | std::construct_at(std::addressof(m_data[pos]), std::forward<Args>(args)...); |
| 107 | std::destroy_at(reinterpret_cast<U*>(&storage)); | 97 | |
| 108 | } | 98 | // Increment the write index. |
| 99 | ++m_write_index; | ||
| 100 | |||
| 101 | // Notify the consumer that we have pushed into the queue. | ||
| 102 | std::scoped_lock lock{consumer_cv_mutex}; | ||
| 103 | consumer_cv.notify_one(); | ||
| 104 | |||
| 105 | return true; | ||
| 106 | } | ||
| 107 | |||
| 108 | template <PopMode Mode> | ||
| 109 | bool Pop(T& t, [[maybe_unused]] std::stop_token stop_token = {}) { | ||
| 110 | const size_t read_index = m_read_index.load(std::memory_order::relaxed); | ||
| 109 | 111 | ||
| 110 | U&& move() noexcept { | 112 | if constexpr (Mode == PopMode::Try) { |
| 111 | return reinterpret_cast<U&&>(storage); | 113 | // Check if the queue is empty. |
| 114 | if (read_index == m_write_index.load(std::memory_order::acquire)) { | ||
| 115 | return false; | ||
| 116 | } | ||
| 117 | } else if constexpr (Mode == PopMode::Wait) { | ||
| 118 | // Wait until the queue is not empty. | ||
| 119 | std::unique_lock lock{consumer_cv_mutex}; | ||
| 120 | consumer_cv.wait(lock, [this, read_index] { | ||
| 121 | return read_index != m_write_index.load(std::memory_order::acquire); | ||
| 122 | }); | ||
| 123 | } else if constexpr (Mode == PopMode::WaitWithStopToken) { | ||
| 124 | // Wait until the queue is not empty. | ||
| 125 | std::unique_lock lock{consumer_cv_mutex}; | ||
| 126 | Common::CondvarWait(consumer_cv, lock, stop_token, [this, read_index] { | ||
| 127 | return read_index != m_write_index.load(std::memory_order::acquire); | ||
| 128 | }); | ||
| 129 | if (stop_token.stop_requested()) { | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | } else { | ||
| 133 | static_assert(Mode < PopMode::Count, "Invalid PopMode."); | ||
| 112 | } | 134 | } |
| 113 | 135 | ||
| 114 | // Align to avoid false sharing between adjacent slots | 136 | // Determine the position to read from. |
| 115 | alignas(hardware_interference_size) std::atomic_flag turn{}; | 137 | const size_t pos = read_index % Capacity; |
| 116 | struct aligned_store { | 138 | |
| 117 | struct type { | 139 | // Pop the data off the queue, moving it. |
| 118 | alignas(U) unsigned char data[sizeof(U)]; | 140 | t = std::move(m_data[pos]); |
| 119 | }; | 141 | |
| 120 | }; | 142 | // Increment the read index. |
| 121 | typename aligned_store::type storage; | 143 | ++m_read_index; |
| 122 | }; | 144 | |
| 145 | // Notify the producer that we have popped off the queue. | ||
| 146 | std::scoped_lock lock{producer_cv_mutex}; | ||
| 147 | producer_cv.notify_one(); | ||
| 148 | |||
| 149 | return true; | ||
| 150 | } | ||
| 123 | 151 | ||
| 152 | alignas(128) std::atomic_size_t m_read_index{0}; | ||
| 153 | alignas(128) std::atomic_size_t m_write_index{0}; | ||
| 154 | |||
| 155 | std::array<T, Capacity> m_data; | ||
| 156 | |||
| 157 | std::condition_variable_any producer_cv; | ||
| 158 | std::mutex producer_cv_mutex; | ||
| 159 | std::condition_variable_any consumer_cv; | ||
| 160 | std::mutex consumer_cv_mutex; | ||
| 161 | }; | ||
| 162 | |||
| 163 | template <typename T, size_t Capacity = detail::DefaultCapacity> | ||
| 164 | class MPSCQueue { | ||
| 165 | public: | ||
| 124 | template <typename... Args> | 166 | template <typename... Args> |
| 125 | void emplace(Args&&... args) noexcept { | 167 | bool TryEmplace(Args&&... args) { |
| 126 | static_assert(std::is_nothrow_constructible_v<T, Args&&...>, | 168 | std::scoped_lock lock{write_mutex}; |
| 127 | "T must be nothrow constructible with Args&&..."); | 169 | return spsc_queue.TryEmplace(std::forward<Args>(args)...); |
| 128 | auto const head = head_.fetch_add(1); | ||
| 129 | auto& slot = slots[idx(head)]; | ||
| 130 | slot.turn.wait(true); | ||
| 131 | slot.construct(std::forward<Args>(args)...); | ||
| 132 | slot.turn.test_and_set(); | ||
| 133 | cv.notify_one(); | ||
| 134 | } | 170 | } |
| 135 | 171 | ||
| 136 | constexpr size_t idx(size_t i) const noexcept { | 172 | template <typename... Args> |
| 137 | return i & mask; | 173 | void EmplaceWait(Args&&... args) { |
| 174 | std::scoped_lock lock{write_mutex}; | ||
| 175 | spsc_queue.EmplaceWait(std::forward<Args>(args)...); | ||
| 138 | } | 176 | } |
| 139 | 177 | ||
| 140 | static constexpr size_t mask = capacity - 1; | 178 | bool TryPop(T& t) { |
| 179 | return spsc_queue.TryPop(t); | ||
| 180 | } | ||
| 141 | 181 | ||
| 142 | // Align to avoid false sharing between head_ and tail_ | 182 | void PopWait(T& t) { |
| 143 | alignas(hardware_interference_size) std::atomic<size_t> head_{0}; | 183 | spsc_queue.PopWait(t); |
| 144 | alignas(hardware_interference_size) std::atomic<size_t> tail_{0}; | 184 | } |
| 145 | 185 | ||
| 146 | std::mutex cv_mutex; | 186 | void PopWait(T& t, std::stop_token stop_token) { |
| 147 | std::condition_variable_any cv; | 187 | spsc_queue.PopWait(t, stop_token); |
| 188 | } | ||
| 189 | |||
| 190 | T PopWait() { | ||
| 191 | return spsc_queue.PopWait(); | ||
| 192 | } | ||
| 148 | 193 | ||
| 149 | Slot<T>* slots; | 194 | T PopWait(std::stop_token stop_token) { |
| 150 | [[no_unique_address]] std::allocator<Slot<T>> allocator; | 195 | return spsc_queue.PopWait(stop_token); |
| 196 | } | ||
| 151 | 197 | ||
| 152 | static_assert(std::is_nothrow_copy_assignable_v<T> || std::is_nothrow_move_assignable_v<T>, | 198 | private: |
| 153 | "T must be nothrow copy or move assignable"); | 199 | SPSCQueue<T, Capacity> spsc_queue; |
| 200 | std::mutex write_mutex; | ||
| 201 | }; | ||
| 154 | 202 | ||
| 155 | static_assert(std::is_nothrow_destructible_v<T>, "T must be nothrow destructible"); | 203 | template <typename T, size_t Capacity = detail::DefaultCapacity> |
| 204 | class MPMCQueue { | ||
| 205 | public: | ||
| 206 | template <typename... Args> | ||
| 207 | bool TryEmplace(Args&&... args) { | ||
| 208 | std::scoped_lock lock{write_mutex}; | ||
| 209 | return spsc_queue.TryEmplace(std::forward<Args>(args)...); | ||
| 210 | } | ||
| 211 | |||
| 212 | template <typename... Args> | ||
| 213 | void EmplaceWait(Args&&... args) { | ||
| 214 | std::scoped_lock lock{write_mutex}; | ||
| 215 | spsc_queue.EmplaceWait(std::forward<Args>(args)...); | ||
| 216 | } | ||
| 217 | |||
| 218 | bool TryPop(T& t) { | ||
| 219 | std::scoped_lock lock{read_mutex}; | ||
| 220 | return spsc_queue.TryPop(t); | ||
| 221 | } | ||
| 222 | |||
| 223 | void PopWait(T& t) { | ||
| 224 | std::scoped_lock lock{read_mutex}; | ||
| 225 | spsc_queue.PopWait(t); | ||
| 226 | } | ||
| 227 | |||
| 228 | void PopWait(T& t, std::stop_token stop_token) { | ||
| 229 | std::scoped_lock lock{read_mutex}; | ||
| 230 | spsc_queue.PopWait(t, stop_token); | ||
| 231 | } | ||
| 232 | |||
| 233 | T PopWait() { | ||
| 234 | std::scoped_lock lock{read_mutex}; | ||
| 235 | return spsc_queue.PopWait(); | ||
| 236 | } | ||
| 237 | |||
| 238 | T PopWait(std::stop_token stop_token) { | ||
| 239 | std::scoped_lock lock{read_mutex}; | ||
| 240 | return spsc_queue.PopWait(stop_token); | ||
| 241 | } | ||
| 242 | |||
| 243 | private: | ||
| 244 | SPSCQueue<T, Capacity> spsc_queue; | ||
| 245 | std::mutex write_mutex; | ||
| 246 | std::mutex read_mutex; | ||
| 156 | }; | 247 | }; |
| 157 | 248 | ||
| 158 | } // namespace Common | 249 | } // namespace Common |