diff options
| author | 2023-04-15 00:03:48 +0200 | |
|---|---|---|
| committer | 2023-04-23 04:48:50 +0200 | |
| commit | fca72beb2db658e84ceac6e1f46f682bcacf8f25 (patch) | |
| tree | b2139045203240661f924a1cbaec017ca666d16a /src | |
| parent | Merge pull request #10074 from Kelebek1/fermi_blit (diff) | |
| download | yuzu-fca72beb2db658e84ceac6e1f46f682bcacf8f25.tar.gz yuzu-fca72beb2db658e84ceac6e1f46f682bcacf8f25.tar.xz yuzu-fca72beb2db658e84ceac6e1f46f682bcacf8f25.zip | |
Fence Manager: implement async fence management in a separate thread.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/fence_manager.h | 137 | ||||
| -rw-r--r-- | src/video_core/query_cache.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_fence_manager.h | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_fence_manager.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_fence_manager.h | 11 |
5 files changed, 133 insertions, 35 deletions
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c390ac91b..027e663bf 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -4,13 +4,20 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <condition_variable> | ||
| 7 | #include <cstring> | 8 | #include <cstring> |
| 8 | #include <deque> | 9 | #include <deque> |
| 9 | #include <functional> | 10 | #include <functional> |
| 10 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 13 | #include <thread> | ||
| 11 | #include <queue> | 14 | #include <queue> |
| 12 | 15 | ||
| 13 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/microprofile.h" | ||
| 18 | #include "common/scope_exit.h" | ||
| 19 | #include "common/settings.h" | ||
| 20 | #include "common/thread.h" | ||
| 14 | #include "video_core/delayed_destruction_ring.h" | 21 | #include "video_core/delayed_destruction_ring.h" |
| 15 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 16 | #include "video_core/host1x/host1x.h" | 23 | #include "video_core/host1x/host1x.h" |
| @@ -23,15 +30,26 @@ class FenceBase { | |||
| 23 | public: | 30 | public: |
| 24 | explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} | 31 | explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} |
| 25 | 32 | ||
| 33 | bool IsStubbed() const { | ||
| 34 | return is_stubbed; | ||
| 35 | } | ||
| 36 | |||
| 26 | protected: | 37 | protected: |
| 27 | bool is_stubbed; | 38 | bool is_stubbed; |
| 28 | }; | 39 | }; |
| 29 | 40 | ||
| 30 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | 41 | template <typename Traits> |
| 31 | class FenceManager { | 42 | class FenceManager { |
| 43 | using TFence = typename Traits::FenceType; | ||
| 44 | using TTextureCache = typename Traits::TextureCacheType; | ||
| 45 | using TBufferCache = typename Traits::BufferCacheType; | ||
| 46 | using TQueryCache = typename Traits::QueryCacheType; | ||
| 47 | static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK; | ||
| 48 | |||
| 32 | public: | 49 | public: |
| 33 | /// Notify the fence manager about a new frame | 50 | /// Notify the fence manager about a new frame |
| 34 | void TickFrame() { | 51 | void TickFrame() { |
| 52 | std::unique_lock lock(ring_guard); | ||
| 35 | delayed_destruction_ring.Tick(); | 53 | delayed_destruction_ring.Tick(); |
| 36 | } | 54 | } |
| 37 | 55 | ||
| @@ -46,17 +64,27 @@ public: | |||
| 46 | } | 64 | } |
| 47 | 65 | ||
| 48 | void SignalFence(std::function<void()>&& func) { | 66 | void SignalFence(std::function<void()>&& func) { |
| 49 | TryReleasePendingFences(); | 67 | if constexpr (!can_async_check) { |
| 68 | TryReleasePendingFences<false>(); | ||
| 69 | } | ||
| 70 | std::function<void()> callback = std::move(func); | ||
| 50 | const bool should_flush = ShouldFlush(); | 71 | const bool should_flush = ShouldFlush(); |
| 51 | CommitAsyncFlushes(); | 72 | CommitAsyncFlushes(); |
| 52 | uncommitted_operations.emplace_back(std::move(func)); | ||
| 53 | CommitOperations(); | ||
| 54 | TFence new_fence = CreateFence(!should_flush); | 73 | TFence new_fence = CreateFence(!should_flush); |
| 55 | fences.push(new_fence); | 74 | if constexpr (can_async_check) { |
| 75 | guard.lock(); | ||
| 76 | } | ||
| 77 | pending_operations.emplace_back(std::move(uncommitted_operations)); | ||
| 56 | QueueFence(new_fence); | 78 | QueueFence(new_fence); |
| 79 | callback(); | ||
| 80 | fences.push(std::move(new_fence)); | ||
| 57 | if (should_flush) { | 81 | if (should_flush) { |
| 58 | rasterizer.FlushCommands(); | 82 | rasterizer.FlushCommands(); |
| 59 | } | 83 | } |
| 84 | if constexpr (can_async_check) { | ||
| 85 | guard.unlock(); | ||
| 86 | cv.notify_all(); | ||
| 87 | } | ||
| 60 | } | 88 | } |
| 61 | 89 | ||
| 62 | void SignalSyncPoint(u32 value) { | 90 | void SignalSyncPoint(u32 value) { |
| @@ -66,29 +94,30 @@ public: | |||
| 66 | } | 94 | } |
| 67 | 95 | ||
| 68 | void WaitPendingFences() { | 96 | void WaitPendingFences() { |
| 69 | while (!fences.empty()) { | 97 | if constexpr (!can_async_check) { |
| 70 | TFence& current_fence = fences.front(); | 98 | TryReleasePendingFences<true>(); |
| 71 | if (ShouldWait()) { | ||
| 72 | WaitFence(current_fence); | ||
| 73 | } | ||
| 74 | PopAsyncFlushes(); | ||
| 75 | auto operations = std::move(pending_operations.front()); | ||
| 76 | pending_operations.pop_front(); | ||
| 77 | for (auto& operation : operations) { | ||
| 78 | operation(); | ||
| 79 | } | ||
| 80 | PopFence(); | ||
| 81 | } | 99 | } |
| 82 | } | 100 | } |
| 83 | 101 | ||
| 84 | protected: | 102 | protected: |
| 85 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 103 | explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 86 | TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, | 104 | TTextureCache& texture_cache_, TBufferCache& buffer_cache_, |
| 87 | TQueryCache& query_cache_) | 105 | TQueryCache& query_cache_) |
| 88 | : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, | 106 | : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, |
| 89 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} | 107 | texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} { |
| 108 | if constexpr (can_async_check) { | ||
| 109 | fence_thread = | ||
| 110 | std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); }); | ||
| 111 | } | ||
| 112 | } | ||
| 90 | 113 | ||
| 91 | virtual ~FenceManager() = default; | 114 | virtual ~FenceManager() { |
| 115 | if constexpr (can_async_check) { | ||
| 116 | fence_thread.request_stop(); | ||
| 117 | cv.notify_all(); | ||
| 118 | fence_thread.join(); | ||
| 119 | } | ||
| 120 | } | ||
| 92 | 121 | ||
| 93 | /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is | 122 | /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is |
| 94 | /// true | 123 | /// true |
| @@ -104,15 +133,20 @@ protected: | |||
| 104 | Tegra::GPU& gpu; | 133 | Tegra::GPU& gpu; |
| 105 | Tegra::Host1x::SyncpointManager& syncpoint_manager; | 134 | Tegra::Host1x::SyncpointManager& syncpoint_manager; |
| 106 | TTextureCache& texture_cache; | 135 | TTextureCache& texture_cache; |
| 107 | TTBufferCache& buffer_cache; | 136 | TBufferCache& buffer_cache; |
| 108 | TQueryCache& query_cache; | 137 | TQueryCache& query_cache; |
| 109 | 138 | ||
| 110 | private: | 139 | private: |
| 140 | template <bool force_wait> | ||
| 111 | void TryReleasePendingFences() { | 141 | void TryReleasePendingFences() { |
| 112 | while (!fences.empty()) { | 142 | while (!fences.empty()) { |
| 113 | TFence& current_fence = fences.front(); | 143 | TFence& current_fence = fences.front(); |
| 114 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | 144 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { |
| 115 | return; | 145 | if constexpr (force_wait) { |
| 146 | WaitFence(current_fence); | ||
| 147 | } else { | ||
| 148 | return; | ||
| 149 | } | ||
| 116 | } | 150 | } |
| 117 | PopAsyncFlushes(); | 151 | PopAsyncFlushes(); |
| 118 | auto operations = std::move(pending_operations.front()); | 152 | auto operations = std::move(pending_operations.front()); |
| @@ -120,7 +154,49 @@ private: | |||
| 120 | for (auto& operation : operations) { | 154 | for (auto& operation : operations) { |
| 121 | operation(); | 155 | operation(); |
| 122 | } | 156 | } |
| 123 | PopFence(); | 157 | { |
| 158 | std::unique_lock lock(ring_guard); | ||
| 159 | delayed_destruction_ring.Push(std::move(current_fence)); | ||
| 160 | } | ||
| 161 | fences.pop(); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | void ReleaseThreadFunc(std::stop_token stop_token) { | ||
| 166 | std::string name = "GPUFencingThread"; | ||
| 167 | MicroProfileOnThreadCreate(name.c_str()); | ||
| 168 | |||
| 169 | // Cleanup | ||
| 170 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | ||
| 171 | |||
| 172 | Common::SetCurrentThreadName(name.c_str()); | ||
| 173 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | ||
| 174 | |||
| 175 | TFence current_fence; | ||
| 176 | std::deque<std::function<void()>> current_operations; | ||
| 177 | while (!stop_token.stop_requested()) { | ||
| 178 | { | ||
| 179 | std::unique_lock lock(guard); | ||
| 180 | cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); }); | ||
| 181 | if (stop_token.stop_requested()) [[unlikely]] { | ||
| 182 | return; | ||
| 183 | } | ||
| 184 | current_fence = std::move(fences.front()); | ||
| 185 | current_operations = std::move(pending_operations.front()); | ||
| 186 | fences.pop(); | ||
| 187 | pending_operations.pop_front(); | ||
| 188 | } | ||
| 189 | if (!current_fence->IsStubbed()) { | ||
| 190 | WaitFence(current_fence); | ||
| 191 | } | ||
| 192 | PopAsyncFlushes(); | ||
| 193 | for (auto& operation : current_operations) { | ||
| 194 | operation(); | ||
| 195 | } | ||
| 196 | { | ||
| 197 | std::unique_lock lock(ring_guard); | ||
| 198 | delayed_destruction_ring.Push(std::move(current_fence)); | ||
| 199 | } | ||
| 124 | } | 200 | } |
| 125 | } | 201 | } |
| 126 | 202 | ||
| @@ -154,19 +230,16 @@ private: | |||
| 154 | query_cache.CommitAsyncFlushes(); | 230 | query_cache.CommitAsyncFlushes(); |
| 155 | } | 231 | } |
| 156 | 232 | ||
| 157 | void PopFence() { | ||
| 158 | delayed_destruction_ring.Push(std::move(fences.front())); | ||
| 159 | fences.pop(); | ||
| 160 | } | ||
| 161 | |||
| 162 | void CommitOperations() { | ||
| 163 | pending_operations.emplace_back(std::move(uncommitted_operations)); | ||
| 164 | } | ||
| 165 | |||
| 166 | std::queue<TFence> fences; | 233 | std::queue<TFence> fences; |
| 167 | std::deque<std::function<void()>> uncommitted_operations; | 234 | std::deque<std::function<void()>> uncommitted_operations; |
| 168 | std::deque<std::deque<std::function<void()>>> pending_operations; | 235 | std::deque<std::deque<std::function<void()>>> pending_operations; |
| 169 | 236 | ||
| 237 | std::mutex guard; | ||
| 238 | std::mutex ring_guard; | ||
| 239 | std::condition_variable cv; | ||
| 240 | |||
| 241 | std::jthread fence_thread; | ||
| 242 | |||
| 170 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | 243 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; |
| 171 | }; | 244 | }; |
| 172 | 245 | ||
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 8906ba6d8..cd339b99d 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -173,15 +173,18 @@ public: | |||
| 173 | } | 173 | } |
| 174 | 174 | ||
| 175 | void CommitAsyncFlushes() { | 175 | void CommitAsyncFlushes() { |
| 176 | std::unique_lock lock{mutex}; | ||
| 176 | committed_flushes.push_back(uncommitted_flushes); | 177 | committed_flushes.push_back(uncommitted_flushes); |
| 177 | uncommitted_flushes.reset(); | 178 | uncommitted_flushes.reset(); |
| 178 | } | 179 | } |
| 179 | 180 | ||
| 180 | bool HasUncommittedFlushes() const { | 181 | bool HasUncommittedFlushes() const { |
| 182 | std::unique_lock lock{mutex}; | ||
| 181 | return uncommitted_flushes != nullptr; | 183 | return uncommitted_flushes != nullptr; |
| 182 | } | 184 | } |
| 183 | 185 | ||
| 184 | bool ShouldWaitAsyncFlushes() const { | 186 | bool ShouldWaitAsyncFlushes() const { |
| 187 | std::unique_lock lock{mutex}; | ||
| 185 | if (committed_flushes.empty()) { | 188 | if (committed_flushes.empty()) { |
| 186 | return false; | 189 | return false; |
| 187 | } | 190 | } |
| @@ -189,6 +192,7 @@ public: | |||
| 189 | } | 192 | } |
| 190 | 193 | ||
| 191 | void PopAsyncFlushes() { | 194 | void PopAsyncFlushes() { |
| 195 | std::unique_lock lock{mutex}; | ||
| 192 | if (committed_flushes.empty()) { | 196 | if (committed_flushes.empty()) { |
| 193 | return; | 197 | return; |
| 194 | } | 198 | } |
| @@ -265,7 +269,7 @@ private: | |||
| 265 | 269 | ||
| 266 | VideoCore::RasterizerInterface& rasterizer; | 270 | VideoCore::RasterizerInterface& rasterizer; |
| 267 | 271 | ||
| 268 | std::recursive_mutex mutex; | 272 | mutable std::recursive_mutex mutex; |
| 269 | 273 | ||
| 270 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | 274 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; |
| 271 | 275 | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index f1446e732..e21b19dcc 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -30,7 +30,17 @@ private: | |||
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | using Fence = std::shared_ptr<GLInnerFence>; | 32 | using Fence = std::shared_ptr<GLInnerFence>; |
| 33 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; | 33 | |
| 34 | struct FenceManagerParams { | ||
| 35 | using FenceType = Fence; | ||
| 36 | using BufferCacheType = BufferCache; | ||
| 37 | using TextureCacheType = TextureCache; | ||
| 38 | using QueryCacheType = QueryCache; | ||
| 39 | |||
| 40 | static constexpr bool HAS_ASYNC_CHECK = false; | ||
| 41 | }; | ||
| 42 | |||
| 43 | using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>; | ||
| 34 | 44 | ||
| 35 | class FenceManagerOpenGL final : public GenericFenceManager { | 45 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 36 | public: | 46 | public: |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 0214b103a..3bba8aeb0 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -5,10 +5,12 @@ | |||
| 5 | 5 | ||
| 6 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 6 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 7 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 7 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 8 | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 9 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 9 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 10 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 10 | #include "video_core/vulkan_common/vulkan_device.h" | 11 | #include "video_core/vulkan_common/vulkan_device.h" |
| 11 | 12 | ||
| 13 | |||
| 12 | namespace Vulkan { | 14 | namespace Vulkan { |
| 13 | 15 | ||
| 14 | InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) | 16 | InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 7fe2afcd9..145359d4e 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -40,7 +40,16 @@ private: | |||
| 40 | }; | 40 | }; |
| 41 | using Fence = std::shared_ptr<InnerFence>; | 41 | using Fence = std::shared_ptr<InnerFence>; |
| 42 | 42 | ||
| 43 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; | 43 | struct FenceManagerParams { |
| 44 | using FenceType = Fence; | ||
| 45 | using BufferCacheType = BufferCache; | ||
| 46 | using TextureCacheType = TextureCache; | ||
| 47 | using QueryCacheType = QueryCache; | ||
| 48 | |||
| 49 | static constexpr bool HAS_ASYNC_CHECK = true; | ||
| 50 | }; | ||
| 51 | |||
| 52 | using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>; | ||
| 44 | 53 | ||
| 45 | class FenceManager final : public GenericFenceManager { | 54 | class FenceManager final : public GenericFenceManager { |
| 46 | public: | 55 | public: |