diff options
| author | 2021-04-07 11:41:31 +0200 | |
|---|---|---|
| committer | 2021-04-07 22:38:52 +0200 | |
| commit | e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305 (patch) | |
| tree | 042497b8c297e874b382f4b695bb601b6ea2d1cf /src | |
| parent | video_core/gpu_thread: Keep the write lock for allocating the fence. (diff) | |
| download | yuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.tar.gz yuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.tar.xz yuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.zip | |
video_core: Use a CV for blocking commands.
There is no need for a busy loop here. Let's just use a condition variable to save some power.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 11 |
2 files changed, 31 insertions, 23 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 9488bf544..7addfbc7b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -56,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 56 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | 56 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 57 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | 57 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); |
| 58 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 58 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 59 | return; | 59 | ASSERT(state.is_running == false); |
| 60 | } else { | 60 | } else { |
| 61 | UNREACHABLE(); | 61 | UNREACHABLE(); |
| 62 | } | 62 | } |
| 63 | state.signaled_fence.store(next.fence); | 63 | state.signaled_fence.store(next.fence); |
| 64 | if (next.block) { | ||
| 65 | // We have to lock the write_lock to ensure that the condition_variable wait does not | ||
| 66 | // race between the check and the wait itself. | ||
| 67 | std::lock_guard lk(state.write_lock); | ||
| 68 | state.cv.notify_all(); | ||
| 69 | } | ||
| 64 | } | 70 | } |
| 65 | } | 71 | } |
| 66 | 72 | ||
| @@ -105,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 105 | case Settings::GPUAccuracy::Extreme: { | 111 | case Settings::GPUAccuracy::Extreme: { |
| 106 | auto& gpu = system.GPU(); | 112 | auto& gpu = system.GPU(); |
| 107 | u64 fence = gpu.RequestFlush(addr, size); | 113 | u64 fence = gpu.RequestFlush(addr, size); |
| 108 | PushCommand(GPUTickCommand()); | 114 | PushCommand(GPUTickCommand(), true); |
| 109 | while (fence > gpu.CurrentFlushRequestFence()) { | 115 | ASSERT(fence <= gpu.CurrentFlushRequestFence()); |
| 110 | } | ||
| 111 | break; | 116 | break; |
| 112 | } | 117 | } |
| 113 | default: | 118 | default: |
| @@ -124,18 +129,16 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 124 | rasterizer->OnCPUWrite(addr, size); | 129 | rasterizer->OnCPUWrite(addr, size); |
| 125 | } | 130 | } |
| 126 | 131 | ||
| 127 | void ThreadManager::WaitIdle() const { | ||
| 128 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && | ||
| 129 | state.is_running) { | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | void ThreadManager::ShutDown() { | 132 | void ThreadManager::ShutDown() { |
| 134 | if (!state.is_running) { | 133 | if (!state.is_running) { |
| 135 | return; | 134 | return; |
| 136 | } | 135 | } |
| 137 | 136 | ||
| 138 | state.is_running = false; | 137 | { |
| 138 | std::lock_guard lk(state.write_lock); | ||
| 139 | state.is_running = false; | ||
| 140 | state.cv.notify_all(); | ||
| 141 | } | ||
| 139 | 142 | ||
| 140 | if (!thread.joinable()) { | 143 | if (!thread.joinable()) { |
| 141 | return; | 144 | return; |
| @@ -150,15 +153,21 @@ void ThreadManager::OnCommandListEnd() { | |||
| 150 | PushCommand(OnCommandListEndCommand()); | 153 | PushCommand(OnCommandListEndCommand()); |
| 151 | } | 154 | } |
| 152 | 155 | ||
| 153 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 156 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { |
| 157 | if (!is_async) { | ||
| 158 | // In synchronous GPU mode, block the caller until the command has executed | ||
| 159 | block = true; | ||
| 160 | } | ||
| 161 | |||
| 154 | std::unique_lock lk(state.write_lock); | 162 | std::unique_lock lk(state.write_lock); |
| 155 | const u64 fence{++state.last_fence}; | 163 | const u64 fence{++state.last_fence}; |
| 156 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 164 | state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); |
| 157 | 165 | ||
| 158 | if (!is_async) { | 166 | if (block) { |
| 159 | // In synchronous GPU mode, block the caller until the command has executed | 167 | state.cv.wait(lk, [this, fence] { |
| 160 | lk.unlock(); | 168 | return fence <= state.signaled_fence.load(std::memory_order_relaxed) || |
| 161 | WaitIdle(); | 169 | !state.is_running; |
| 170 | }); | ||
| 162 | } | 171 | } |
| 163 | 172 | ||
| 164 | return fence; | 173 | return fence; |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cb901c22a..11a648f38 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -90,11 +90,12 @@ using CommandData = | |||
| 90 | struct CommandDataContainer { | 90 | struct CommandDataContainer { |
| 91 | CommandDataContainer() = default; | 91 | CommandDataContainer() = default; |
| 92 | 92 | ||
| 93 | explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) | 93 | explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) |
| 94 | : data{std::move(data_)}, fence{next_fence_} {} | 94 | : data{std::move(data_)}, fence{next_fence_}, block(block_) {} |
| 95 | 95 | ||
| 96 | CommandData data; | 96 | CommandData data; |
| 97 | u64 fence{}; | 97 | u64 fence{}; |
| 98 | bool block{}; | ||
| 98 | }; | 99 | }; |
| 99 | 100 | ||
| 100 | /// Struct used to synchronize the GPU thread | 101 | /// Struct used to synchronize the GPU thread |
| @@ -106,6 +107,7 @@ struct SynchState final { | |||
| 106 | CommandQueue queue; | 107 | CommandQueue queue; |
| 107 | u64 last_fence{}; | 108 | u64 last_fence{}; |
| 108 | std::atomic<u64> signaled_fence{}; | 109 | std::atomic<u64> signaled_fence{}; |
| 110 | std::condition_variable cv; | ||
| 109 | }; | 111 | }; |
| 110 | 112 | ||
| 111 | /// Class used to manage the GPU thread | 113 | /// Class used to manage the GPU thread |
| @@ -140,10 +142,7 @@ public: | |||
| 140 | 142 | ||
| 141 | private: | 143 | private: |
| 142 | /// Pushes a command to be executed by the GPU thread | 144 | /// Pushes a command to be executed by the GPU thread |
| 143 | u64 PushCommand(CommandData&& command_data); | 145 | u64 PushCommand(CommandData&& command_data, bool block = false); |
| 144 | |||
| 145 | // Wait until the gpu thread is idle. | ||
| 146 | void WaitIdle() const; | ||
| 147 | 146 | ||
| 148 | Core::System& system; | 147 | Core::System& system; |
| 149 | const bool is_async; | 148 | const bool is_async; |