summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Markus Wick2021-04-07 11:41:31 +0200
committerGravatar Markus Wick2021-04-07 22:38:52 +0200
commite8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305 (patch)
tree042497b8c297e874b382f4b695bb601b6ea2d1cf
parentvideo_core/gpu_thread: Keep the write lock for allocating the fence. (diff)
downloadyuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.tar.gz
yuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.tar.xz
yuzu-e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305.zip
video_core: Use a CV for blocking commands.
There is no need for a busy loop here. Let's just use a condition variable to save some power.
-rw-r--r--src/video_core/gpu_thread.cpp43
-rw-r--r--src/video_core/gpu_thread.h11
2 files changed, 31 insertions, 23 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 9488bf544..7addfbc7b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -56,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
56 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { 56 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
57 rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); 57 rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
58 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 58 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
59 return; 59 ASSERT(state.is_running == false);
60 } else { 60 } else {
61 UNREACHABLE(); 61 UNREACHABLE();
62 } 62 }
63 state.signaled_fence.store(next.fence); 63 state.signaled_fence.store(next.fence);
64 if (next.block) {
65 // We have to hold the write_lock while notifying so that the condition_variable
66 // wait cannot race between its predicate check and the wait itself.
67 std::lock_guard lk(state.write_lock);
68 state.cv.notify_all();
69 }
64 } 70 }
65} 71}
66 72
@@ -105,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
105 case Settings::GPUAccuracy::Extreme: { 111 case Settings::GPUAccuracy::Extreme: {
106 auto& gpu = system.GPU(); 112 auto& gpu = system.GPU();
107 u64 fence = gpu.RequestFlush(addr, size); 113 u64 fence = gpu.RequestFlush(addr, size);
108 PushCommand(GPUTickCommand()); 114 PushCommand(GPUTickCommand(), true);
109 while (fence > gpu.CurrentFlushRequestFence()) { 115 ASSERT(fence <= gpu.CurrentFlushRequestFence());
110 }
111 break; 116 break;
112 } 117 }
113 default: 118 default:
@@ -124,18 +129,16 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
124 rasterizer->OnCPUWrite(addr, size); 129 rasterizer->OnCPUWrite(addr, size);
125} 130}
126 131
127void ThreadManager::WaitIdle() const {
128 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
129 state.is_running) {
130 }
131}
132
133void ThreadManager::ShutDown() { 132void ThreadManager::ShutDown() {
134 if (!state.is_running) { 133 if (!state.is_running) {
135 return; 134 return;
136 } 135 }
137 136
138 state.is_running = false; 137 {
138 std::lock_guard lk(state.write_lock);
139 state.is_running = false;
140 state.cv.notify_all();
141 }
139 142
140 if (!thread.joinable()) { 143 if (!thread.joinable()) {
141 return; 144 return;
@@ -150,15 +153,21 @@ void ThreadManager::OnCommandListEnd() {
150 PushCommand(OnCommandListEndCommand()); 153 PushCommand(OnCommandListEndCommand());
151} 154}
152 155
153u64 ThreadManager::PushCommand(CommandData&& command_data) { 156u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
157 if (!is_async) {
158 // In synchronous GPU mode, block the caller until the command has executed
159 block = true;
160 }
161
154 std::unique_lock lk(state.write_lock); 162 std::unique_lock lk(state.write_lock);
155 const u64 fence{++state.last_fence}; 163 const u64 fence{++state.last_fence};
156 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 164 state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
157 165
158 if (!is_async) { 166 if (block) {
159 // In synchronous GPU mode, block the caller until the command has executed 167 state.cv.wait(lk, [this, fence] {
160 lk.unlock(); 168 return fence <= state.signaled_fence.load(std::memory_order_relaxed) ||
161 WaitIdle(); 169 !state.is_running;
170 });
162 } 171 }
163 172
164 return fence; 173 return fence;
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cb901c22a..11a648f38 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -90,11 +90,12 @@ using CommandData =
90struct CommandDataContainer { 90struct CommandDataContainer {
91 CommandDataContainer() = default; 91 CommandDataContainer() = default;
92 92
93 explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) 93 explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_)
94 : data{std::move(data_)}, fence{next_fence_} {} 94 : data{std::move(data_)}, fence{next_fence_}, block(block_) {}
95 95
96 CommandData data; 96 CommandData data;
97 u64 fence{}; 97 u64 fence{};
98 bool block{};
98}; 99};
99 100
100/// Struct used to synchronize the GPU thread 101/// Struct used to synchronize the GPU thread
@@ -106,6 +107,7 @@ struct SynchState final {
106 CommandQueue queue; 107 CommandQueue queue;
107 u64 last_fence{}; 108 u64 last_fence{};
108 std::atomic<u64> signaled_fence{}; 109 std::atomic<u64> signaled_fence{};
110 std::condition_variable cv;
109}; 111};
110 112
111/// Class used to manage the GPU thread 113/// Class used to manage the GPU thread
@@ -140,10 +142,7 @@ public:
140 142
141private: 143private:
142 /// Pushes a command to be executed by the GPU thread 144 /// Pushes a command to be executed by the GPU thread
143 u64 PushCommand(CommandData&& command_data); 145 u64 PushCommand(CommandData&& command_data, bool block = false);
144
145 // Wait until the gpu thread is idle.
146 void WaitIdle() const;
147 146
148 Core::System& system; 147 Core::System& system;
149 const bool is_async; 148 const bool is_async;