diff options
| author | 2019-06-10 08:36:22 -0400 | |
|---|---|---|
| committer | 2019-07-05 15:49:20 -0400 | |
| commit | f2e026a1d8fb2384c1ece24e6dd32062b4f390a2 (patch) | |
| tree | 354bc916012ca989a00da14ee4ab7770b07710a8 | |
| parent | nv_host_ctrl: Make Sync GPU variant always return synced result. (diff) | |
| download | yuzu-f2e026a1d8fb2384c1ece24e6dd32062b4f390a2.tar.gz yuzu-f2e026a1d8fb2384c1ece24e6dd32062b4f390a2.tar.xz yuzu-f2e026a1d8fb2384c1ece24e6dd32062b4f390a2.zip | |
gpu_asynch: Simplify synchronization to a consumer->producer scheme.
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 32 |
2 files changed, 3 insertions, 47 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 692655395..b87938fdd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
| @@ -21,7 +21,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 21 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 22 | 22 | ||
| 23 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| 24 | state.WaitForCommands(); | 24 | while (state.queue.Empty()); |
| 25 | 25 | ||
| 26 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 26 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 27 | if (!state.is_running) { | 27 | if (!state.is_running) { |
| @@ -32,7 +32,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 32 | 32 | ||
| 33 | CommandDataContainer next; | 33 | CommandDataContainer next; |
| 34 | while (state.is_running) { | 34 | while (state.is_running) { |
| 35 | state.WaitForCommands(); | ||
| 36 | while (!state.queue.Empty()) { | 35 | while (!state.queue.Empty()) { |
| 37 | state.queue.Pop(next); | 36 | state.queue.Pop(next); |
| 38 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 37 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| @@ -49,8 +48,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 49 | } else { | 48 | } else { |
| 50 | UNREACHABLE(); | 49 | UNREACHABLE(); |
| 51 | } | 50 | } |
| 52 | state.signaled_fence = next.fence; | 51 | state.signaled_fence.store(next.fence); |
| 53 | state.TrySynchronize(); | ||
| 54 | } | 52 | } |
| 55 | } | 53 | } |
| 56 | } | 54 | } |
| @@ -100,22 +98,12 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 100 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 101 | const u64 fence{++state.last_fence}; | 99 | const u64 fence{++state.last_fence}; |
| 102 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 103 | state.SignalCommands(); | ||
| 104 | return fence; | 101 | return fence; |
| 105 | } | 102 | } |
| 106 | 103 | ||
| 107 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 104 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 108 | void SynchState::WaitForSynchronization(u64 fence) { | 105 | void SynchState::WaitForSynchronization(u64 fence) { |
| 109 | if (signaled_fence >= fence) { | 106 | while (signaled_fence.load() < fence); |
| 110 | return; | ||
| 111 | } | ||
| 112 | |||
| 113 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 114 | { | ||
| 115 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 116 | std::unique_lock lock{synchronization_mutex}; | ||
| 117 | synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); | ||
| 118 | } | ||
| 119 | } | 107 | } |
| 120 | 108 | ||
| 121 | } // namespace VideoCommon::GPUThread | 109 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a72..1d9d0c39e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
| @@ -88,41 +88,9 @@ struct CommandDataContainer { | |||
| 88 | /// Struct used to synchronize the GPU thread | 88 | /// Struct used to synchronize the GPU thread |
| 89 | struct SynchState final { | 89 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 90 | std::atomic_bool is_running{true}; |
| 91 | std::atomic_int queued_frame_count{}; | ||
| 92 | std::mutex synchronization_mutex; | ||
| 93 | std::mutex commands_mutex; | ||
| 94 | std::condition_variable commands_condition; | ||
| 95 | std::condition_variable synchronization_condition; | ||
| 96 | |||
| 97 | /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU | ||
| 98 | /// synchronized. This is entirely empirical. | ||
| 99 | bool IsSynchronized() const { | ||
| 100 | constexpr std::size_t max_queue_gap{5}; | ||
| 101 | return queue.Size() <= max_queue_gap; | ||
| 102 | } | ||
| 103 | |||
| 104 | void TrySynchronize() { | ||
| 105 | if (IsSynchronized()) { | ||
| 106 | std::lock_guard lock{synchronization_mutex}; | ||
| 107 | synchronization_condition.notify_one(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | 91 | ||
| 111 | void WaitForSynchronization(u64 fence); | 92 | void WaitForSynchronization(u64 fence); |
| 112 | 93 | ||
| 113 | void SignalCommands() { | ||
| 114 | if (queue.Empty()) { | ||
| 115 | return; | ||
| 116 | } | ||
| 117 | |||
| 118 | commands_condition.notify_one(); | ||
| 119 | } | ||
| 120 | |||
| 121 | void WaitForCommands() { | ||
| 122 | std::unique_lock lock{commands_mutex}; | ||
| 123 | commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||
| 124 | } | ||
| 125 | |||
| 126 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 127 | CommandQueue queue; | 95 | CommandQueue queue; |
| 128 | u64 last_fence{}; | 96 | u64 last_fence{}; |