diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/threadsafe_queue.h | 10 | ||||
| -rw-r--r-- | src/core/core.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 62 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 15 |
6 files changed, 64 insertions, 33 deletions
diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h index a4647314a..ad04df8ca 100644 --- a/src/common/threadsafe_queue.h +++ b/src/common/threadsafe_queue.h | |||
| @@ -83,11 +83,15 @@ public: | |||
| 83 | return true; | 83 | return true; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | T PopWait() { | 86 | void Wait() { |
| 87 | if (Empty()) { | 87 | if (Empty()) { |
| 88 | std::unique_lock lock{cv_mutex}; | 88 | std::unique_lock lock{cv_mutex}; |
| 89 | cv.wait(lock, [this]() { return !Empty(); }); | 89 | cv.wait(lock, [this]() { return !Empty(); }); |
| 90 | } | 90 | } |
| 91 | } | ||
| 92 | |||
| 93 | T PopWait() { | ||
| 94 | Wait(); | ||
| 91 | T t; | 95 | T t; |
| 92 | Pop(t); | 96 | Pop(t); |
| 93 | return t; | 97 | return t; |
| @@ -156,6 +160,10 @@ public: | |||
| 156 | return spsc_queue.Pop(t); | 160 | return spsc_queue.Pop(t); |
| 157 | } | 161 | } |
| 158 | 162 | ||
| 163 | void Wait() { | ||
| 164 | spsc_queue.Wait(); | ||
| 165 | } | ||
| 166 | |||
| 159 | T PopWait() { | 167 | T PopWait() { |
| 160 | return spsc_queue.PopWait(); | 168 | return spsc_queue.PopWait(); |
| 161 | } | 169 | } |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 305f56ff1..56b47e671 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -296,7 +296,7 @@ struct System::Impl { | |||
| 296 | exit_lock = false; | 296 | exit_lock = false; |
| 297 | 297 | ||
| 298 | if (gpu_core) { | 298 | if (gpu_core) { |
| 299 | gpu_core->WaitIdle(); | 299 | gpu_core->ShutDown(); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | services.reset(); | 302 | services.reset(); |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index c61f44619..009c6f574 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | |||
| 517 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 517 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 518 | } | 518 | } |
| 519 | 519 | ||
| 520 | void GPU::WaitIdle() const { | 520 | void GPU::ShutDown() { |
| 521 | gpu_thread.WaitIdle(); | 521 | gpu_thread.ShutDown(); |
| 522 | } | 522 | } |
| 523 | 523 | ||
| 524 | void GPU::OnCommandListEnd() { | 524 | void GPU::OnCommandListEnd() { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b2ee45496..ecab35d3b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -219,8 +219,8 @@ public: | |||
| 219 | return *shader_notify; | 219 | return *shader_notify; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | // Waits for the GPU to finish working | 222 | // Stops the GPU execution and waits for the GPU to finish working |
| 223 | void WaitIdle() const; | 223 | void ShutDown(); |
| 224 | 224 | ||
| 225 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | 225 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |
| 226 | void WaitFence(u32 syncpoint_id, u32 value); | 226 | void WaitFence(u32 syncpoint_id, u32 value); |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 99353f15f..7addfbc7b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 29 | system.RegisterHostThread(); | 29 | system.RegisterHostThread(); |
| 30 | 30 | ||
| 31 | // Wait for first GPU command before acquiring the window context | 31 | // Wait for first GPU command before acquiring the window context |
| 32 | while (state.queue.Empty()) | 32 | state.queue.Wait(); |
| 33 | ; | ||
| 34 | 33 | ||
| 35 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 34 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 36 | if (!state.is_running) { | 35 | if (!state.is_running) { |
| @@ -57,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 57 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | 56 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 58 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | 57 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); |
| 59 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 58 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 60 | return; | 59 | ASSERT(state.is_running == false); |
| 61 | } else { | 60 | } else { |
| 62 | UNREACHABLE(); | 61 | UNREACHABLE(); |
| 63 | } | 62 | } |
| 64 | state.signaled_fence.store(next.fence); | 63 | state.signaled_fence.store(next.fence); |
| 64 | if (next.block) { | ||
| 65 | // We have to lock the write_lock to ensure that the condition_variable wait does not | ||
| 66 | // hit a race between the check and the lock itself. | ||
| 67 | std::lock_guard lk(state.write_lock); | ||
| 68 | state.cv.notify_all(); | ||
| 69 | } | ||
| 65 | } | 70 | } |
| 66 | } | 71 | } |
| 67 | 72 | ||
| @@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_) | |||
| 69 | : system{system_}, is_async{is_async_} {} | 74 | : system{system_}, is_async{is_async_} {} |
| 70 | 75 | ||
| 71 | ThreadManager::~ThreadManager() { | 76 | ThreadManager::~ThreadManager() { |
| 72 | if (!thread.joinable()) { | 77 | ShutDown(); |
| 73 | return; | ||
| 74 | } | ||
| 75 | |||
| 76 | // Notify GPU thread that a shutdown is pending | ||
| 77 | PushCommand(EndProcessingCommand()); | ||
| 78 | thread.join(); | ||
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 80 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| @@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 112 | case Settings::GPUAccuracy::Extreme: { | 111 | case Settings::GPUAccuracy::Extreme: { |
| 113 | auto& gpu = system.GPU(); | 112 | auto& gpu = system.GPU(); |
| 114 | u64 fence = gpu.RequestFlush(addr, size); | 113 | u64 fence = gpu.RequestFlush(addr, size); |
| 115 | PushCommand(GPUTickCommand()); | 114 | PushCommand(GPUTickCommand(), true); |
| 116 | while (fence > gpu.CurrentFlushRequestFence()) { | 115 | ASSERT(fence <= gpu.CurrentFlushRequestFence()); |
| 117 | } | ||
| 118 | break; | 116 | break; |
| 119 | } | 117 | } |
| 120 | default: | 118 | default: |
| @@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 131 | rasterizer->OnCPUWrite(addr, size); | 129 | rasterizer->OnCPUWrite(addr, size); |
| 132 | } | 130 | } |
| 133 | 131 | ||
| 134 | void ThreadManager::WaitIdle() const { | 132 | void ThreadManager::ShutDown() { |
| 135 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && | 133 | if (!state.is_running) { |
| 136 | system.IsPoweredOn()) { | 134 | return; |
| 137 | } | 135 | } |
| 136 | |||
| 137 | { | ||
| 138 | std::lock_guard lk(state.write_lock); | ||
| 139 | state.is_running = false; | ||
| 140 | state.cv.notify_all(); | ||
| 141 | } | ||
| 142 | |||
| 143 | if (!thread.joinable()) { | ||
| 144 | return; | ||
| 145 | } | ||
| 146 | |||
| 147 | // Notify GPU thread that a shutdown is pending | ||
| 148 | PushCommand(EndProcessingCommand()); | ||
| 149 | thread.join(); | ||
| 138 | } | 150 | } |
| 139 | 151 | ||
| 140 | void ThreadManager::OnCommandListEnd() { | 152 | void ThreadManager::OnCommandListEnd() { |
| 141 | PushCommand(OnCommandListEndCommand()); | 153 | PushCommand(OnCommandListEndCommand()); |
| 142 | } | 154 | } |
| 143 | 155 | ||
| 144 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 156 | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { |
| 145 | const u64 fence{++state.last_fence}; | ||
| 146 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | ||
| 147 | |||
| 148 | if (!is_async) { | 157 | if (!is_async) { |
| 149 | // In synchronous GPU mode, block the caller until the command has executed | 158 | // In synchronous GPU mode, block the caller until the command has executed |
| 150 | WaitIdle(); | 159 | block = true; |
| 160 | } | ||
| 161 | |||
| 162 | std::unique_lock lk(state.write_lock); | ||
| 163 | const u64 fence{++state.last_fence}; | ||
| 164 | state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); | ||
| 165 | |||
| 166 | if (block) { | ||
| 167 | state.cv.wait(lk, [this, fence] { | ||
| 168 | return fence <= state.signaled_fence.load(std::memory_order_relaxed) || | ||
| 169 | !state.is_running; | ||
| 170 | }); | ||
| 151 | } | 171 | } |
| 152 | 172 | ||
| 153 | return fence; | 173 | return fence; |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 18269e51c..11a648f38 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -90,21 +90,24 @@ using CommandData = | |||
| 90 | struct CommandDataContainer { | 90 | struct CommandDataContainer { |
| 91 | CommandDataContainer() = default; | 91 | CommandDataContainer() = default; |
| 92 | 92 | ||
| 93 | explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) | 93 | explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) |
| 94 | : data{std::move(data_)}, fence{next_fence_} {} | 94 | : data{std::move(data_)}, fence{next_fence_}, block(block_) {} |
| 95 | 95 | ||
| 96 | CommandData data; | 96 | CommandData data; |
| 97 | u64 fence{}; | 97 | u64 fence{}; |
| 98 | bool block{}; | ||
| 98 | }; | 99 | }; |
| 99 | 100 | ||
| 100 | /// Struct used to synchronize the GPU thread | 101 | /// Struct used to synchronize the GPU thread |
| 101 | struct SynchState final { | 102 | struct SynchState final { |
| 102 | std::atomic_bool is_running{true}; | 103 | std::atomic_bool is_running{true}; |
| 103 | 104 | ||
| 104 | using CommandQueue = Common::MPSCQueue<CommandDataContainer>; | 105 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 106 | std::mutex write_lock; | ||
| 105 | CommandQueue queue; | 107 | CommandQueue queue; |
| 106 | u64 last_fence{}; | 108 | u64 last_fence{}; |
| 107 | std::atomic<u64> signaled_fence{}; | 109 | std::atomic<u64> signaled_fence{}; |
| 110 | std::condition_variable cv; | ||
| 108 | }; | 111 | }; |
| 109 | 112 | ||
| 110 | /// Class used to manage the GPU thread | 113 | /// Class used to manage the GPU thread |
| @@ -132,14 +135,14 @@ public: | |||
| 132 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 135 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 133 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 136 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 134 | 137 | ||
| 135 | // Wait until the gpu thread is idle. | 138 | // Stops the GPU execution and waits for the GPU to finish working |
| 136 | void WaitIdle() const; | 139 | void ShutDown(); |
| 137 | 140 | ||
| 138 | void OnCommandListEnd(); | 141 | void OnCommandListEnd(); |
| 139 | 142 | ||
| 140 | private: | 143 | private: |
| 141 | /// Pushes a command to be executed by the GPU thread | 144 | /// Pushes a command to be executed by the GPU thread |
| 142 | u64 PushCommand(CommandData&& command_data); | 145 | u64 PushCommand(CommandData&& command_data, bool block = false); |
| 143 | 146 | ||
| 144 | Core::System& system; | 147 | Core::System& system; |
| 145 | const bool is_async; | 148 | const bool is_async; |