diff options
| author | 2019-04-04 22:38:04 -0400 | |
|---|---|---|
| committer | 2019-04-04 22:38:04 -0400 | |
| commit | 66be5150d6d201e3f8ca6e5e09968f052df4beb1 (patch) | |
| tree | ff6a45a1b6dce02eadd9f432cb1b8bda90f66685 /src/video_core/gpu_thread.h | |
| parent | Merge pull request #2292 from lioncash/nacp (diff) | |
| parent | gpu_thread: Improve synchronization by using CoreTiming. (diff) | |
| download | yuzu-66be5150d6d201e3f8ca6e5e09968f052df4beb1.tar.gz yuzu-66be5150d6d201e3f8ca6e5e09968f052df4beb1.tar.xz yuzu-66be5150d6d201e3f8ca6e5e09968f052df4beb1.zip | |
Merge pull request #2282 from bunnei/gpu-asynch-v2
gpu_thread: Improve synchronization by using CoreTiming.
Diffstat (limited to 'src/video_core/gpu_thread.h')
| -rw-r--r-- | src/video_core/gpu_thread.h | 71 |
1 file changed, 31 insertions, 40 deletions
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 70acb2e79..62bcea5bb 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -19,9 +19,12 @@ struct FramebufferConfig; | |||
| 19 | class DmaPusher; | 19 | class DmaPusher; |
| 20 | } // namespace Tegra | 20 | } // namespace Tegra |
| 21 | 21 | ||
| 22 | namespace VideoCore { | 22 | namespace Core { |
| 23 | class RendererBase; | 23 | class System; |
| 24 | } // namespace VideoCore | 24 | namespace Timing { |
| 25 | struct EventType; | ||
| 26 | } // namespace Timing | ||
| 27 | } // namespace Core | ||
| 25 | 28 | ||
| 26 | namespace VideoCommon::GPUThread { | 29 | namespace VideoCommon::GPUThread { |
| 27 | 30 | ||
| @@ -75,63 +78,47 @@ using CommandData = | |||
| 75 | struct CommandDataContainer { | 78 | struct CommandDataContainer { |
| 76 | CommandDataContainer() = default; | 79 | CommandDataContainer() = default; |
| 77 | 80 | ||
| 78 | CommandDataContainer(CommandData&& data) : data{std::move(data)} {} | 81 | CommandDataContainer(CommandData&& data, u64 next_fence) |
| 82 | : data{std::move(data)}, fence{next_fence} {} | ||
| 79 | 83 | ||
| 80 | CommandDataContainer& operator=(const CommandDataContainer& t) { | 84 | CommandDataContainer& operator=(const CommandDataContainer& t) { |
| 81 | data = std::move(t.data); | 85 | data = std::move(t.data); |
| 86 | fence = t.fence; | ||
| 82 | return *this; | 87 | return *this; |
| 83 | } | 88 | } |
| 84 | 89 | ||
| 85 | CommandData data; | 90 | CommandData data; |
| 91 | u64 fence{}; | ||
| 86 | }; | 92 | }; |
| 87 | 93 | ||
| 88 | /// Struct used to synchronize the GPU thread | 94 | /// Struct used to synchronize the GPU thread |
| 89 | struct SynchState final { | 95 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 96 | std::atomic_bool is_running{true}; |
| 91 | std::atomic_int queued_frame_count{}; | 97 | std::atomic_int queued_frame_count{}; |
| 92 | std::mutex frames_mutex; | 98 | std::mutex synchronization_mutex; |
| 93 | std::mutex commands_mutex; | 99 | std::mutex commands_mutex; |
| 94 | std::condition_variable commands_condition; | 100 | std::condition_variable commands_condition; |
| 95 | std::condition_variable frames_condition; | 101 | std::condition_variable synchronization_condition; |
| 96 | 102 | ||
| 97 | void IncrementFramesCounter() { | 103 | /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU |
| 98 | std::lock_guard lock{frames_mutex}; | 104 | /// synchronized. This is entirely empirical. |
| 99 | ++queued_frame_count; | 105 | bool IsSynchronized() const { |
| 106 | constexpr std::size_t max_queue_gap{5}; | ||
| 107 | return queue.Size() <= max_queue_gap; | ||
| 100 | } | 108 | } |
| 101 | 109 | ||
| 102 | void DecrementFramesCounter() { | 110 | void TrySynchronize() { |
| 103 | { | 111 | if (IsSynchronized()) { |
| 104 | std::lock_guard lock{frames_mutex}; | 112 | std::lock_guard<std::mutex> lock{synchronization_mutex}; |
| 105 | --queued_frame_count; | 113 | synchronization_condition.notify_one(); |
| 106 | |||
| 107 | if (queued_frame_count) { | ||
| 108 | return; | ||
| 109 | } | ||
| 110 | } | 114 | } |
| 111 | frames_condition.notify_one(); | ||
| 112 | } | 115 | } |
| 113 | 116 | ||
| 114 | void WaitForFrames() { | 117 | void WaitForSynchronization(u64 fence); |
| 115 | { | ||
| 116 | std::lock_guard lock{frames_mutex}; | ||
| 117 | if (!queued_frame_count) { | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 123 | { | ||
| 124 | std::unique_lock lock{frames_mutex}; | ||
| 125 | frames_condition.wait(lock, [this] { return !queued_frame_count; }); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | 118 | ||
| 129 | void SignalCommands() { | 119 | void SignalCommands() { |
| 130 | { | 120 | if (queue.Empty()) { |
| 131 | std::unique_lock lock{commands_mutex}; | 121 | return; |
| 132 | if (queue.Empty()) { | ||
| 133 | return; | ||
| 134 | } | ||
| 135 | } | 122 | } |
| 136 | 123 | ||
| 137 | commands_condition.notify_one(); | 124 | commands_condition.notify_one(); |
| @@ -144,12 +131,15 @@ struct SynchState final { | |||
| 144 | 131 | ||
| 145 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 132 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 146 | CommandQueue queue; | 133 | CommandQueue queue; |
| 134 | u64 last_fence{}; | ||
| 135 | std::atomic<u64> signaled_fence{}; | ||
| 147 | }; | 136 | }; |
| 148 | 137 | ||
| 149 | /// Class used to manage the GPU thread | 138 | /// Class used to manage the GPU thread |
| 150 | class ThreadManager final { | 139 | class ThreadManager final { |
| 151 | public: | 140 | public: |
| 152 | explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); | 141 | explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, |
| 142 | Tegra::DmaPusher& dma_pusher); | ||
| 153 | ~ThreadManager(); | 143 | ~ThreadManager(); |
| 154 | 144 | ||
| 155 | /// Push GPU command entries to be processed | 145 | /// Push GPU command entries to be processed |
| @@ -170,11 +160,12 @@ public: | |||
| 170 | 160 | ||
| 171 | private: | 161 | private: |
| 172 | /// Pushes a command to be executed by the GPU thread | 162 | /// Pushes a command to be executed by the GPU thread |
| 173 | void PushCommand(CommandData&& command_data); | 163 | u64 PushCommand(CommandData&& command_data); |
| 174 | 164 | ||
| 175 | private: | 165 | private: |
| 176 | SynchState state; | 166 | SynchState state; |
| 177 | VideoCore::RendererBase& renderer; | 167 | Core::System& system; |
| 168 | Core::Timing::EventType* synchronization_event{}; | ||
| 178 | std::thread thread; | 169 | std::thread thread; |
| 179 | std::thread::id thread_id; | 170 | std::thread::id thread_id; |
| 180 | }; | 171 | }; |