diff options
| author | 2019-09-25 19:43:23 -0400 | |
|---|---|---|
| committer | 2019-10-04 19:59:48 -0400 | |
| commit | 5b5e60ffeca1a718cd980e74f0528d6ab91788cf (patch) | |
| tree | 56073a1d11122b8253a69f8e908f6f44687cc3d3 /src/video_core | |
| parent | Nvdrv: Correct Async regression and avoid signaling empty buffer vsyncs (diff) | |
| download | yuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.tar.gz yuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.tar.xz yuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.zip | |
GPU_Async: Correct fences, display events and more.
This commit uses guest fences on the vSync event instead of the artificial
fake fence we had.
It also fixes display events so they keep being signaled while the game is
loading, as the OS is supposed to send buffers to vSync during that time.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/gpu.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 3 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 6 |
4 files changed, 17 insertions, 19 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..d94be9c9d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| @@ -17,6 +18,8 @@ | |||
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | 20 | ||
| 21 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 22 | |||
| 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 23 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | : system{system}, renderer{renderer}, is_async{is_async} { | 24 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | auto& rasterizer{renderer.Rasterizer()}; | 25 | auto& rasterizer{renderer.Rasterizer()}; |
| @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 63 | return *dma_pusher; | 66 | return *dma_pusher; |
| 64 | } | 67 | } |
| 65 | 68 | ||
| 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | ||
| 70 | // Synced GPU, is always in sync | ||
| 71 | if (!is_async) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 75 | while (syncpoints[syncpoint_id].load() < value) { | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 67 | syncpoints[syncpoint_id]++; | 80 | syncpoints[syncpoint_id]++; |
| 68 | std::lock_guard lock{sync_mutex}; | 81 | std::lock_guard lock{sync_mutex}; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..e20b0687a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -177,6 +177,9 @@ public: | |||
| 177 | /// Returns a reference to the GPU DMA pusher. | 177 | /// Returns a reference to the GPU DMA pusher. |
| 178 | Tegra::DmaPusher& DmaPusher(); | 178 | Tegra::DmaPusher& DmaPusher(); |
| 179 | 179 | ||
| 180 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||
| 181 | void WaitFence(u32 syncpoint_id, u32 value) const; | ||
| 182 | |||
| 180 | void IncrementSyncPoint(u32 syncpoint_id); | 183 | void IncrementSyncPoint(u32 syncpoint_id); |
| 181 | 184 | ||
| 182 | u32 GetSyncpointValue(u32 syncpoint_id) const; | 185 | u32 GetSyncpointValue(u32 syncpoint_id) const; |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..d7048b6ae 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/core_timing_util.h" | ||
| 10 | #include "core/frontend/scope_acquire_window_context.h" | 8 | #include "core/frontend/scope_acquire_window_context.h" |
| 11 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { | |||
| 68 | 66 | ||
| 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | 67 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { |
| 70 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; |
| 71 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 72 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 73 | } | 69 | } |
| 74 | 70 | ||
| 75 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 71 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 76 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | 72 | PushCommand(SubmitListCommand(std::move(entries))); |
| 77 | const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; | ||
| 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | ||
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 75 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| @@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) { | |||
| 102 | return fence; | 96 | return fence; |
| 103 | } | 97 | } |
| 104 | 98 | ||
| 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 106 | void SynchState::WaitForSynchronization(u64 fence) { | ||
| 107 | while (signaled_fence.load() < fence) | ||
| 108 | ; | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::GPUThread | 99 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..108f456bd 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -21,9 +21,6 @@ class DmaPusher; | |||
| 21 | 21 | ||
| 22 | namespace Core { | 22 | namespace Core { |
| 23 | class System; | 23 | class System; |
| 24 | namespace Timing { | ||
| 25 | struct EventType; | ||
| 26 | } // namespace Timing | ||
| 27 | } // namespace Core | 24 | } // namespace Core |
| 28 | 25 | ||
| 29 | namespace VideoCommon::GPUThread { | 26 | namespace VideoCommon::GPUThread { |
| @@ -89,8 +86,6 @@ struct CommandDataContainer { | |||
| 89 | struct SynchState final { | 86 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 87 | std::atomic_bool is_running{true}; |
| 91 | 88 | ||
| 92 | void WaitForSynchronization(u64 fence); | ||
| 93 | |||
| 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 89 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 95 | CommandQueue queue; | 90 | CommandQueue queue; |
| 96 | u64 last_fence{}; | 91 | u64 last_fence{}; |
| @@ -128,7 +123,6 @@ private: | |||
| 128 | private: | 123 | private: |
| 129 | SynchState state; | 124 | SynchState state; |
| 130 | Core::System& system; | 125 | Core::System& system; |
| 131 | Core::Timing::EventType* synchronization_event{}; | ||
| 132 | std::thread thread; | 126 | std::thread thread; |
| 133 | std::thread::id thread_id; | 127 | std::thread::id thread_id; |
| 134 | }; | 128 | }; |