summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2019-09-25 19:43:23 -0400
committerGravatar FernandoS272019-10-04 19:59:48 -0400
commit5b5e60ffeca1a718cd980e74f0528d6ab91788cf (patch)
tree56073a1d11122b8253a69f8e908f6f44687cc3d3
parentNvdrv: Correct Async regression and avoid signaling empty buffer vsyncs (diff)
downloadyuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.tar.gz
yuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.tar.xz
yuzu-5b5e60ffeca1a718cd980e74f0528d6ab91788cf.zip
GPU_Async: Correct fences, display events and more.
This commit uses guest fences on the vSync event instead of the artificial fake fence we had. It also corrects the behavior to keep signaling display events while the game is loading, as the OS is supposed to send buffers to vSync during that time.
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp21
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h2
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/gpu.h3
-rw-r--r--src/video_core/gpu_thread.cpp14
-rw-r--r--src/video_core/gpu_thread.h6
6 files changed, 38 insertions, 21 deletions
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3b251f8c8..86a90526c 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -36,6 +36,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) {
36 displays.emplace_back(3, "Internal", system); 36 displays.emplace_back(3, "Internal", system);
37 displays.emplace_back(4, "Null", system); 37 displays.emplace_back(4, "Null", system);
38 38
39 for (auto& display : displays) {
40 display.SignalVSyncEvent();
41 }
42
39 // Schedule the screen composition events 43 // Schedule the screen composition events
40 composition_event = system.CoreTiming().RegisterEvent( 44 composition_event = system.CoreTiming().RegisterEvent(
41 "ScreenComposition", [this](u64 userdata, s64 cycles_late) { 45 "ScreenComposition", [this](u64 userdata, s64 cycles_late) {
@@ -173,7 +177,13 @@ void NVFlinger::Compose() {
173 bool trigger_event = false; 177 bool trigger_event = false;
174 // Trigger vsync for this display at the end of drawing 178 // Trigger vsync for this display at the end of drawing
175 SCOPE_EXIT({ 179 SCOPE_EXIT({
176 if (trigger_event) { 180 // TODO(Blinkhawk): Correctly send buffers through nvflinger while
181 // loading the game thorugh the OS.
182 // During loading, the OS takes care of sending buffers to vsync,
183 // thus it triggers, since this is not properly emulated due to
184 // HLE complications, we allow it to signal until the game enqueues
185 // it's first buffer.
186 if (trigger_event || !first_buffer_enqueued) {
177 display.SignalVSyncEvent(); 187 display.SignalVSyncEvent();
178 } 188 }
179 }); 189 });
@@ -193,13 +203,20 @@ void NVFlinger::Compose() {
193 203
194 if (!buffer) { 204 if (!buffer) {
195 // There was no queued buffer to draw, render previous frame 205 // There was no queued buffer to draw, render previous frame
196 system.GetPerfStats().EndGameFrame();
197 system.GPU().SwapBuffers({}); 206 system.GPU().SwapBuffers({});
198 continue; 207 continue;
199 } 208 }
200 209
201 const auto& igbp_buffer = buffer->get().igbp_buffer; 210 const auto& igbp_buffer = buffer->get().igbp_buffer;
202 trigger_event = true; 211 trigger_event = true;
212 first_buffer_enqueued = true;
213
214 const auto& gpu = system.GPU();
215 const auto& multi_fence = buffer->get().multi_fence;
216 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
217 const auto& fence = multi_fence.fences[fence_id];
218 gpu.WaitFence(fence.id, fence.value);
219 }
203 220
204 // Now send the buffer to the GPU for drawing. 221 // Now send the buffer to the GPU for drawing.
205 // TODO(Subv): Support more than just disp0. The display device selection is probably based 222 // TODO(Subv): Support more than just disp0. The display device selection is probably based
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 5d7e3bfb8..95d7278f5 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -102,6 +102,8 @@ private:
102 102
103 u32 swap_interval = 1; 103 u32 swap_interval = 1;
104 104
105 bool first_buffer_enqueued{};
106
105 /// Event that handles screen composition. 107 /// Event that handles screen composition.
106 Core::Timing::EventType* composition_event; 108 Core::Timing::EventType* composition_event;
107 109
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 76cfe8107..d94be9c9d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/core_timing.h" 8#include "core/core_timing.h"
8#include "core/memory.h" 9#include "core/memory.h"
@@ -17,6 +18,8 @@
17 18
18namespace Tegra { 19namespace Tegra {
19 20
21MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
22
20GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) 23GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
21 : system{system}, renderer{renderer}, is_async{is_async} { 24 : system{system}, renderer{renderer}, is_async{is_async} {
22 auto& rasterizer{renderer.Rasterizer()}; 25 auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
63 return *dma_pusher; 66 return *dma_pusher;
64} 67}
65 68
69void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
70 // Synced GPU, is always in sync
71 if (!is_async) {
72 return;
73 }
74 MICROPROFILE_SCOPE(GPU_wait);
75 while (syncpoints[syncpoint_id].load() < value) {
76 }
77}
78
66void GPU::IncrementSyncPoint(const u32 syncpoint_id) { 79void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
67 syncpoints[syncpoint_id]++; 80 syncpoints[syncpoint_id]++;
68 std::lock_guard lock{sync_mutex}; 81 std::lock_guard lock{sync_mutex};
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..e20b0687a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,9 @@ public:
177 /// Returns a reference to the GPU DMA pusher. 177 /// Returns a reference to the GPU DMA pusher.
178 Tegra::DmaPusher& DmaPusher(); 178 Tegra::DmaPusher& DmaPusher();
179 179
180 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
181 void WaitFence(u32 syncpoint_id, u32 value) const;
182
180 void IncrementSyncPoint(u32 syncpoint_id); 183 void IncrementSyncPoint(u32 syncpoint_id);
181 184
182 u32 GetSyncpointValue(u32 syncpoint_id) const; 185 u32 GetSyncpointValue(u32 syncpoint_id) const;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..d7048b6ae 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
10#include "core/frontend/scope_acquire_window_context.h" 8#include "core/frontend/scope_acquire_window_context.h"
11#include "video_core/dma_pusher.h" 9#include "video_core/dma_pusher.h"
12#include "video_core/gpu.h" 10#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
68 66
69void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { 67void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
70 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; 68 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
71 synchronization_event = system.CoreTiming().RegisterEvent(
72 "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
73} 69}
74 70
75void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 71void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
76 const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; 72 PushCommand(SubmitListCommand(std::move(entries)));
77 const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
78 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
79} 73}
80 74
81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 75void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) {
102 return fence; 96 return fence;
103} 97}
104 98
105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
106void SynchState::WaitForSynchronization(u64 fence) {
107 while (signaled_fence.load() < fence)
108 ;
109}
110
111} // namespace VideoCommon::GPUThread 99} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..108f456bd 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
21 21
22namespace Core { 22namespace Core {
23class System; 23class System;
24namespace Timing {
25struct EventType;
26} // namespace Timing
27} // namespace Core 24} // namespace Core
28 25
29namespace VideoCommon::GPUThread { 26namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
89struct SynchState final { 86struct SynchState final {
90 std::atomic_bool is_running{true}; 87 std::atomic_bool is_running{true};
91 88
92 void WaitForSynchronization(u64 fence);
93
94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 89 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
95 CommandQueue queue; 90 CommandQueue queue;
96 u64 last_fence{}; 91 u64 last_fence{};
@@ -128,7 +123,6 @@ private:
128private: 123private:
129 SynchState state; 124 SynchState state;
130 Core::System& system; 125 Core::System& system;
131 Core::Timing::EventType* synchronization_event{};
132 std::thread thread; 126 std::thread thread;
133 std::thread::id thread_id; 127 std::thread::id thread_id;
134}; 128};