author    Fernando Sahmkow    2022-01-30 10:31:13 +0100
committer Fernando Sahmkow    2022-10-06 21:00:52 +0200
commit    668e80a9f42fb4ce0e16f6381d05bcbd286b2da1
tree      a1c668d6c3d00eade849b1d31dba4116095e4c12 /src/video_core/gpu.cpp
parent    Texture Cache: Fix GC and GPU Modified on Joins.
VideoCore: Refactor syncing.
Diffstat (limited to 'src/video_core/gpu.cpp')
 src/video_core/gpu.cpp | 197
 1 file changed, 102 insertions(+), 95 deletions(-)
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index eebd7f3ff..1097db08a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -28,6 +28,8 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
@@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
+        : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
           gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
@@ -115,31 +117,35 @@ struct GPU::Impl {
     }
 
     /// Request a host GPU memory flush from the CPU.
-    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
-        std::unique_lock lck{flush_request_mutex};
-        const u64 fence = ++last_flush_fence;
-        flush_requests.emplace_back(fence, addr, size);
+    template <typename Func>
+    [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+        std::unique_lock lck{sync_request_mutex};
+        const u64 fence = ++last_sync_fence;
+        sync_requests.emplace_back(action);
         return fence;
     }
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
+    [[nodiscard]] u64 CurrentSyncRequestFence() const {
+        return current_sync_fence.load(std::memory_order_relaxed);
+    }
+
+    void WaitForSyncOperation(const u64 fence) {
+        std::unique_lock lck{sync_request_mutex};
+        sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
     }
 
     /// Tick pending requests within the GPU.
     void TickWork() {
-        std::unique_lock lck{flush_request_mutex};
-        while (!flush_requests.empty()) {
-            auto& request = flush_requests.front();
-            const u64 fence = request.fence;
-            const VAddr addr = request.addr;
-            const std::size_t size = request.size;
-            flush_requests.pop_front();
-            flush_request_mutex.unlock();
-            rasterizer->FlushRegion(addr, size);
-            current_flush_fence.store(fence);
-            flush_request_mutex.lock();
+        std::unique_lock lck{sync_request_mutex};
+        while (!sync_requests.empty()) {
+            auto request = std::move(sync_requests.front());
+            sync_requests.pop_front();
+            sync_request_mutex.unlock();
+            request();
+            current_sync_fence.fetch_add(1, std::memory_order_release);
+            sync_request_mutex.lock();
+            sync_request_cv.notify_all();
         }
     }
 
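The hunk above generalizes the old flush-only request queue: the CPU enqueues arbitrary closures, the GPU thread runs them in FIFO order inside TickWork, and the fence pair (last_sync_fence on the producer side, current_sync_fence on the consumer side) plus a condition variable lets callers block on completion. A minimal, self-contained sketch of that pattern, assuming only the C++ standard library — the class name SyncQueue and its members are hypothetical stand-ins, not yuzu's actual code:

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <list>
#include <mutex>

class SyncQueue {
public:
    // Producer side (CPU thread): enqueue any callable, get a fence id back.
    uint64_t Request(std::function<void()> action) {
        std::unique_lock lck{mutex};
        const uint64_t fence = ++last_fence;
        sync_requests.emplace_back(std::move(action));
        return fence;
    }

    // Consumer side (GPU thread): drain the queue, running each request with
    // the lock dropped, then publish progress and wake any waiters.
    void Tick() {
        std::unique_lock lck{mutex};
        while (!sync_requests.empty()) {
            auto request = std::move(sync_requests.front());
            sync_requests.pop_front();
            lck.unlock(); // Run the request without holding the lock.
            request();
            current_fence.fetch_add(1, std::memory_order_release);
            lck.lock();
            cv.notify_all(); // Wake anyone blocked in Wait().
        }
    }

    // Blocks until the consumer has executed at least `fence` requests.
    void Wait(uint64_t fence) {
        std::unique_lock lck{mutex};
        cv.wait(lck, [&] { return current_fence.load(std::memory_order_acquire) >= fence; });
    }

private:
    std::list<std::function<void()>> sync_requests;
    std::atomic<uint64_t> current_fence{};
    uint64_t last_fence{};
    std::mutex mutex;
    std::condition_variable cv;
};

Dropping the lock around request() lets new operations be enqueued while one executes, which is why the loop re-checks empty() on every iteration rather than snapshotting the queue once.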
@@ -207,78 +213,26 @@ struct GPU::Impl {
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value) {
-        // Synced GPU, is always in sync
-        if (!is_async) {
-            return;
-        }
         if (syncpoint_id == UINT32_MAX) {
-            // TODO: Research what this does.
-            LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
             return;
         }
         MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{sync_mutex};
-        sync_cv.wait(lock, [=, this] {
-            if (shutting_down.load(std::memory_order_relaxed)) {
-                // We're shutting down, ensure no threads continue to wait for the next syncpoint
-                return true;
-            }
-            return syncpoints.at(syncpoint_id).load() >= value;
-        });
+        host1x.GetSyncpointManager().WaitHost(syncpoint_id, value);
     }
 
     void IncrementSyncPoint(u32 syncpoint_id) {
-        auto& syncpoint = syncpoints.at(syncpoint_id);
-        syncpoint++;
-        std::scoped_lock lock{sync_mutex};
-        sync_cv.notify_all();
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        if (!interrupt.empty()) {
-            u32 value = syncpoint.load();
-            auto it = interrupt.begin();
-            while (it != interrupt.end()) {
-                if (value >= *it) {
-                    TriggerCpuInterrupt(syncpoint_id, *it);
-                    it = interrupt.erase(it);
-                    continue;
-                }
-                it++;
-            }
-        }
+        host1x.GetSyncpointManager().IncrementHost(syncpoint_id);
     }
 
     [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).load();
+        return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id);
     }
 
     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        u32 current_value = syncpoints.at(syncpoint_id).load();
-        if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) {
+        auto& syncpoint_manager = host1x.GetSyncpointManager();
+        syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() {
             TriggerCpuInterrupt(syncpoint_id, value);
-            return;
-        }
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        bool contains = std::any_of(interrupt.begin(), interrupt.end(),
-                                    [value](u32 in_value) { return in_value == value; });
-        if (contains) {
-            return;
-        }
-        interrupt.emplace_back(value);
-    }
-
-    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        const auto iter =
-            std::find_if(interrupt.begin(), interrupt.end(),
-                         [value](u32 interrupt_value) { return value == interrupt_value; });
-
-        if (iter == interrupt.end()) {
-            return false;
-        }
-        interrupt.erase(iter);
-        return true;
+        });
     }
 
     [[nodiscard]] u64 GetTicks() const {
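After this hunk, all syncpoint bookkeeping lives behind host1x.GetSyncpointManager() and gpu.cpp only forwards to it. Based solely on the calls visible above (WaitHost, IncrementHost, GetHostSyncpointValue, RegisterHostAction), a plausible sketch of such a manager could look like the following; every name here is hypothetical, and yuzu's real implementation in video_core/host1x/syncpoint_manager.h differs in detail:

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <list>
#include <mutex>

class SyncpointManagerSketch {
public:
    void IncrementHost(uint32_t id) {
        auto& sp = syncpoints[id];
        std::scoped_lock lock{sp.mutex};
        const uint32_t value = ++sp.counter;
        // Fire every registered action whose threshold has now been reached.
        for (auto it = sp.actions.begin(); it != sp.actions.end();) {
            if (value >= it->threshold) {
                it->action();
                it = sp.actions.erase(it);
            } else {
                ++it;
            }
        }
        sp.cv.notify_all();
    }

    void WaitHost(uint32_t id, uint32_t expected) {
        auto& sp = syncpoints[id];
        std::unique_lock lock{sp.mutex};
        sp.cv.wait(lock, [&] { return sp.counter.load() >= expected; });
    }

    uint32_t GetHostSyncpointValue(uint32_t id) const {
        return syncpoints[id].counter.load();
    }

    void RegisterHostAction(uint32_t id, uint32_t threshold, std::function<void()> action) {
        auto& sp = syncpoints[id];
        std::scoped_lock lock{sp.mutex};
        if (sp.counter.load() >= threshold) {
            action(); // Already signalled; run immediately, as the old gpu.cpp path did.
            return;
        }
        sp.actions.push_back({threshold, std::move(action)});
    }

private:
    struct RegisteredAction {
        uint32_t threshold;
        std::function<void()> action;
    };
    struct Syncpoint {
        std::atomic<uint32_t> counter{};
        std::mutex mutex;
        std::condition_variable cv;
        std::list<RegisteredAction> actions;
    };
    // host1x exposes a fixed pool of syncpoints; the exact count is
    // hardware-specific (192 is an assumption for Tegra-class hardware).
    Syncpoint syncpoints[192];
};

Note how this subsumes both halves of the deleted code: WaitHost replaces the sync_cv wait loop, and RegisterHostAction replaces the manual syncpt_interrupts list that IncrementSyncPoint used to scan.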
@@ -387,8 +341,48 @@ struct GPU::Impl {
         interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
     }
 
+    void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                            Service::Nvidia::NvFence* fences, size_t num_fences) {
+        size_t current_request_counter{};
+        {
+            std::unique_lock<std::mutex> lk(request_swap_mutex);
+            if (free_swap_counters.empty()) {
+                current_request_counter = request_swap_counters.size();
+                request_swap_counters.emplace_back(num_fences);
+            } else {
+                current_request_counter = free_swap_counters.front();
+                request_swap_counters[current_request_counter] = num_fences;
+                free_swap_counters.pop_front();
+            }
+        }
+        const auto wait_fence =
+            RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
+                auto& syncpoint_manager = host1x.GetSyncpointManager();
+                if (num_fences == 0) {
+                    renderer->SwapBuffers(framebuffer);
+                }
+                const auto executer = [this, current_request_counter,
+                                       framebuffer_copy = *framebuffer]() {
+                    {
+                        std::unique_lock<std::mutex> lk(request_swap_mutex);
+                        if (--request_swap_counters[current_request_counter] != 0) {
+                            return;
+                        }
+                        free_swap_counters.push_back(current_request_counter);
+                    }
+                    renderer->SwapBuffers(&framebuffer_copy);
+                };
+                for (size_t i = 0; i < num_fences; i++) {
+                    syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
+                }
+            });
+        gpu_thread.TickGPU();
+        WaitForSyncOperation(wait_fence);
+    }
+
     GPU& gpu;
     Core::System& system;
+    Host1x::Host1x& host1x;
 
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
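RequestSwapBuffers defers presentation until every guest fence has signalled: each swap request claims a counter slot primed with num_fences, one callback per fence is registered with the syncpoint manager, and whichever callback brings the counter to zero performs the actual SwapBuffers and recycles the slot. A hedged sketch of just that countdown pattern, with PresentFrame standing in for renderer->SwapBuffers — the free-standing names here are hypothetical illustrations, not yuzu's API:

#include <cstddef>
#include <deque>
#include <functional>
#include <mutex>

std::mutex request_swap_mutex;
std::deque<size_t> free_swap_counters;    // Recycled counter slots.
std::deque<size_t> request_swap_counters; // Remaining fences per request.

// Allocates (or reuses) a counter slot primed with the number of fences.
size_t AllocateSwapCounter(size_t num_fences) {
    std::scoped_lock lk{request_swap_mutex};
    if (free_swap_counters.empty()) {
        request_swap_counters.push_back(num_fences);
        return request_swap_counters.size() - 1;
    }
    const size_t slot = free_swap_counters.front();
    free_swap_counters.pop_front();
    request_swap_counters[slot] = num_fences;
    return slot;
}

// Returns the callback to register once per fence; the last fence to signal
// frees the slot and triggers the present.
std::function<void()> MakeFenceCallback(size_t slot, std::function<void()> present) {
    return [slot, present = std::move(present)] {
        {
            std::scoped_lock lk{request_swap_mutex};
            if (--request_swap_counters[slot] != 0) {
                return; // Other fences are still pending.
            }
            free_swap_counters.push_back(slot);
        }
        present(); // PresentFrame stand-in: runs outside the lock.
    };
}

The slot free-list keeps the counter deque from growing without bound when many swap requests are in flight, which is exactly what the new free_swap_counters/request_swap_counters members below provide.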
@@ -411,18 +405,11 @@ struct GPU::Impl {
 
     std::condition_variable sync_cv;
 
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
+    std::list<std::function<void(void)>> sync_requests;
+    std::atomic<u64> current_sync_fence{};
+    u64 last_sync_fence{};
+    std::mutex sync_request_mutex;
+    std::condition_variable sync_request_cv;
 
     const bool is_async;
 
@@ -433,6 +420,10 @@ struct GPU::Impl {
     std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
     Tegra::Control::ChannelState* current_channel;
     s32 bound_channel{-1};
+
+    std::deque<size_t> free_swap_counters;
+    std::deque<size_t> request_swap_counters;
+    std::mutex request_swap_mutex;
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() {
 }
 
 u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
-    return impl->RequestFlush(addr, size);
+    return impl->RequestSyncOperation(
+        [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
+}
+
+u64 GPU::CurrentSyncRequestFence() const {
+    return impl->CurrentSyncRequestFence();
 }
 
-u64 GPU::CurrentFlushRequestFence() const {
-    return impl->CurrentFlushRequestFence();
+void GPU::WaitForSyncOperation(u64 fence) {
+    return impl->WaitForSyncOperation(fence);
 }
 
 void GPU::TickWork() {
     impl->TickWork();
 }
 
+/// Gets a mutable reference to the Host1x interface
+Host1x::Host1x& GPU::Host1x() {
+    return impl->host1x;
+}
+
+/// Gets an immutable reference to the Host1x interface.
+const Host1x::Host1x& GPU::Host1x() const {
+    return impl->host1x;
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return impl->Maxwell3D();
 }
@@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
     return impl->ShaderNotify();
 }
 
+void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                             Service::Nvidia::NvFence* fences, size_t num_fences) {
+    impl->RequestSwapBuffers(framebuffer, fences, num_fences);
+}
+
 void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     impl->WaitFence(syncpoint_id, value);
 }
@@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
     impl->RegisterSyncptInterrupt(syncpoint_id, value);
 }
 
-bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-    return impl->CancelSyncptInterrupt(syncpoint_id, value);
-}
-
 u64 GPU::GetTicks() const {
     return impl->GetTicks();
 }