author     2022-01-30 10:31:13 +0100
committer  2022-10-06 21:00:52 +0200
commit     668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 (patch)
tree       a1c668d6c3d00eade849b1d31dba4116095e4c12 /src/video_core/gpu.cpp
parent     Texture Cache: Fix GC and GPU Modified on Joins. (diff)
VideoCore: Refactor syncing.
Diffstat (limited to 'src/video_core/gpu.cpp')
 -rw-r--r--  src/video_core/gpu.cpp | 197
 1 file changed, 102 insertions(+), 95 deletions(-)
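
Before the diff proper, it helps to see the shape of the refactor: the flush-specific request queue in GPU::Impl (the FlushRequest list, flush fence, and flush mutex) is generalized into a queue of arbitrary fenced synchronization operations that the GPU thread drains in TickWork(). The following standalone sketch distills that pattern; the class name SyncRequestQueue and the std::uint64_t spelling are illustrative only, since yuzu keeps this state inline in GPU::Impl and uses its own u64 alias.

#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <list>
#include <mutex>
#include <utility>

// Generic fenced work queue in the style this commit introduces.
class SyncRequestQueue {
public:
    // Producer side (CPU threads): enqueue any operation, receive a fence id.
    template <typename Func>
    [[nodiscard]] std::uint64_t Request(Func&& action) {
        std::unique_lock lck{mutex};
        const std::uint64_t fence = ++last_fence;
        requests.emplace_back(std::forward<Func>(action));
        return fence;
    }

    // Block the caller until the operation behind `fence` has executed.
    void WaitFor(std::uint64_t fence) {
        std::unique_lock lck{mutex};
        cv.wait(lck, [this, fence] { return current_fence.load() >= fence; });
    }

    // Consumer side (GPU thread): drain the queue. The commit's TickWork()
    // drops the lock around each operation so producers are not blocked
    // while a request runs, then re-takes it before notifying waiters.
    void Tick() {
        std::unique_lock lck{mutex};
        while (!requests.empty()) {
            auto request = std::move(requests.front());
            requests.pop_front();
            mutex.unlock();
            request();
            current_fence.fetch_add(1, std::memory_order_release);
            mutex.lock();
            cv.notify_all();
        }
    }

private:
    std::list<std::function<void()>> requests;
    std::atomic<std::uint64_t> current_fence{};
    std::uint64_t last_fence{};
    std::mutex mutex;
    std::condition_variable cv;
};

With this shape, a memory flush becomes just one kind of request, which is exactly how GPU::RequestFlush is rewritten near the end of the diff: it enqueues a lambda calling rasterizer->FlushRegion(addr, size) and returns the fence for the caller to wait on.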
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index eebd7f3ff..1097db08a 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -28,6 +28,8 @@
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
+#include "video_core/host1x/host1x.h"
+#include "video_core/host1x/syncpoint_manager.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_base.h"
 #include "video_core/shader_notify.h"
@@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
+        : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
           gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
@@ -115,31 +117,35 @@ struct GPU::Impl {
     }
 
     /// Request a host GPU memory flush from the CPU.
-    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) {
-        std::unique_lock lck{flush_request_mutex};
-        const u64 fence = ++last_flush_fence;
-        flush_requests.emplace_back(fence, addr, size);
+    template <typename Func>
+    [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+        std::unique_lock lck{sync_request_mutex};
+        const u64 fence = ++last_sync_fence;
+        sync_requests.emplace_back(action);
         return fence;
     }
 
     /// Obtains current flush request fence id.
-    [[nodiscard]] u64 CurrentFlushRequestFence() const {
-        return current_flush_fence.load(std::memory_order_relaxed);
+    [[nodiscard]] u64 CurrentSyncRequestFence() const {
+        return current_sync_fence.load(std::memory_order_relaxed);
+    }
+
+    void WaitForSyncOperation(const u64 fence) {
+        std::unique_lock lck{sync_request_mutex};
+        sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
     }
 
     /// Tick pending requests within the GPU.
     void TickWork() {
-        std::unique_lock lck{flush_request_mutex};
-        while (!flush_requests.empty()) {
-            auto& request = flush_requests.front();
-            const u64 fence = request.fence;
-            const VAddr addr = request.addr;
-            const std::size_t size = request.size;
-            flush_requests.pop_front();
-            flush_request_mutex.unlock();
-            rasterizer->FlushRegion(addr, size);
-            current_flush_fence.store(fence);
-            flush_request_mutex.lock();
+        std::unique_lock lck{sync_request_mutex};
+        while (!sync_requests.empty()) {
+            auto request = std::move(sync_requests.front());
+            sync_requests.pop_front();
+            sync_request_mutex.unlock();
+            request();
+            current_sync_fence.fetch_add(1, std::memory_order_release);
+            sync_request_mutex.lock();
+            sync_request_cv.notify_all();
         }
     }
 
@@ -207,78 +213,26 @@ struct GPU::Impl {
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value) {
-        // Synced GPU, is always in sync
-        if (!is_async) {
-            return;
-        }
         if (syncpoint_id == UINT32_MAX) {
-            // TODO: Research what this does.
-            LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented");
             return;
         }
         MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{sync_mutex};
-        sync_cv.wait(lock, [=, this] {
-            if (shutting_down.load(std::memory_order_relaxed)) {
-                // We're shutting down, ensure no threads continue to wait for the next syncpoint
-                return true;
-            }
-            return syncpoints.at(syncpoint_id).load() >= value;
-        });
+        host1x.GetSyncpointManager().WaitHost(syncpoint_id, value);
     }
 
     void IncrementSyncPoint(u32 syncpoint_id) {
-        auto& syncpoint = syncpoints.at(syncpoint_id);
-        syncpoint++;
-        std::scoped_lock lock{sync_mutex};
-        sync_cv.notify_all();
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        if (!interrupt.empty()) {
-            u32 value = syncpoint.load();
-            auto it = interrupt.begin();
-            while (it != interrupt.end()) {
-                if (value >= *it) {
-                    TriggerCpuInterrupt(syncpoint_id, *it);
-                    it = interrupt.erase(it);
-                    continue;
-                }
-                it++;
-            }
-        }
+        host1x.GetSyncpointManager().IncrementHost(syncpoint_id);
     }
 
     [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const {
-        return syncpoints.at(syncpoint_id).load();
+        return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id);
     }
 
     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        u32 current_value = syncpoints.at(syncpoint_id).load();
-        if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) {
+        auto& syncpoint_manager = host1x.GetSyncpointManager();
+        syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() {
             TriggerCpuInterrupt(syncpoint_id, value);
-            return;
-        }
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        bool contains = std::any_of(interrupt.begin(), interrupt.end(),
-                                    [value](u32 in_value) { return in_value == value; });
-        if (contains) {
-            return;
-        }
-        interrupt.emplace_back(value);
-    }
-
-    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-        std::scoped_lock lock{sync_mutex};
-        auto& interrupt = syncpt_interrupts.at(syncpoint_id);
-        const auto iter =
-            std::find_if(interrupt.begin(), interrupt.end(),
-                         [value](u32 interrupt_value) { return value == interrupt_value; });
-
-        if (iter == interrupt.end()) {
-            return false;
-        }
-        interrupt.erase(iter);
-        return true;
+        });
     }
 
     [[nodiscard]] u64 GetTicks() const {
@@ -387,8 +341,48 @@ struct GPU::Impl {
         interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
     }
 
+    void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                            Service::Nvidia::NvFence* fences, size_t num_fences) {
+        size_t current_request_counter{};
+        {
+            std::unique_lock<std::mutex> lk(request_swap_mutex);
+            if (free_swap_counters.empty()) {
+                current_request_counter = request_swap_counters.size();
+                request_swap_counters.emplace_back(num_fences);
+            } else {
+                current_request_counter = free_swap_counters.front();
+                request_swap_counters[current_request_counter] = num_fences;
+                free_swap_counters.pop_front();
+            }
+        }
+        const auto wait_fence =
+            RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] {
+                auto& syncpoint_manager = host1x.GetSyncpointManager();
+                if (num_fences == 0) {
+                    renderer->SwapBuffers(framebuffer);
+                }
+                const auto executer = [this, current_request_counter,
+                                       framebuffer_copy = *framebuffer]() {
+                    {
+                        std::unique_lock<std::mutex> lk(request_swap_mutex);
+                        if (--request_swap_counters[current_request_counter] != 0) {
+                            return;
+                        }
+                        free_swap_counters.push_back(current_request_counter);
+                    }
+                    renderer->SwapBuffers(&framebuffer_copy);
+                };
+                for (size_t i = 0; i < num_fences; i++) {
+                    syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer);
+                }
+            });
+        gpu_thread.TickGPU();
+        WaitForSyncOperation(wait_fence);
+    }
+
     GPU& gpu;
     Core::System& system;
+    Host1x::Host1x& host1x;
 
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
@@ -411,18 +405,11 @@ struct GPU::Impl {
 
     std::condition_variable sync_cv;
 
-    struct FlushRequest {
-        explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
-            : fence{fence_}, addr{addr_}, size{size_} {}
-        u64 fence;
-        VAddr addr;
-        std::size_t size;
-    };
-
-    std::list<FlushRequest> flush_requests;
-    std::atomic<u64> current_flush_fence{};
-    u64 last_flush_fence{};
-    std::mutex flush_request_mutex;
+    std::list<std::function<void(void)>> sync_requests;
+    std::atomic<u64> current_sync_fence{};
+    u64 last_sync_fence{};
+    std::mutex sync_request_mutex;
+    std::condition_variable sync_request_cv;
 
     const bool is_async;
 
@@ -433,6 +420,10 @@ struct GPU::Impl {
     std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
     Tegra::Control::ChannelState* current_channel;
    s32 bound_channel{-1};
+
+    std::deque<size_t> free_swap_counters;
+    std::deque<size_t> request_swap_counters;
+    std::mutex request_swap_mutex;
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() {
 }
 
 u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
-    return impl->RequestFlush(addr, size);
+    return impl->RequestSyncOperation(
+        [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
+}
+
+u64 GPU::CurrentSyncRequestFence() const {
+    return impl->CurrentSyncRequestFence();
 }
 
-u64 GPU::CurrentFlushRequestFence() const {
-    return impl->CurrentFlushRequestFence();
+void GPU::WaitForSyncOperation(u64 fence) {
+    return impl->WaitForSyncOperation(fence);
 }
 
 void GPU::TickWork() {
     impl->TickWork();
 }
 
+/// Gets a mutable reference to the Host1x interface
+Host1x::Host1x& GPU::Host1x() {
+    return impl->host1x;
+}
+
+/// Gets an immutable reference to the Host1x interface.
+const Host1x::Host1x& GPU::Host1x() const {
+    return impl->host1x;
+}
+
 Engines::Maxwell3D& GPU::Maxwell3D() {
     return impl->Maxwell3D();
 }
@@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
     return impl->ShaderNotify();
 }
 
+void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
+                             Service::Nvidia::NvFence* fences, size_t num_fences) {
+    impl->RequestSwapBuffers(framebuffer, fences, num_fences);
+}
+
 void GPU::WaitFence(u32 syncpoint_id, u32 value) {
     impl->WaitFence(syncpoint_id, value);
 }
@@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) {
     impl->RegisterSyncptInterrupt(syncpoint_id, value);
 }
 
-bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) {
-    return impl->CancelSyncptInterrupt(syncpoint_id, value);
-}
-
 u64 GPU::GetTicks() const {
     return impl->GetTicks();
 }
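
Note that this diff is limited to gpu.cpp, so it only shows the call sites of the new Host1x::SyncpointManager (WaitHost, IncrementHost, GetHostSyncpointValue, RegisterHostAction, RegisterGuestAction); the manager itself lands elsewhere in the commit. From those call sites one can infer the contract that replaces the removed syncpt_interrupts bookkeeping: an action registered against a threshold fires once the syncpoint reaches it, or immediately if it already has. A hypothetical model of that contract, not the actual yuzu implementation, might look like:

#include <cstdint>
#include <functional>
#include <map>
#include <mutex>
#include <utility>
#include <vector>

// Hypothetical single-syncpoint model of the RegisterHostAction contract,
// inferred from how gpu.cpp uses it. The old GPU::Impl code compared values
// with signed subtraction to tolerate counter wraparound; this sketch uses a
// plain comparison for brevity.
class Syncpoint {
public:
    using Action = std::function<void()>;

    // Run `action` when the counter reaches `threshold`; if the threshold has
    // already passed, run it immediately (matching the early-out the old
    // RegisterSyncptInterrupt performed before calling TriggerCpuInterrupt).
    void RegisterAction(std::uint32_t threshold, Action action) {
        std::unique_lock lk{mutex};
        if (value >= threshold) {
            lk.unlock();
            action();
            return;
        }
        pending.emplace(threshold, std::move(action));
    }

    // Counterpart of IncrementHost: bump the counter and fire due actions.
    void Increment() {
        std::vector<Action> due;
        {
            std::unique_lock lk{mutex};
            ++value;
            const auto end = pending.upper_bound(value);
            for (auto it = pending.begin(); it != end; ++it) {
                due.push_back(std::move(it->second));
            }
            pending.erase(pending.begin(), end);
        }
        for (auto& action : due) {
            action(); // outside the lock so an action may register new ones
        }
    }

private:
    std::uint32_t value{};
    std::multimap<std::uint32_t, Action> pending; // keyed by threshold
    std::mutex mutex;
};

Under this model the removed CancelSyncptInterrupt has no role: once RegisterSyncptInterrupt hands the interrupt to the syncpoint manager as a one-shot callback, there is no interrupt list left in GPU::Impl to cancel from, which is why both the Impl method and the public GPU::CancelSyncptInterrupt wrapper disappear.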