diff options
| author | 2021-01-10 22:09:56 -0700 | |
|---|---|---|
| committer | 2021-01-10 22:09:56 -0700 | |
| commit | 7a3c884e39fccfbb498b855080bffabc9ce2e7f1 (patch) | |
| tree | 5056f9406dec188439cb0deb87603498243a9412 /src/video_core/gpu.cpp | |
| parent | More forgetting... duh (diff) | |
| parent | Merge pull request #5229 from Morph1984/fullscreen-opt (diff) | |
| download | yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.gz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.tar.xz yuzu-7a3c884e39fccfbb498b855080bffabc9ce2e7f1.zip | |
Merge remote-tracking branch 'upstream/master' into int-flags
Diffstat (limited to 'src/video_core/gpu.cpp')
| -rw-r--r-- | src/video_core/gpu.cpp | 179 |
1 files changed, 133 insertions, 46 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 4bb9256e9..6ab06775f 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| 11 | #include "core/core_timing_util.h" | 11 | #include "core/core_timing_util.h" |
| 12 | #include "core/frontend/emu_window.h" | 12 | #include "core/frontend/emu_window.h" |
| 13 | #include "core/hardware_interrupt_manager.h" | ||
| 13 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 14 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 15 | #include "video_core/engines/fermi_2d.h" | 16 | #include "video_core/engines/fermi_2d.h" |
| @@ -27,15 +28,17 @@ namespace Tegra { | |||
| 27 | 28 | ||
| 28 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | 29 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); |
| 29 | 30 | ||
| 30 | GPU::GPU(Core::System& system_, bool is_async_) | 31 | GPU::GPU(Core::System& system_, bool is_async_, bool use_nvdec_) |
| 31 | : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, | 32 | : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, |
| 32 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, | 33 | dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, |
| 34 | cdma_pusher{std::make_unique<Tegra::CDmaPusher>(*this)}, use_nvdec{use_nvdec_}, | ||
| 33 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, | 35 | maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, |
| 34 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, | 36 | fermi_2d{std::make_unique<Engines::Fermi2D>()}, |
| 35 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, | 37 | kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, |
| 36 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, | 38 | maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, |
| 37 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, | 39 | kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, |
| 38 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} | 40 | shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, |
| 41 | gpu_thread{system_, is_async_} {} | ||
| 39 | 42 | ||
| 40 | GPU::~GPU() = default; | 43 | GPU::~GPU() = default; |
| 41 | 44 | ||
| @@ -77,31 +80,46 @@ DmaPusher& GPU::DmaPusher() { | |||
| 77 | return *dma_pusher; | 80 | return *dma_pusher; |
| 78 | } | 81 | } |
| 79 | 82 | ||
| 83 | Tegra::CDmaPusher& GPU::CDmaPusher() { | ||
| 84 | return *cdma_pusher; | ||
| 85 | } | ||
| 86 | |||
| 80 | const DmaPusher& GPU::DmaPusher() const { | 87 | const DmaPusher& GPU::DmaPusher() const { |
| 81 | return *dma_pusher; | 88 | return *dma_pusher; |
| 82 | } | 89 | } |
| 83 | 90 | ||
| 91 | const Tegra::CDmaPusher& GPU::CDmaPusher() const { | ||
| 92 | return *cdma_pusher; | ||
| 93 | } | ||
| 94 | |||
| 84 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { | 95 | void GPU::WaitFence(u32 syncpoint_id, u32 value) { |
| 85 | // Synced GPU, is always in sync | 96 | // Synced GPU, is always in sync |
| 86 | if (!is_async) { | 97 | if (!is_async) { |
| 87 | return; | 98 | return; |
| 88 | } | 99 | } |
| 100 | if (syncpoint_id == UINT32_MAX) { | ||
| 101 | // TODO: Research what this does. | ||
| 102 | LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); | ||
| 103 | return; | ||
| 104 | } | ||
| 89 | MICROPROFILE_SCOPE(GPU_wait); | 105 | MICROPROFILE_SCOPE(GPU_wait); |
| 90 | std::unique_lock lock{sync_mutex}; | 106 | std::unique_lock lock{sync_mutex}; |
| 91 | sync_cv.wait(lock, [=, this] { return syncpoints[syncpoint_id].load() >= value; }); | 107 | sync_cv.wait(lock, [=, this] { return syncpoints.at(syncpoint_id).load() >= value; }); |
| 92 | } | 108 | } |
| 93 | 109 | ||
| 94 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 110 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 95 | syncpoints[syncpoint_id]++; | 111 | auto& syncpoint = syncpoints.at(syncpoint_id); |
| 112 | syncpoint++; | ||
| 96 | std::lock_guard lock{sync_mutex}; | 113 | std::lock_guard lock{sync_mutex}; |
| 97 | sync_cv.notify_all(); | 114 | sync_cv.notify_all(); |
| 98 | if (!syncpt_interrupts[syncpoint_id].empty()) { | 115 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); |
| 99 | u32 value = syncpoints[syncpoint_id].load(); | 116 | if (!interrupt.empty()) { |
| 100 | auto it = syncpt_interrupts[syncpoint_id].begin(); | 117 | u32 value = syncpoint.load(); |
| 101 | while (it != syncpt_interrupts[syncpoint_id].end()) { | 118 | auto it = interrupt.begin(); |
| 119 | while (it != interrupt.end()) { | ||
| 102 | if (value >= *it) { | 120 | if (value >= *it) { |
| 103 | TriggerCpuInterrupt(syncpoint_id, *it); | 121 | TriggerCpuInterrupt(syncpoint_id, *it); |
| 104 | it = syncpt_interrupts[syncpoint_id].erase(it); | 122 | it = interrupt.erase(it); |
| 105 | continue; | 123 | continue; |
| 106 | } | 124 | } |
| 107 | it++; | 125 | it++; |
| @@ -110,22 +128,22 @@ void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | |||
| 110 | } | 128 | } |
| 111 | 129 | ||
| 112 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { | 130 | u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { |
| 113 | return syncpoints[syncpoint_id].load(); | 131 | return syncpoints.at(syncpoint_id).load(); |
| 114 | } | 132 | } |
| 115 | 133 | ||
| 116 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 134 | void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) { |
| 117 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | 135 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); |
| 118 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), | 136 | bool contains = std::any_of(interrupt.begin(), interrupt.end(), |
| 119 | [value](u32 in_value) { return in_value == value; }); | 137 | [value](u32 in_value) { return in_value == value; }); |
| 120 | if (contains) { | 138 | if (contains) { |
| 121 | return; | 139 | return; |
| 122 | } | 140 | } |
| 123 | syncpt_interrupts[syncpoint_id].emplace_back(value); | 141 | interrupt.emplace_back(value); |
| 124 | } | 142 | } |
| 125 | 143 | ||
| 126 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | 144 | bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { |
| 127 | std::lock_guard lock{sync_mutex}; | 145 | std::lock_guard lock{sync_mutex}; |
| 128 | auto& interrupt = syncpt_interrupts[syncpoint_id]; | 146 | auto& interrupt = syncpt_interrupts.at(syncpoint_id); |
| 129 | const auto iter = | 147 | const auto iter = |
| 130 | std::find_if(interrupt.begin(), interrupt.end(), | 148 | std::find_if(interrupt.begin(), interrupt.end(), |
| 131 | [value](u32 interrupt_value) { return value == interrupt_value; }); | 149 | [value](u32 interrupt_value) { return value == interrupt_value; }); |
| @@ -182,34 +200,6 @@ void GPU::SyncGuestHost() { | |||
| 182 | renderer->Rasterizer().SyncGuestHost(); | 200 | renderer->Rasterizer().SyncGuestHost(); |
| 183 | } | 201 | } |
| 184 | 202 | ||
| 185 | void GPU::OnCommandListEnd() { | ||
| 186 | renderer->Rasterizer().ReleaseFences(); | ||
| 187 | } | ||
| 188 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||
| 189 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | ||
| 190 | // So the values you see in docs might be multiplied by 4. | ||
| 191 | enum class BufferMethods { | ||
| 192 | BindObject = 0x0, | ||
| 193 | Nop = 0x2, | ||
| 194 | SemaphoreAddressHigh = 0x4, | ||
| 195 | SemaphoreAddressLow = 0x5, | ||
| 196 | SemaphoreSequence = 0x6, | ||
| 197 | SemaphoreTrigger = 0x7, | ||
| 198 | NotifyIntr = 0x8, | ||
| 199 | WrcacheFlush = 0x9, | ||
| 200 | Unk28 = 0xA, | ||
| 201 | UnkCacheFlush = 0xB, | ||
| 202 | RefCnt = 0x14, | ||
| 203 | SemaphoreAcquire = 0x1A, | ||
| 204 | SemaphoreRelease = 0x1B, | ||
| 205 | FenceValue = 0x1C, | ||
| 206 | FenceAction = 0x1D, | ||
| 207 | Unk78 = 0x1E, | ||
| 208 | Unk7c = 0x1F, | ||
| 209 | Yield = 0x20, | ||
| 210 | NonPullerMethods = 0x40, | ||
| 211 | }; | ||
| 212 | |||
| 213 | enum class GpuSemaphoreOperation { | 203 | enum class GpuSemaphoreOperation { |
| 214 | AcquireEqual = 0x1, | 204 | AcquireEqual = 0x1, |
| 215 | WriteLong = 0x2, | 205 | WriteLong = 0x2, |
| @@ -240,8 +230,12 @@ void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 | |||
| 240 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); | 230 | CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); |
| 241 | } else { | 231 | } else { |
| 242 | for (std::size_t i = 0; i < amount; i++) { | 232 | for (std::size_t i = 0; i < amount; i++) { |
| 243 | CallPullerMethod( | 233 | CallPullerMethod(MethodCall{ |
| 244 | {method, base_start[i], subchannel, methods_pending - static_cast<u32>(i)}); | 234 | method, |
| 235 | base_start[i], | ||
| 236 | subchannel, | ||
| 237 | methods_pending - static_cast<u32>(i), | ||
| 238 | }); | ||
| 245 | } | 239 | } |
| 246 | } | 240 | } |
| 247 | } | 241 | } |
| @@ -268,7 +262,12 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 268 | case BufferMethods::UnkCacheFlush: | 262 | case BufferMethods::UnkCacheFlush: |
| 269 | case BufferMethods::WrcacheFlush: | 263 | case BufferMethods::WrcacheFlush: |
| 270 | case BufferMethods::FenceValue: | 264 | case BufferMethods::FenceValue: |
| 265 | break; | ||
| 271 | case BufferMethods::FenceAction: | 266 | case BufferMethods::FenceAction: |
| 267 | ProcessFenceActionMethod(); | ||
| 268 | break; | ||
| 269 | case BufferMethods::WaitForInterrupt: | ||
| 270 | ProcessWaitForInterruptMethod(); | ||
| 272 | break; | 271 | break; |
| 273 | case BufferMethods::SemaphoreTrigger: { | 272 | case BufferMethods::SemaphoreTrigger: { |
| 274 | ProcessSemaphoreTriggerMethod(); | 273 | ProcessSemaphoreTriggerMethod(); |
| @@ -298,8 +297,7 @@ void GPU::CallPullerMethod(const MethodCall& method_call) { | |||
| 298 | break; | 297 | break; |
| 299 | } | 298 | } |
| 300 | default: | 299 | default: |
| 301 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", | 300 | LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method); |
| 302 | static_cast<u32>(method)); | ||
| 303 | break; | 301 | break; |
| 304 | } | 302 | } |
| 305 | } | 303 | } |
| @@ -378,10 +376,28 @@ void GPU::ProcessBindMethod(const MethodCall& method_call) { | |||
| 378 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); | 376 | dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel); |
| 379 | break; | 377 | break; |
| 380 | default: | 378 | default: |
| 381 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", static_cast<u32>(engine_id)); | 379 | UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); |
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | void GPU::ProcessFenceActionMethod() { | ||
| 384 | switch (regs.fence_action.op) { | ||
| 385 | case FenceOperation::Acquire: | ||
| 386 | WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); | ||
| 387 | break; | ||
| 388 | case FenceOperation::Increment: | ||
| 389 | IncrementSyncPoint(regs.fence_action.syncpoint_id); | ||
| 390 | break; | ||
| 391 | default: | ||
| 392 | UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); | ||
| 382 | } | 393 | } |
| 383 | } | 394 | } |
| 384 | 395 | ||
| 396 | void GPU::ProcessWaitForInterruptMethod() { | ||
| 397 | // TODO(bunnei) ImplementMe | ||
| 398 | LOG_WARNING(HW_GPU, "(STUBBED) called"); | ||
| 399 | } | ||
| 400 | |||
| 385 | void GPU::ProcessSemaphoreTriggerMethod() { | 401 | void GPU::ProcessSemaphoreTriggerMethod() { |
| 386 | const auto semaphoreOperationMask = 0xF; | 402 | const auto semaphoreOperationMask = 0xF; |
| 387 | const auto op = | 403 | const auto op = |
| @@ -443,4 +459,75 @@ void GPU::ProcessSemaphoreAcquire() { | |||
| 443 | } | 459 | } |
| 444 | } | 460 | } |
| 445 | 461 | ||
| 462 | void GPU::Start() { | ||
| 463 | gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher, *cdma_pusher); | ||
| 464 | cpu_context = renderer->GetRenderWindow().CreateSharedContext(); | ||
| 465 | cpu_context->MakeCurrent(); | ||
| 466 | } | ||
| 467 | |||
| 468 | void GPU::ObtainContext() { | ||
| 469 | cpu_context->MakeCurrent(); | ||
| 470 | } | ||
| 471 | |||
| 472 | void GPU::ReleaseContext() { | ||
| 473 | cpu_context->DoneCurrent(); | ||
| 474 | } | ||
| 475 | |||
| 476 | void GPU::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 477 | gpu_thread.SubmitList(std::move(entries)); | ||
| 478 | } | ||
| 479 | |||
| 480 | void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { | ||
| 481 | if (!use_nvdec) { | ||
| 482 | return; | ||
| 483 | } | ||
| 484 | // This condition fires when a video stream ends, clear all intermediary data | ||
| 485 | if (entries[0].raw == 0xDEADB33F) { | ||
| 486 | cdma_pusher.reset(); | ||
| 487 | return; | ||
| 488 | } | ||
| 489 | if (!cdma_pusher) { | ||
| 490 | cdma_pusher = std::make_unique<Tegra::CDmaPusher>(*this); | ||
| 491 | } | ||
| 492 | |||
| 493 | // SubmitCommandBuffer would make the nvdec operations async, this is not currently working | ||
| 494 | // TODO(ameerj): RE proper async nvdec operation | ||
| 495 | // gpu_thread.SubmitCommandBuffer(std::move(entries)); | ||
| 496 | |||
| 497 | cdma_pusher->Push(std::move(entries)); | ||
| 498 | cdma_pusher->DispatchCalls(); | ||
| 499 | } | ||
| 500 | |||
| 501 | void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | ||
| 502 | gpu_thread.SwapBuffers(framebuffer); | ||
| 503 | } | ||
| 504 | |||
| 505 | void GPU::FlushRegion(VAddr addr, u64 size) { | ||
| 506 | gpu_thread.FlushRegion(addr, size); | ||
| 507 | } | ||
| 508 | |||
| 509 | void GPU::InvalidateRegion(VAddr addr, u64 size) { | ||
| 510 | gpu_thread.InvalidateRegion(addr, size); | ||
| 511 | } | ||
| 512 | |||
| 513 | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 514 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 515 | } | ||
| 516 | |||
| 517 | void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||
| 518 | auto& interrupt_manager = system.InterruptManager(); | ||
| 519 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||
| 520 | } | ||
| 521 | |||
| 522 | void GPU::WaitIdle() const { | ||
| 523 | gpu_thread.WaitIdle(); | ||
| 524 | } | ||
| 525 | |||
| 526 | void GPU::OnCommandListEnd() { | ||
| 527 | if (is_async) { | ||
| 528 | // This command only applies to asynchronous GPU mode | ||
| 529 | gpu_thread.OnCommandListEnd(); | ||
| 530 | } | ||
| 531 | } | ||
| 532 | |||
| 446 | } // namespace Tegra | 533 | } // namespace Tegra |