| author | 2021-11-05 15:52:31 +0100 |
|---|---|
| committer | 2022-10-06 21:00:51 +0200 |
| commit | 139ea93512aeead8a4aee3910a3de86eb109a838 (patch) |
| tree | 857643fc08617b7035656a51728c399f30c8c2cb /src/video_core/gpu.cpp |
| parent | NVASGPU: Fix Remap. (diff) |
| download | yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.gz, yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.xz, yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.zip |
VideoCore: implement channels on gpu caches.
Diffstat (limited to 'src/video_core/gpu.cpp')
| -rw-r--r-- | src/video_core/gpu.cpp | 468 |
|---|---|---|

1 file changed, 81 insertions(+), 387 deletions(-)
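For orientation before the diff: a minimal sketch of the channel lifecycle this patch introduces, as a hypothetical caller might drive it. `GPU::AllocateChannel`, `GPU::InitChannel`, and the new `PushGPUEntries(s32, Tegra::CommandList&&)` overload all appear in the diff below; the helper function itself and the `bind_id` field name are assumptions for illustration.

```cpp
// Hypothetical call site, sketching the per-channel API added by this patch.
// AllocateChannel/InitChannel and the per-channel PushGPUEntries overload are
// taken from the diff; everything else here is assumed.
#include <memory>
#include <utility>

#include "core/core.h"
#include "video_core/control/channel_state.h"
#include "video_core/gpu.h"

void SubmitOnNewChannel(Core::System& system, Tegra::GPU& gpu,
                        Tegra::CommandList&& entries) {
    // Each channel now owns its own engine state, so GPU caches can key
    // their tracking off the channel instead of a single global set.
    std::shared_ptr<Tegra::Control::ChannelState> channel = gpu.AllocateChannel();

    // Each channel carries its own address space, mirroring what the diff's
    // CreateHost1xChannel() does for channel 0.
    channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
    gpu.InitChannel(*channel);

    // Submission is now per-channel: the Control::Scheduler routes the list
    // to that channel's DMA pusher, and BindChannel() points the rasterizer
    // at the channel's caches before execution.
    gpu.PushGPUEntries(channel->bind_id, std::move(entries)); // bind_id: assumed field name
}
```

The design point of the commit is visible throughout the diff: the engines, DMA pusher, and memory manager move from a single set owned by `GPU::Impl` into per-channel `Control::ChannelState` objects, with a `Control::Scheduler` routing submissions, so the GPU-side caches can maintain state per channel. The full unified diff follows.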
```diff
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 33431f2a0..80a1c69e0 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,6 +18,8 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/perf_stats.h"
 #include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 
 struct GPU::Impl {
     explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
-                                          system)},
-          dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
-          maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
-          fermi_2d{std::make_unique<Engines::Fermi2D>()},
-          kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
-          maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
-          kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
           shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-          gpu_thread{system_, is_async_} {}
+          gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
 
     ~Impl() = default;
 
-    /// Binds a renderer to the GPU.
-    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
-        renderer = std::move(renderer_);
-        rasterizer = renderer->ReadRasterizer();
-
-        memory_manager->BindRasterizer(rasterizer);
-        maxwell_3d->BindRasterizer(rasterizer);
-        fermi_2d->BindRasterizer(rasterizer);
-        kepler_compute->BindRasterizer(rasterizer);
-        kepler_memory->BindRasterizer(rasterizer);
-        maxwell_dma->BindRasterizer(rasterizer);
+    std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+        auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+        channels.emplace(channel_id, channel_state);
+        scheduler->DeclareChannel(channel_state);
+        return channel_state;
     }
 
-    /// Calls a GPU method.
-    void CallMethod(const GPU::MethodCall& method_call) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-                  method_call.subchannel);
+    void BindChannel(s32 channel_id) {
+        if (bound_channel == channel_id) {
+            return;
+        }
+        auto it = channels.find(channel_id);
+        ASSERT(it != channels.end());
+        bound_channel = channel_id;
+        current_channel = it->second.get();
 
-        ASSERT(method_call.subchannel < bound_engines.size());
+        rasterizer->BindChannel(*current_channel);
+    }
 
-        if (ExecuteMethodOnEngine(method_call.method)) {
-            CallEngineMethod(method_call);
-        } else {
-            CallPullerMethod(method_call);
-        }
+    std::shared_ptr<Control::ChannelState> AllocateChannel() {
+        return CreateChannel(new_channel_id++);
     }
 
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+    void InitChannel(Control::ChannelState& to_init) {
+        to_init.Init(system, gpu);
+        to_init.BindRasterizer(rasterizer);
+        rasterizer->InitializeChannel(to_init);
+    }
 
-        ASSERT(subchannel < bound_engines.size());
+    void ReleaseChannel(Control::ChannelState& to_release) {
+        UNIMPLEMENTED();
+    }
 
-        if (ExecuteMethodOnEngine(method)) {
-            CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-        } else {
-            for (std::size_t i = 0; i < amount; i++) {
-                CallPullerMethod(GPU::MethodCall{
-                    method,
-                    base_start[i],
-                    subchannel,
-                    methods_pending - static_cast<u32>(i),
-                });
-            }
+    void CreateHost1xChannel() {
+        if (host1x_channel) {
+            return;
         }
+        host1x_channel = CreateChannel(0);
+        host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
+        InitChannel(*host1x_channel);
+    }
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+        renderer = std::move(renderer_);
+        rasterizer = renderer->ReadRasterizer();
     }
 
     /// Flush all current written commands into the host GPU for execution.
@@ -146,42 +141,44 @@ struct GPU::Impl {
 
     /// Returns a reference to the Maxwell3D GPU engine.
     [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
     }
 
     /// Returns a const reference to the Maxwell3D GPU engine.
    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
     }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
-        return *kepler_compute;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the KeplerCompute GPU engine.
     [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
-        return *kepler_compute;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
     }
 
     /// Returns a reference to the GPU memory manager.
     [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
-        return *memory_manager;
-    }
-
-    /// Returns a const reference to the GPU memory manager.
-    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
-        return *memory_manager;
+        CreateHost1xChannel();
+        return *host1x_channel->memory_manager;
     }
 
     /// Returns a reference to the GPU DMA pusher.
     [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
    }
 
     /// Returns a const reference to the GPU DMA pusher.
     [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
     }
 
     /// Returns a reference to the underlying renderer.
@@ -306,7 +303,7 @@ struct GPU::Impl {
     /// This can be used to launch any necessary threads and register any necessary
     /// core timing events.
     void Start() {
-        gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+        gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
         cpu_context = renderer->GetRenderWindow().CreateSharedContext();
         cpu_context->MakeCurrent();
     }
@@ -328,8 +325,8 @@
     }
 
     /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(std::move(entries));
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+        gpu_thread.SubmitList(channel, std::move(entries));
     }
 
     /// Push GPU command buffer entries to be processed
@@ -381,303 +378,16 @@ struct GPU::Impl {
         interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
     }
 
-    void ProcessBindMethod(const GPU::MethodCall& method_call) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        const auto engine_id = static_cast<EngineID>(method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
-        switch (engine_id) {
-        case EngineID::FERMI_TWOD_A:
-            dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_B:
-            dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
-        }
-    }
-
-    void ProcessFenceActionMethod() {
-        switch (regs.fence_action.op) {
-        case GPU::FenceOperation::Acquire:
-            WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-            break;
-        case GPU::FenceOperation::Increment:
-            IncrementSyncPoint(regs.fence_action.syncpoint_id);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
-        }
-    }
-
-    void ProcessWaitForInterruptMethod() {
-        // TODO(bunnei) ImplementMe
-        LOG_WARNING(HW_GPU, "(STUBBED) called");
-    }
-
-    void ProcessSemaphoreTriggerMethod() {
-        const auto semaphoreOperationMask = 0xF;
-        const auto op =
-            static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-        if (op == GpuSemaphoreOperation::WriteLong) {
-            struct Block {
-                u32 sequence;
-                u32 zeros = 0;
-                u64 timestamp;
-            };
-
-            Block block{};
-            block.sequence = regs.semaphore_sequence;
-            // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
-            // CoreTiming
-            block.timestamp = GetTicks();
-            memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
-                                       sizeof(block));
-        } else {
-            const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-            if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-                (op == GpuSemaphoreOperation::AcquireGequal &&
-                 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-                (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-                // Nothing to do in this case
-            } else {
-                regs.acquire_source = true;
-                regs.acquire_value = regs.semaphore_sequence;
-                if (op == GpuSemaphoreOperation::AcquireEqual) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = false;
-                } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = true;
-                } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                    // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                    // semaphore_sequence, gives a non-0 result
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
-                } else {
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-                }
-            }
-        }
-    }
-
-    void ProcessSemaphoreRelease() {
-        memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
-                                   regs.semaphore_release);
-    }
-
-    void ProcessSemaphoreAcquire() {
-        const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
-        const auto value = regs.semaphore_acquire;
-        if (word != value) {
-            regs.acquire_active = true;
-            regs.acquire_value = value;
-            // TODO(kemathe73) figure out how to do the acquire_timeout
-            regs.acquire_mode = false;
-            regs.acquire_source = false;
-        }
-    }
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const GPU::MethodCall& method_call) {
-        regs.reg_array[method_call.method] = method_call.argument;
-        const auto method = static_cast<BufferMethods>(method_call.method);
-
-        switch (method) {
-        case BufferMethods::BindObject: {
-            ProcessBindMethod(method_call);
-            break;
-        }
-        case BufferMethods::Nop:
-        case BufferMethods::SemaphoreAddressHigh:
-        case BufferMethods::SemaphoreAddressLow:
-        case BufferMethods::SemaphoreSequence:
-            break;
-        case BufferMethods::UnkCacheFlush:
-            rasterizer->SyncGuestHost();
-            break;
-        case BufferMethods::WrcacheFlush:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceValue:
-            break;
-        case BufferMethods::RefCnt:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceAction:
-            ProcessFenceActionMethod();
-            break;
-        case BufferMethods::WaitForInterrupt:
-            rasterizer->WaitForIdle();
-            break;
-        case BufferMethods::SemaphoreTrigger: {
-            ProcessSemaphoreTriggerMethod();
-            break;
-        }
-        case BufferMethods::NotifyIntr: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
-            break;
-        }
-        case BufferMethods::Unk28: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
-            break;
-        }
-        case BufferMethods::SemaphoreAcquire: {
-            ProcessSemaphoreAcquire();
-            break;
-        }
-        case BufferMethods::SemaphoreRelease: {
-            ProcessSemaphoreRelease();
-            break;
-        }
-        case BufferMethods::Yield: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-            break;
-        }
-        default:
-            LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
-            break;
-        }
-    }
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const GPU::MethodCall& method_call) {
-        const EngineID engine = bound_engines[method_call.subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMethod(method_call.method, method_call.argument,
-                                 method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMethod(method_call.method, method_call.argument,
-                                   method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMethod(method_call.method, method_call.argument,
-                                       method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMethod(method_call.method, method_call.argument,
-                                    method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMethod(method_call.method, method_call.argument,
-                                      method_call.IsLastCall());
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending) {
-        const EngineID engine = bound_engines[subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
-        const auto buffer_method = static_cast<BufferMethods>(method);
-        return buffer_method >= BufferMethods::NonPullerMethods;
-    }
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                GPU::FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
     GPU& gpu;
     Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+
     std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
     std::unique_ptr<VideoCore::RendererBase> renderer;
     VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
 
-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines{};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    std::shared_ptr<Control::ChannelState> host1x_channel;
+    s32 new_channel_id{1};
     /// Shader build notifier
     std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
     /// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -710,33 +420,10 @@ struct GPU::Impl {
     VideoCommon::GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 
-#define ASSERT_REG_POSITION(field_name, position)                                                 \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                     \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(semaphore_address, 0x4);
-    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-    ASSERT_REG_POSITION(reference_count, 0x14);
-    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-    ASSERT_REG_POSITION(semaphore_release, 0x1B);
-    ASSERT_REG_POSITION(fence_value, 0x1C);
-    ASSERT_REG_POSITION(fence_action, 0x1D);
-
-    ASSERT_REG_POSITION(acquire_mode, 0x100);
-    ASSERT_REG_POSITION(acquire_source, 0x101);
-    ASSERT_REG_POSITION(acquire_active, 0x102);
-    ASSERT_REG_POSITION(acquire_timeout, 0x103);
-    ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
-    enum class GpuSemaphoreOperation {
-        AcquireEqual = 0x1,
-        WriteLong = 0x2,
-        AcquireGequal = 0x4,
-        AcquireMask = 0x8,
-    };
+    std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+    std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+    Tegra::Control::ChannelState* current_channel;
+    s32 bound_channel{-1};
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
 
 GPU::~GPU() = default;
 
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
-    impl->BindRenderer(std::move(renderer));
+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+    return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+    impl->InitChannel(to_init);
 }
 
-void GPU::CallMethod(const MethodCall& method_call) {
-    impl->CallMethod(method_call);
+void GPU::BindChannel(s32 channel_id) {
+    impl->BindChannel(channel_id);
 }
 
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                          u32 methods_pending) {
-    impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+    impl->ReleaseChannel(to_release);
+}
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+    impl->BindRenderer(std::move(renderer));
 }
 
 void GPU::FlushCommands() {
@@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
     impl->ReleaseContext();
 }
 
-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+    impl->PushGPUEntries(channel, std::move(entries));
 }
 
 void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
```