From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the FFmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/video_core/gpu_thread.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/video_core/gpu_thread.cpp') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index bf761abf2..4b8f58283 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -18,7 +18,7 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, - SynchState& state) { + SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); Common::SetCurrentThreadName(name.c_str()); @@ -42,6 +42,10 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, if (const auto submit_list = std::get_if(&next.data)) { 
dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); + } else if (const auto command_list = std::get_if(&next.data)) { + // NVDEC + cdma_pusher.Push(std::move(command_list->entries)); + cdma_pusher.DispatchCalls(); } else if (const auto data = std::get_if(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative(next.data)) { @@ -75,15 +79,19 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, - Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(system), std::ref(renderer), - std::ref(context), std::ref(dma_pusher), std::ref(state)}; + Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { + thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), + std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(std::move(entries))); } +void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) { + PushCommand(SubmitChCommandEntries(std::move(entries))); +} + void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); } -- cgit v1.2.3 From 677a8b208d47d0d2397197ce74c7039a8ea79d20 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 14:39:12 -0500 Subject: video_core: Resolve more variable shadowing scenarios Resolves variable shadowing scenarios up to the end of the OpenGL code to make it nicer to review. The rest will be resolved in a following commit. 
--- src/video_core/gpu_thread.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/video_core/gpu_thread.cpp') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4b8f58283..e27218b96 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -39,23 +39,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, CommandDataContainer next; while (state.is_running) { next = state.queue.PopWait(); - if (const auto submit_list = std::get_if(&next.data)) { + if (auto* submit_list = std::get_if(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); - } else if (const auto command_list = std::get_if(&next.data)) { + } else if (auto* command_list = std::get_if(&next.data)) { // NVDEC cdma_pusher.Push(std::move(command_list->entries)); cdma_pusher.DispatchCalls(); - } else if (const auto data = std::get_if(&next.data)) { + } else if (const auto* data = std::get_if(&next.data)) { renderer.SwapBuffers(data->framebuffer ? 
&*data->framebuffer : nullptr); } else if (std::holds_alternative(next.data)) { renderer.Rasterizer().ReleaseFences(); } else if (std::holds_alternative(next.data)) { system.GPU().TickWork(); - } else if (const auto data = std::get_if(&next.data)) { - renderer.Rasterizer().FlushRegion(data->addr, data->size); - } else if (const auto data = std::get_if(&next.data)) { - renderer.Rasterizer().OnCPUWrite(data->addr, data->size); + } else if (const auto* flush = std::get_if(&next.data)) { + renderer.Rasterizer().FlushRegion(flush->addr, flush->size); + } else if (const auto* invalidate = std::get_if(&next.data)) { + renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative(next.data)) { return; } else { @@ -65,7 +65,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } } -ThreadManager::ThreadManager(Core::System& system) : system{system} {} +ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} ThreadManager::~ThreadManager() { if (!thread.joinable()) { -- cgit v1.2.3 From 40571c073faa02a6a4301e7f0ce365ef50a400aa Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Dec 2020 00:24:33 -0800 Subject: video_core: gpu: Implement synchronous mode using threaded GPU. 
--- src/video_core/gpu_thread.cpp | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'src/video_core/gpu_thread.cpp') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index e27218b96..56b9621b1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -65,7 +65,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } } -ThreadManager::ThreadManager(Core::System& system_) : system{system_} {} +ThreadManager::ThreadManager(Core::System& system_, bool is_async_) + : system{system_}, is_async{is_async_} {} ThreadManager::~ThreadManager() { if (!thread.joinable()) { @@ -97,19 +98,30 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { } void ThreadManager::FlushRegion(VAddr addr, u64 size) { - if (!Settings::IsGPULevelHigh()) { + if (!is_async) { + // Always flush with synchronous GPU mode PushCommand(FlushRegionCommand(addr, size)); return; } - if (!Settings::IsGPULevelExtreme()) { - return; - } - if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { + + // Asynchronous GPU mode + switch (Settings::values.gpu_accuracy.GetValue()) { + case Settings::GPUAccuracy::Normal: + PushCommand(FlushRegionCommand(addr, size)); + break; + case Settings::GPUAccuracy::High: + // TODO(bunnei): Is this right? 
Preserving existing behavior for now + break; + case Settings::GPUAccuracy::Extreme: { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); PushCommand(GPUTickCommand()); while (fence > gpu.CurrentFlushRequestFence()) { } + break; + } + default: + UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); } } @@ -134,6 +146,12 @@ void ThreadManager::OnCommandListEnd() { u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); + + if (!is_async) { + // In synchronous GPU mode, block the caller until the command has executed + WaitIdle(); + } + return fence; } -- cgit v1.2.3 From 4991620f899ce21bcde1e57f585fee4081e053d0 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 12 Dec 2020 01:37:06 -0800 Subject: video_core: gpu_thread: Do not wait when system is powered down. --- src/video_core/gpu_thread.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/video_core/gpu_thread.cpp') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 56b9621b1..1e95d80c3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -135,7 +135,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && + system.IsPoweredOn()) { } } -- cgit v1.2.3 From 954341763a3d8e0b9734fc2234368c40d65bace4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Thu, 24 Dec 2020 23:28:46 -0800 Subject: gpu: gpu_thread: Ensure MicroProfile is shutdown on exit. 
--- src/video_core/gpu_thread.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/video_core/gpu_thread.cpp') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1e95d80c3..7e490bcc3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" @@ -21,6 +22,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, SynchState& state, Tegra::CDmaPusher& cdma_pusher) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); + SCOPE_EXIT({ MicroProfileOnThreadExit(); }); + Common::SetCurrentThreadName(name.c_str()); Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); -- cgit v1.2.3