From eb67a45ca82bc01ac843c853fd3c17f2a90e0250 Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 26 Oct 2020 23:07:36 -0400 Subject: video_core: NVDEC Implementation This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com> --- src/video_core/cdma_pusher.cpp | 171 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 src/video_core/cdma_pusher.cpp (limited to 'src/video_core/cdma_pusher.cpp') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp new file mode 100644 index 000000000..d774db107 --- /dev/null +++ b/src/video_core/cdma_pusher.cpp @@ -0,0 +1,171 @@ +// MIT License +// +// Copyright (c) Ryujinx Team and Contributors +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +// associated documentation files (the "Software"), to deal in the Software without restriction, +// including without limitation the rights to use, copy, modify, merge, publish, distribute, +// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// + +#include "command_classes/host1x.h" +#include "command_classes/nvdec.h" +#include "command_classes/vic.h" +#include "common/bit_util.h" +#include "video_core/cdma_pusher.h" +#include "video_core/command_classes/nvdec_common.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/memory_manager.h" + +namespace Tegra { +CDmaPusher::CDmaPusher(GPU& gpu) + : gpu(gpu), nvdec_processor(std::make_shared(gpu)), + vic_processor(std::make_unique(gpu, nvdec_processor)), + host1x_processor(std::make_unique(gpu)), + nvdec_sync(std::make_unique(gpu)), + vic_sync(std::make_unique(gpu)) {} + +CDmaPusher::~CDmaPusher() = default; + +void CDmaPusher::Push(ChCommandHeaderList&& entries) { + cdma_queue.push(std::move(entries)); +} + +void CDmaPusher::DispatchCalls() { + while (!cdma_queue.empty()) { + Step(); + } +} + +void CDmaPusher::Step() { + const auto entries{cdma_queue.front()}; + cdma_queue.pop(); + + std::vector values(entries.size()); + std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32)); + + for (const u32 value : values) { + if (mask != 0) { + const u32 lbs = Common::CountTrailingZeroes32(mask); + mask &= ~(1U << lbs); + ExecuteCommand(static_cast(offset + lbs), value); + continue; + } else if (count != 0) { + --count; + ExecuteCommand(static_cast(offset), value); + if (incrementing) { + ++offset; + } + continue; + } + const auto mode = static_cast((value >> 28) & 0xf); + switch (mode) { + case ChSubmissionMode::SetClass: { + mask = value & 0x3f; + offset = (value >> 16) & 0xfff; + current_class = static_cast((value >> 6) & 0x3ff); + break; + } + case ChSubmissionMode::Incrementing: + case ChSubmissionMode::NonIncrementing: + count = value & 0xffff; + offset = (value >> 16) & 0xfff; + incrementing = mode == ChSubmissionMode::Incrementing; + break; + case ChSubmissionMode::Mask: + mask = value & 0xffff; + offset = (value >> 16) & 0xfff; + break; + case ChSubmissionMode::Immediate: { + const u32 data = value & 0xfff; + offset = (value >> 16) & 0xfff; + ExecuteCommand(static_cast(offset), data); + break; + } + default: + UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast(mode)); + break; + } + } +} + +void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { + switch (current_class) { + case ChClassId::NvDec: + ThiStateWrite(nvdec_thi_state, offset, {data}); + switch (static_cast(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast(data & 0xFF); + const auto cond = static_cast((data >> 8) & 0xFF); + if (cond == 0) { + nvdec_sync->Increment(syncpoint_id); + } else { + nvdec_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); + nvdec_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", + static_cast(nvdec_thi_state.method_0)); + nvdec_processor->ProcessMethod( + static_cast(nvdec_thi_state.method_0), {data}); + break; + default: + break; + } + break; + case ChClassId::GraphicsVic: + ThiStateWrite(vic_thi_state, static_cast(offset), {data}); + switch (static_cast(offset)) { + case ThiMethod::IncSyncpt: { + LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); + const auto syncpoint_id = static_cast(data & 0xFF); + const auto cond = static_cast((data >> 8) & 0xFF); + if (cond == 0) { + vic_sync->Increment(syncpoint_id); + } else { + vic_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); + vic_sync->SignalDone(syncpoint_id); + } + break; + } + case ThiMethod::SetMethod1: + LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", + static_cast(vic_thi_state.method_0)); + vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), + {data}); + break; + default: + break; + } + break; + case ChClassId::Host1x: + // This device is mainly for syncpoint synchronization + LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); + host1x_processor->ProcessMethod(static_cast(offset), {data}); + break; + default: + UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast(current_class)); + break; + } +} + +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments) { + u8* const state_offset = reinterpret_cast(&state) + sizeof(u32) * offset; + std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +} + +} // namespace Tegra -- cgit v1.2.3 From 94eca09cf6541634a885526cf0a850fc4fb1e56a Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 28 Oct 2020 16:03:35 -0700 Subject: video_core: cdma_pusher: Add missing LOG_DEBUG field in ExecuteCommand. --- src/video_core/cdma_pusher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/cdma_pusher.cpp') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index d774db107..b60f86260 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -144,7 +144,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { } case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", - static_cast(vic_thi_state.method_0)); + static_cast(vic_thi_state.method_0), data); vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), {data}); break; -- cgit v1.2.3 From 677a8b208d47d0d2397197ce74c7039a8ea79d20 Mon Sep 17 00:00:00 2001 From: Lioncash Date: Fri, 4 Dec 2020 14:39:12 -0500 Subject: video_core: Resolve more variable shadowing scenarios Resolves variable shadowing scenarios up to the end of the OpenGL code to make it nicer to review. The rest will be resolved in a following commit. --- src/video_core/cdma_pusher.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src/video_core/cdma_pusher.cpp') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index b60f86260..e3e7432f7 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -29,8 +29,8 @@ #include "video_core/memory_manager.h" namespace Tegra { -CDmaPusher::CDmaPusher(GPU& gpu) - : gpu(gpu), nvdec_processor(std::make_shared(gpu)), +CDmaPusher::CDmaPusher(GPU& gpu_) + : gpu{gpu_}, nvdec_processor(std::make_shared(gpu)), vic_processor(std::make_unique(gpu, nvdec_processor)), host1x_processor(std::make_unique(gpu)), nvdec_sync(std::make_unique(gpu)), @@ -100,11 +100,11 @@ void CDmaPusher::Step() { } } -void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { +void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { switch (current_class) { case ChClassId::NvDec: - ThiStateWrite(nvdec_thi_state, offset, {data}); - switch (static_cast(offset)) { + ThiStateWrite(nvdec_thi_state, state_offset, {data}); + switch (static_cast(state_offset)) { case ThiMethod::IncSyncpt: { LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); @@ -120,16 +120,16 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod( - static_cast(nvdec_thi_state.method_0), {data}); + nvdec_processor->ProcessMethod(static_cast(nvdec_thi_state.method_0), + {data}); break; default: break; } break; case ChClassId::GraphicsVic: - ThiStateWrite(vic_thi_state, static_cast(offset), {data}); - switch (static_cast(offset)) { + ThiStateWrite(vic_thi_state, static_cast(state_offset), {data}); + switch (static_cast(state_offset)) { case ThiMethod::IncSyncpt: { LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method"); const auto syncpoint_id = static_cast(data & 0xFF); @@ -145,8 +145,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", static_cast(vic_thi_state.method_0), data); - vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), - {data}); + vic_processor->ProcessMethod(static_cast(vic_thi_state.method_0), {data}); break; default: break; @@ -155,7 +154,7 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { case ChClassId::Host1x: // This device is mainly for syncpoint synchronization LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); - host1x_processor->ProcessMethod(static_cast(offset), {data}); + host1x_processor->ProcessMethod(static_cast(state_offset), {data}); break; default: UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast(current_class)); @@ -163,9 +162,10 @@ void CDmaPusher::ExecuteCommand(u32 offset, u32 data) { } } -void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector& arguments) { - u8* const state_offset = reinterpret_cast(&state) + sizeof(u32) * offset; - std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size()); +void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 state_offset, + const std::vector& arguments) { + u8* const state_offset_ptr = reinterpret_cast(&state) + sizeof(u32) * state_offset; + std::memcpy(state_offset_ptr, arguments.data(), sizeof(u32) * arguments.size()); } } // namespace Tegra -- cgit v1.2.3 From 2c27127d04a155fe0f893e84263d58f14473785d Mon Sep 17 00:00:00 2001 From: ameerj Date: Mon, 28 Dec 2020 01:02:06 -0500 Subject: nvdec syncpt incorporation laying the groundwork for async gpu, although this does not fully implement async nvdec operations --- src/video_core/cdma_pusher.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/video_core/cdma_pusher.cpp') diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index e3e7432f7..94679d5d1 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_) : gpu{gpu_}, nvdec_processor(std::make_shared(gpu)), vic_processor(std::make_unique(gpu, nvdec_processor)), host1x_processor(std::make_unique(gpu)), - nvdec_sync(std::make_unique(gpu)), - vic_sync(std::make_unique(gpu)) {} + sync_manager(std::make_unique(gpu)) {} CDmaPusher::~CDmaPusher() = default; @@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - nvdec_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - nvdec_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - nvdec_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } @@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { const auto syncpoint_id = static_cast(data & 0xFF); const auto cond = static_cast((data >> 8) & 0xFF); if (cond == 0) { - vic_sync->Increment(syncpoint_id); + sync_manager->Increment(syncpoint_id); } else { - vic_sync->IncrementWhenDone(static_cast(current_class), syncpoint_id); - vic_sync->SignalDone(syncpoint_id); + sync_manager->SignalDone( + sync_manager->IncrementWhenDone(static_cast(current_class), syncpoint_id)); } break; } -- cgit v1.2.3