video_core: NVDEC Implementation

This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with Ioctl commands being sent to the NVDEC and VIC (Video Image Composer) emulated devices. These allocate the necessary GPU buffers for the frame data, along with providing information on the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode the frame, the respective codec's header must be manually composed from the information provided by NVDEC, then sent with the raw frame data to the ffmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>
author: ameerj 2020-10-26 23:07:36 -0400
committer: ameerj 2020-10-26 23:07:36 -0400
commit: eb67a45ca82bc01ac843c853fd3c17f2a90e0250 (patch)
tree: 11e78a1b728ef0a608fae43d966b613eb4e6d58a /src/video_core/cdma_pusher.cpp
parent: Merge pull request #4827 from lioncash/trunc (diff)
download: yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.gz
yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.xz
yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.zip
1 files changed, 171 insertions, 0 deletions
diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp
new file mode 100644
index 000000000..d774db107
--- /dev/null
+++ b/src/video_core/cdma_pusher.cpp
@@ -0,0 +1,171 @@
+// MIT License
+//
+// Copyright (c) Ryujinx Team and Contributors
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
+// associated documentation files (the "Software"), to deal in the Software without restriction,
+// including without limitation the rights to use, copy, modify, merge, publish, distribute,
+// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or
+// substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
+// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+#include "command_classes/host1x.h"
+#include "command_classes/nvdec.h"
+#include "command_classes/vic.h"
+#include "common/bit_util.h"
+#include "video_core/cdma_pusher.h"
+#include "video_core/command_classes/nvdec_common.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+namespace Tegra {
+CDmaPusher::CDmaPusher(GPU& gpu)
+    : gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)),
+      vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
+      host1x_processor(std::make_unique<Host1x>(gpu)),
+      nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
+      vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
+// Defaulted destructor, defined out of line in this translation unit.
+// NOTE(review): presumably placed here so the smart-pointer members are destroyed where
+// Nvdec/Vic/Host1x are complete types - confirm against the header.
+CDmaPusher::~CDmaPusher() = default;
+// Queues a command list for later processing. Ownership of `entries` is moved into
+// cdma_queue; nothing is executed until DispatchCalls()/Step() consumes the queue.
+void CDmaPusher::Push(ChCommandHeaderList&& entries) {
+    cdma_queue.push(std::move(entries));
+}
+// Drains the pending work: calls Step() repeatedly until cdma_queue reports empty.
+void CDmaPusher::DispatchCalls() {
+    for (;;) {
+        if (cdma_queue.empty()) {
+            return;
+        }
+        Step();
+    }
+}
+// Removes the oldest command list from cdma_queue and interprets it one 32-bit word at a time.
+//
+// Each word is either payload for a previously decoded opcode (while `mask` or `count` is
+// non-zero) or a new opcode header. Header layout, as decoded below:
+//   bits 31:28  submission mode (ChSubmissionMode)
+//   bits 27:16  register offset
+//   SetClass:                      bits 15:6 class id, bits 5:0 register mask
+//   Incrementing/NonIncrementing:  bits 15:0 payload word count
+//   Mask:                          bits 15:0 register mask
+//   Immediate:                     bits 11:0 inline data, executed right away
+//
+// `mask`, `count`, `offset`, `incrementing` and `current_class` are not local to this function,
+// so an opcode whose payload is not finished by the end of a list carries over to the next call.
+// The queue must not be empty when this is called; DispatchCalls() checks that before calling.
+void CDmaPusher::Step() {
+    // Take the list by move instead of deep-copying it: the queue slot is popped immediately
+    // afterwards, so nothing else observes the moved-from element.
+    const auto entries{std::move(cdma_queue.front())};
+    cdma_queue.pop();
+    // View the entries as raw words. This relies on every entry occupying exactly sizeof(u32).
+    std::vector<u32> values(entries.size());
+    std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
+    for (const u32 value : values) {
+        if (mask != 0) {
+            // Masked payload: this word goes to the register selected by the lowest bit still
+            // set in the mask, and that bit is then cleared.
+            const u32 lbs = Common::CountTrailingZeroes32(mask);
+            mask &= ~(1U << lbs);
+            ExecuteCommand(static_cast<u32>(offset + lbs), value);
+            continue;
+        }
+        if (count != 0) {
+            // Counted payload: write to the current offset, advancing it only in
+            // incrementing mode.
+            --count;
+            ExecuteCommand(static_cast<u32>(offset), value);
+            if (incrementing) {
+                ++offset;
+            }
+            continue;
+        }
+        // No payload outstanding, so this word is an opcode header.
+        const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
+        switch (mode) {
+        case ChSubmissionMode::SetClass: {
+            mask = value & 0x3f;
+            offset = (value >> 16) & 0xfff;
+            current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
+            break;
+        }
+        case ChSubmissionMode::Incrementing:
+        case ChSubmissionMode::NonIncrementing:
+            count = value & 0xffff;
+            offset = (value >> 16) & 0xfff;
+            incrementing = mode == ChSubmissionMode::Incrementing;
+            break;
+        case ChSubmissionMode::Mask:
+            mask = value & 0xffff;
+            offset = (value >> 16) & 0xfff;
+            break;
+        case ChSubmissionMode::Immediate: {
+            const u32 data = value & 0xfff;
+            offset = (value >> 16) & 0xfff;
+            ExecuteCommand(static_cast<u32>(offset), data);
+            break;
+        }
+        default:
+            UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
+            break;
+        }
+    }
+}
+// Applies one register write (`offset`, `data`) to the device selected by `current_class`.
+//
+// NvDec / GraphicsVic: the word is first stored into that device's THI register block, then
+//   - IncSyncpt  : bits 7:0 of `data` give the syncpoint id and bits 15:8 the condition.
+//                  Condition 0 increments the syncpoint directly; any other value goes through
+//                  IncrementWhenDone() followed by SignalDone().
+//   - SetMethod1 : forwards `data` to the device processor, using the method id currently held
+//                  in the block's method_0 register.
+//   - any other offset only updates the register block.
+// Host1x: the write is forwarded to the Host1x processor with `offset` as the method.
+// Any other class is reported through UNIMPLEMENTED_MSG.
+void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
+    // IncSyncpt handling is identical for NVDEC and VIC apart from the manager it targets.
+    const auto increment_syncpoint = [this, data](auto& sync_manager) {
+        const u32 syncpoint_id = data & 0xFF;
+        const u32 cond = (data >> 8) & 0xFF;
+        if (cond == 0) {
+            sync_manager.Increment(syncpoint_id);
+        } else {
+            // NOTE(review): SignalDone() is given the syncpoint id, not the value returned by
+            // IncrementWhenDone() - confirm that is the identifier it expects.
+            sync_manager.IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
+            sync_manager.SignalDone(syncpoint_id);
+        }
+    };
+    switch (current_class) {
+    case ChClassId::NvDec:
+        ThiStateWrite(nvdec_thi_state, offset, {data});
+        switch (static_cast<ThiMethod>(offset)) {
+        case ThiMethod::IncSyncpt:
+            LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
+            increment_syncpoint(*nvdec_sync);
+            break;
+        case ThiMethod::SetMethod1:
+            LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
+                      static_cast<u32>(nvdec_thi_state.method_0));
+            nvdec_processor->ProcessMethod(
+                static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data});
+            break;
+        default:
+            break;
+        }
+        break;
+    case ChClassId::GraphicsVic:
+        ThiStateWrite(vic_thi_state, offset, {data});
+        switch (static_cast<ThiMethod>(offset)) {
+        case ThiMethod::IncSyncpt:
+            LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
+            increment_syncpoint(*vic_sync);
+            break;
+        case ThiMethod::SetMethod1:
+            // The format string has two placeholders; `data` fills the Args one, which
+            // previously had no matching argument.
+            LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
+                      static_cast<u32>(vic_thi_state.method_0), data);
+            vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0),
+                                         {data});
+            break;
+        default:
+            break;
+        }
+        break;
+    case ChClassId::Host1x:
+        // This device is mainly for syncpoint synchronization
+        LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
+        host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data});
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
+        break;
+    }
+}
+// Copies `arguments` into the THI register block `state`, starting at word index `offset`
+// (the block is addressed as an array of 32-bit registers).
+//
+// `offset` is decoded from the submitted command stream, so it is validated here: a write that
+// would start or end outside `state` is dropped instead of overwriting adjacent memory.
+void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) {
+    if (arguments.empty()) {
+        return;
+    }
+    // Compare in register (word) units so the range check itself cannot overflow.
+    const auto capacity_words = sizeof(state) / sizeof(u32);
+    if (offset > capacity_words || arguments.size() > capacity_words - offset) {
+        LOG_DEBUG(Service_NVDRV, "Ignoring out-of-range THI state write at offset 0x{:X}", offset);
+        return;
+    }
+    u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset;
+    std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size());
+}
+} // namespace Tegra
author	ameerj	2020-10-26 23:07:36 -0400
committer	ameerj	2020-10-26 23:07:36 -0400
commit	eb67a45ca82bc01ac843c853fd3c17f2a90e0250 (patch)
tree	11e78a1b728ef0a608fae43d966b613eb4e6d58a /src/video_core/cdma_pusher.cpp
parent	Merge pull request #4827 from lioncash/trunc (diff)
download	yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.gz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.xz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.zip

diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp new file mode 100644 index 000000000..d774db107 --- /dev/null +++ b/src/video_core/cdma_pusher.cpp
@@ -0,0 +1,171 @@
	1	// MIT License
	2	//
	3	// Copyright (c) Ryujinx Team and Contributors
	4	//
	5	// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
	6	// associated documentation files (the "Software"), to deal in the Software without restriction,
	7	// including without limitation the rights to use, copy, modify, merge, publish, distribute,
	8	// sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
	9	// furnished to do so, subject to the following conditions:
	10	//
	11	// The above copyright notice and this permission notice shall be included in all copies or
	12	// substantial portions of the Software.
	13	//
	14	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
	15	// NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	16	// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
	17	// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	18	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
	19	//
	20
	21	#include "command_classes/host1x.h"
	22	#include "command_classes/nvdec.h"
	23	#include "command_classes/vic.h"
	24	#include "common/bit_util.h"
	25	#include "video_core/cdma_pusher.h"
	26	#include "video_core/command_classes/nvdec_common.h"
	27	#include "video_core/engines/maxwell_3d.h"
	28	#include "video_core/gpu.h"
	29	#include "video_core/memory_manager.h"
	30
	31	namespace Tegra {
	32	CDmaPusher::CDmaPusher(GPU& gpu)
	33	: gpu(gpu), nvdec_processor(std::make_shared<Nvdec>(gpu)),
	34	vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
	35	host1x_processor(std::make_unique<Host1x>(gpu)),
	36	nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
	37	vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
	38
	39	CDmaPusher::~CDmaPusher() = default;
	40
	41	void CDmaPusher::Push(ChCommandHeaderList&& entries) {
	42	cdma_queue.push(std::move(entries));
	43	}
	44
	45	void CDmaPusher::DispatchCalls() {
	46	while (!cdma_queue.empty()) {
	47	Step();
	48	}
	49	}
	50
	51	void CDmaPusher::Step() {
	52	const auto entries{cdma_queue.front()};
	53	cdma_queue.pop();
	54
	55	std::vector<u32> values(entries.size());
	56	std::memcpy(values.data(), entries.data(), entries.size() * sizeof(u32));
	57
	58	for (const u32 value : values) {
	59	if (mask != 0) {
	60	const u32 lbs = Common::CountTrailingZeroes32(mask);
	61	mask &= ~(1U << lbs);
	62	ExecuteCommand(static_cast<u32>(offset + lbs), value);
	63	continue;
	64	} else if (count != 0) {
	65	--count;
	66	ExecuteCommand(static_cast<u32>(offset), value);
	67	if (incrementing) {
	68	++offset;
	69	}
	70	continue;
	71	}
	72	const auto mode = static_cast<ChSubmissionMode>((value >> 28) & 0xf);
	73	switch (mode) {
	74	case ChSubmissionMode::SetClass: {
	75	mask = value & 0x3f;
	76	offset = (value >> 16) & 0xfff;
	77	current_class = static_cast<ChClassId>((value >> 6) & 0x3ff);
	78	break;
	79	}
	80	case ChSubmissionMode::Incrementing:
	81	case ChSubmissionMode::NonIncrementing:
	82	count = value & 0xffff;
	83	offset = (value >> 16) & 0xfff;
	84	incrementing = mode == ChSubmissionMode::Incrementing;
	85	break;
	86	case ChSubmissionMode::Mask:
	87	mask = value & 0xffff;
	88	offset = (value >> 16) & 0xfff;
	89	break;
	90	case ChSubmissionMode::Immediate: {
	91	const u32 data = value & 0xfff;
	92	offset = (value >> 16) & 0xfff;
	93	ExecuteCommand(static_cast<u32>(offset), data);
	94	break;
	95	}
	96	default:
	97	UNIMPLEMENTED_MSG("ChSubmission mode {} is not implemented!", static_cast<u32>(mode));
	98	break;
	99	}
	100	}
	101	}
	102
	103	void CDmaPusher::ExecuteCommand(u32 offset, u32 data) {
	104	switch (current_class) {
	105	case ChClassId::NvDec:
	106	ThiStateWrite(nvdec_thi_state, offset, {data});
	107	switch (static_cast<ThiMethod>(offset)) {
	108	case ThiMethod::IncSyncpt: {
	109	LOG_DEBUG(Service_NVDRV, "NVDEC Class IncSyncpt Method");
	110	const auto syncpoint_id = static_cast<u32>(data & 0xFF);
	111	const auto cond = static_cast<u32>((data >> 8) & 0xFF);
	112	if (cond == 0) {
	113	nvdec_sync->Increment(syncpoint_id);
	114	} else {
	115	nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
	116	nvdec_sync->SignalDone(syncpoint_id);
	117	}
	118	break;
	119	}
	120	case ThiMethod::SetMethod1:
	121	LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}",
	122	static_cast<u32>(nvdec_thi_state.method_0));
	123	nvdec_processor->ProcessMethod(
	124	static_cast<Tegra::Nvdec::Method>(nvdec_thi_state.method_0), {data});
	125	break;
	126	default:
	127	break;
	128	}
	129	break;
	130	case ChClassId::GraphicsVic:
	131	ThiStateWrite(vic_thi_state, static_cast<u32>(offset), {data});
	132	switch (static_cast<ThiMethod>(offset)) {
	133	case ThiMethod::IncSyncpt: {
	134	LOG_DEBUG(Service_NVDRV, "VIC Class IncSyncpt Method");
	135	const auto syncpoint_id = static_cast<u32>(data & 0xFF);
	136	const auto cond = static_cast<u32>((data >> 8) & 0xFF);
	137	if (cond == 0) {
	138	vic_sync->Increment(syncpoint_id);
	139	} else {
	140	vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
	141	vic_sync->SignalDone(syncpoint_id);
	142	}
	143	break;
	144	}
	145	case ThiMethod::SetMethod1:
	146	LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})",
	147	static_cast<u32>(vic_thi_state.method_0));
	148	vic_processor->ProcessMethod(static_cast<Tegra::Vic::Method>(vic_thi_state.method_0),
	149	{data});
	150	break;
	151	default:
	152	break;
	153	}
	154	break;
	155	case ChClassId::Host1x:
	156	// This device is mainly for syncpoint synchronization
	157	LOG_DEBUG(Service_NVDRV, "Host1X Class Method");
	158	host1x_processor->ProcessMethod(static_cast<Tegra::Host1x::Method>(offset), {data});
	159	break;
	160	default:
	161	UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class));
	162	break;
	163	}
	164	}
	165
	166	void CDmaPusher::ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments) {
	167	u8* const state_offset = reinterpret_cast<u8*>(&state) + sizeof(u32) * offset;
	168	std::memcpy(state_offset, arguments.data(), sizeof(u32) * arguments.size());
	169	}
	170
	171	} // namespace Tegra