video_core: NVDEC Implementation

This commit aims to implement the NVDEC (Nvidia Decoder) functionality, with video frame decoding being handled by the FFmpeg library. The process begins with ioctl commands being sent to the emulated NVDEC and VIC (Video Image Composer) devices. These allocate the necessary GPU buffers for the frame data and provide information about the incoming video data. A Submit command then signals the GPU to process and decode the frame data. To decode a frame, the respective codec's header must be manually composed from the information provided by NVDEC and then sent, together with the raw frame data, to the FFmpeg library. Currently, H264 and VP9 are supported, with VP9 having some minor artifacting issues related mainly to the reference frame composition in its uncompressed header. Async GPU is not properly implemented at the moment. Co-Authored-By: David <25727384+ogniK5377@users.noreply.github.com>
author: ameerj 2020-10-26 23:07:36 -0400
committer: ameerj 2020-10-26 23:07:36 -0400
commit: eb67a45ca82bc01ac843c853fd3c17f2a90e0250 (patch)
tree: 11e78a1b728ef0a608fae43d966b613eb4e6d58a /src/video_core/cdma_pusher.h
parent: Merge pull request #4827 from lioncash/trunc (diff)
download: yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.gz
yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.xz
yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.zip
1 files changed, 138 insertions, 0 deletions
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
new file mode 100644
index 000000000..982f309c5
--- /dev/null
+++ b/src/video_core/cdma_pusher.h
@@ -0,0 +1,138 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#pragma once
+#include <memory>
+#include <unordered_map>
+#include <vector>
+#include <queue>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/command_classes/sync_manager.h"
+namespace Tegra {
+class GPU;
+class Nvdec;
+class Vic;
+class Host1x;
+enum class ChSubmissionMode : u32 { // Type of the 4-bit ChCommandHeader::submission_mode field
+    SetClass = 0, // NOTE(review): presumably updates CDmaPusher::current_class - confirm in cdma_pusher.cpp
+    Incrementing = 1, // NOTE(review): presumably tied to CDmaPusher::incrementing - confirm
+    NonIncrementing = 2,
+    Mask = 3, // NOTE(review): presumably tied to CDmaPusher::mask - confirm
+    Immediate = 4,
+    Restart = 5,
+    Gather = 6, // Largest enumerator declared here
+};
+enum class ChClassId : u32 { // Class identifiers; used by ChCommand::class_id and CDmaPusher::current_class
+    NoClass = 0x0, // Also the value a value-initialized ChClassId{} takes
+    Host1x = 0x1, // NOTE(review): presumably handled by CDmaPusher::host1x_processor - confirm
+    VideoEncodeMpeg = 0x20,
+    VideoEncodeNvEnc = 0x21,
+    VideoStreamingVi = 0x30,
+    VideoStreamingIsp = 0x32,
+    VideoStreamingIspB = 0x34,
+    VideoStreamingViI2c = 0x36,
+    GraphicsVic = 0x5d, // NOTE(review): presumably handled by CDmaPusher::vic_processor - confirm
+    Graphics3D = 0x60,
+    GraphicsGpu = 0x61,
+    Tsec = 0xe0,
+    TsecB = 0xe1,
+    NvJpg = 0xc0, // Listed out of numeric order (0xc0 after 0xe1); the values themselves are unaffected
+    NvDec = 0xf0 // NOTE(review): presumably handled by CDmaPusher::nvdec_processor - confirm
+};
+enum class ChMethod : u32 { // Type of the ChCommandHeader::method_offset bit field
+    Empty = 0,
+    SetMethod = 0x10, // NOTE(review): appears to equal ThiMethod::SetMethod0 if INSERT_PADDING_WORDS(n) reserves n 32-bit words - confirm
+    SetData = 0x11, // NOTE(review): likewise appears to equal ThiMethod::SetMethod1 - confirm
+};
+union ChCommandHeader { // A single 32-bit command word, readable whole or through three bit fields
+    u32 raw; // The whole word
+    BitField<0, 16, u32> value; // Assuming BitField<position, bits, type>: bits 0..15 - confirm in common/bit_field.h
+    BitField<16, 12, ChMethod> method_offset; // Bits 16..27 under the same assumption
+    BitField<28, 4, ChSubmissionMode> submission_mode; // Bits 28..31 under the same assumption
+};
+static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size"); // Compile-time guard: the union must stay exactly one u32 wide
+struct ChCommand { // Groups a class id, a method offset and a list of u32 argument words
+    ChClassId class_id{}; // Value-initialized, i.e. ChClassId::NoClass
+    int method_offset{}; // Plain int here, whereas ChCommandHeader::method_offset is typed as ChMethod
+    std::vector<u32> arguments; // Default-constructed, so initially empty
+};
+using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>; // Parameter type of CDmaPusher::Push and element type of CDmaPusher::cdma_queue
+using ChCommandList = std::vector<Tegra::ChCommand>; // Not referenced anywhere else in this header
+struct ThiRegisters { // Register block; ThiMethod derives its values from these member offsets
+    u32_le increment_syncpt{}; // First member, so ThiMethod::IncSyncpt evaluates to 0
+    INSERT_PADDING_WORDS(1); // NOTE(review): assumed to reserve one 32-bit word - confirm against the macro definition
+    u32_le increment_syncpt_error{};
+    u32_le ctx_switch_incremement_syncpt{}; // NOTE(review): "incremement" looks like a misspelling of "increment"; renaming would change the interface
+    INSERT_PADDING_WORDS(4);
+    u32_le ctx_switch{};
+    INSERT_PADDING_WORDS(1);
+    u32_le ctx_syncpt_eof{};
+    INSERT_PADDING_WORDS(5);
+    u32_le method_0{}; // Word index 0x10 if every field and padding word is 4 bytes - TODO confirm
+    u32_le method_1{}; // Word index 0x11 under the same assumption
+    INSERT_PADDING_WORDS(12);
+    u32_le int_status{};
+    u32_le int_mask{}; // Last member of the block
+};
+enum class ThiMethod : u32 { // Word indices into ThiRegisters (byte offset divided by sizeof(u32)); NOTE(review): offsetof needs <cstddef>, which this header does not include directly
+    IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32), // 0, since increment_syncpt is the first member
+    SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32), // NOTE(review): appears to be 0x10, matching ChMethod::SetMethod - confirm
+    SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32), // NOTE(review): appears to be 0x11, matching ChMethod::SetData - confirm
+};
+class CDmaPusher { // Queues lists of ChCommandHeader words and processes them; holds per-device processors and ThiRegisters state
+public:
+    explicit CDmaPusher(GPU& gpu); // Defined out of line; the class keeps a GPU& member named gpu
+    ~CDmaPusher(); // Defined out of line; Nvdec, Vic and Host1x are only forward-declared in this header
+    /// Queue a list of command headers; takes an rvalue reference, so the caller hands the list over
+    void Push(ChCommandHeaderList&& entries);
+    /// Process queued command buffer entries (presumably drains cdma_queue - confirm in cdma_pusher.cpp)
+    void DispatchCalls();
+    /// Process one queue element
+    void Step();
+    /// Invoke command class devices to execute the command based on the current state
+    void ExecuteCommand(u32 offset, u32 data); // NOTE(review): the `offset` parameter shadows the s32 member of the same name
+private:
+    /// Write arguments value to the ThiRegisters member at the specified offset
+    void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments); // The `offset` parameter shadows the member here as well
+    GPU& gpu; // Reference member: non-owning, and the referenced GPU must outlive this object
+    std::shared_ptr<Tegra::Nvdec> nvdec_processor; // shared_ptr, unlike the unique_ptr processors below; NOTE(review): presumably shared with another component - confirm
+    std::unique_ptr<Tegra::Vic> vic_processor;
+    std::unique_ptr<Tegra::Host1x> host1x_processor;
+    std::unique_ptr<SyncptIncrManager> nvdec_sync; // SyncptIncrManager is presumably declared in video_core/command_classes/sync_manager.h
+    std::unique_ptr<SyncptIncrManager> vic_sync;
+    ChClassId current_class{}; // Value-initialized, i.e. ChClassId::NoClass
+    ThiRegisters vic_thi_state{}; // One ThiRegisters block for VIC ...
+    ThiRegisters nvdec_thi_state{}; // ... and a separate one for NVDEC
+    s32 count{}; // count/offset/mask/incrementing: presumably decode state for the command in flight - confirm in cdma_pusher.cpp
+    s32 offset{};
+    s32 mask{};
+    bool incrementing{};
+    // Queue of command lists to be processed (std::queue, so first-in first-out)
+    std::queue<ChCommandHeaderList> cdma_queue;
+};
+} // namespace Tegra
author	ameerj	2020-10-26 23:07:36 -0400
committer	ameerj	2020-10-26 23:07:36 -0400
commit	eb67a45ca82bc01ac843c853fd3c17f2a90e0250 (patch)
tree	11e78a1b728ef0a608fae43d966b613eb4e6d58a /src/video_core/cdma_pusher.h
parent	Merge pull request #4827 from lioncash/trunc (diff)
download	yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.gz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.tar.xz yuzu-eb67a45ca82bc01ac843c853fd3c17f2a90e0250.zip

diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h new file mode 100644 index 000000000..982f309c5 --- /dev/null +++ b/src/video_core/cdma_pusher.h
@@ -0,0 +1,138 @@
	1	// Copyright 2020 yuzu Emulator Project
	2	// Licensed under GPLv2 or any later version
	3	// Refer to the license.txt file included.
	4
	5	#pragma once
	6
	7	#include <memory>
	8	#include <unordered_map>
	9	#include <vector>
	10	#include <queue>
	11
	12	#include "common/bit_field.h"
	13	#include "common/common_types.h"
	14	#include "video_core/command_classes/sync_manager.h"
	15
	16	namespace Tegra {
	17
	18	class GPU;
	19	class Nvdec;
	20	class Vic;
	21	class Host1x;
	22
	23	enum class ChSubmissionMode : u32 { // Type of the 4-bit ChCommandHeader::submission_mode field
	24	SetClass = 0, // NOTE(review): presumably updates CDmaPusher::current_class - confirm in cdma_pusher.cpp
	25	Incrementing = 1, // NOTE(review): presumably tied to CDmaPusher::incrementing - confirm
	26	NonIncrementing = 2,
	27	Mask = 3, // NOTE(review): presumably tied to CDmaPusher::mask - confirm
	28	Immediate = 4,
	29	Restart = 5,
	30	Gather = 6, // Largest enumerator declared here
	31	};
	32
	33	enum class ChClassId : u32 { // Class identifiers; used by ChCommand::class_id and CDmaPusher::current_class
	34	NoClass = 0x0, // Also the value a value-initialized ChClassId{} takes
	35	Host1x = 0x1, // NOTE(review): presumably handled by CDmaPusher::host1x_processor - confirm
	36	VideoEncodeMpeg = 0x20,
	37	VideoEncodeNvEnc = 0x21,
	38	VideoStreamingVi = 0x30,
	39	VideoStreamingIsp = 0x32,
	40	VideoStreamingIspB = 0x34,
	41	VideoStreamingViI2c = 0x36,
	42	GraphicsVic = 0x5d, // NOTE(review): presumably handled by CDmaPusher::vic_processor - confirm
	43	Graphics3D = 0x60,
	44	GraphicsGpu = 0x61,
	45	Tsec = 0xe0,
	46	TsecB = 0xe1,
	47	NvJpg = 0xc0, // Listed out of numeric order (0xc0 after 0xe1); the values themselves are unaffected
	48	NvDec = 0xf0 // NOTE(review): presumably handled by CDmaPusher::nvdec_processor - confirm
	49	};
	50
	51	enum class ChMethod : u32 { // Type of the ChCommandHeader::method_offset bit field
	52	Empty = 0,
	53	SetMethod = 0x10, // NOTE(review): appears to equal ThiMethod::SetMethod0 if INSERT_PADDING_WORDS(n) reserves n 32-bit words - confirm
	54	SetData = 0x11, // NOTE(review): likewise appears to equal ThiMethod::SetMethod1 - confirm
	55	};
	56
	57	union ChCommandHeader { // A single 32-bit command word, readable whole or through three bit fields
	58	u32 raw; // The whole word
	59	BitField<0, 16, u32> value; // Assuming BitField<position, bits, type>: bits 0..15 - confirm in common/bit_field.h
	60	BitField<16, 12, ChMethod> method_offset; // Bits 16..27 under the same assumption
	61	BitField<28, 4, ChSubmissionMode> submission_mode; // Bits 28..31 under the same assumption
	62	};
	63	static_assert(sizeof(ChCommandHeader) == sizeof(u32), "ChCommand header is an invalid size"); // Compile-time guard: the union must stay exactly one u32 wide
	64
	65	struct ChCommand { // Groups a class id, a method offset and a list of u32 argument words
	66	ChClassId class_id{}; // Value-initialized, i.e. ChClassId::NoClass
	67	int method_offset{}; // Plain int here, whereas ChCommandHeader::method_offset is typed as ChMethod
	68	std::vector<u32> arguments; // Default-constructed, so initially empty
	69	};
	70
	71	using ChCommandHeaderList = std::vector<Tegra::ChCommandHeader>; // Parameter type of CDmaPusher::Push and element type of CDmaPusher::cdma_queue
	72	using ChCommandList = std::vector<Tegra::ChCommand>; // Not referenced anywhere else in this header
	73
	74	struct ThiRegisters { // Register block; ThiMethod derives its values from these member offsets
	75	u32_le increment_syncpt{}; // First member, so ThiMethod::IncSyncpt evaluates to 0
	76	INSERT_PADDING_WORDS(1); // NOTE(review): assumed to reserve one 32-bit word - confirm against the macro definition
	77	u32_le increment_syncpt_error{};
	78	u32_le ctx_switch_incremement_syncpt{}; // NOTE(review): "incremement" looks like a misspelling of "increment"; renaming would change the interface
	79	INSERT_PADDING_WORDS(4);
	80	u32_le ctx_switch{};
	81	INSERT_PADDING_WORDS(1);
	82	u32_le ctx_syncpt_eof{};
	83	INSERT_PADDING_WORDS(5);
	84	u32_le method_0{}; // Word index 0x10 if every field and padding word is 4 bytes - TODO confirm
	85	u32_le method_1{}; // Word index 0x11 under the same assumption
	86	INSERT_PADDING_WORDS(12);
	87	u32_le int_status{};
	88	u32_le int_mask{}; // Last member of the block
	89	};
	90
	91	enum class ThiMethod : u32 { // Word indices into ThiRegisters (byte offset divided by sizeof(u32)); NOTE(review): offsetof needs <cstddef>, which this header does not include directly
	92	IncSyncpt = offsetof(ThiRegisters, increment_syncpt) / sizeof(u32), // 0, since increment_syncpt is the first member
	93	SetMethod0 = offsetof(ThiRegisters, method_0) / sizeof(u32), // NOTE(review): appears to be 0x10, matching ChMethod::SetMethod - confirm
	94	SetMethod1 = offsetof(ThiRegisters, method_1) / sizeof(u32), // NOTE(review): appears to be 0x11, matching ChMethod::SetData - confirm
	95	};
	96
	97	class CDmaPusher { // Queues lists of ChCommandHeader words and processes them; holds per-device processors and ThiRegisters state
	98	public:
	99	explicit CDmaPusher(GPU& gpu); // Defined out of line; the class keeps a GPU& member named gpu
	100	~CDmaPusher(); // Defined out of line; Nvdec, Vic and Host1x are only forward-declared in this header
	101
	102	/// Queue a list of command headers; takes an rvalue reference, so the caller hands the list over
	103	void Push(ChCommandHeaderList&& entries);
	104
	105	/// Process queued command buffer entries (presumably drains cdma_queue - confirm in cdma_pusher.cpp)
	106	void DispatchCalls();
	107
	108	/// Process one queue element
	109	void Step();
	110
	111	/// Invoke command class devices to execute the command based on the current state
	112	void ExecuteCommand(u32 offset, u32 data); // NOTE(review): the `offset` parameter shadows the s32 member of the same name
	113
	114	private:
	115	/// Write arguments value to the ThiRegisters member at the specified offset
	116	void ThiStateWrite(ThiRegisters& state, u32 offset, const std::vector<u32>& arguments); // The `offset` parameter shadows the member here as well
	117
	118	GPU& gpu; // Reference member: non-owning, and the referenced GPU must outlive this object
	119
	120	std::shared_ptr<Tegra::Nvdec> nvdec_processor; // shared_ptr, unlike the unique_ptr processors below; NOTE(review): presumably shared with another component - confirm
	121	std::unique_ptr<Tegra::Vic> vic_processor;
	122	std::unique_ptr<Tegra::Host1x> host1x_processor;
	123	std::unique_ptr<SyncptIncrManager> nvdec_sync; // SyncptIncrManager is presumably declared in video_core/command_classes/sync_manager.h
	124	std::unique_ptr<SyncptIncrManager> vic_sync;
	125	ChClassId current_class{}; // Value-initialized, i.e. ChClassId::NoClass
	126	ThiRegisters vic_thi_state{}; // One ThiRegisters block for VIC ...
	127	ThiRegisters nvdec_thi_state{}; // ... and a separate one for NVDEC
	128
	129	s32 count{}; // count/offset/mask/incrementing: presumably decode state for the command in flight - confirm in cdma_pusher.cpp
	130	s32 offset{};
	131	s32 mask{};
	132	bool incrementing{};
	133
	134	// Queue of command lists to be processed (std::queue, so first-in first-out)
	135	std::queue<ChCommandHeaderList> cdma_queue;
	136	};
	137
	138	} // namespace Tegra