2 files changed, 632 insertions, 0 deletions
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp
new file mode 100644
index 000000000..dcd07e6d2
--- /dev/null
+++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp
@@ -0,0 +1,419 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "video_core/host1x/ffmpeg/ffmpeg.h"
+extern "C" {
+#ifdef LIBVA_FOUND
+// for querying VAAPI driver information
+#include <libavutil/hwcontext_vaapi.h>
+#endif
+}
+namespace FFmpeg {
+namespace {
+// Pixel format requested for frames copied back from a hardware decoder.
+constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
+// Pixel format selected when GetGpuFormat gives up on hardware decoding.
+constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
+// Hardware device types to try, in order of preference. The platform-specific
+// entries are chosen at compile time.
+constexpr std::array PreferredGpuDecoders = {
+    AV_HWDEVICE_TYPE_CUDA,
+#ifdef _WIN32
+    AV_HWDEVICE_TYPE_D3D11VA,
+    AV_HWDEVICE_TYPE_DXVA2,
+#elif defined(__unix__)
+    AV_HWDEVICE_TYPE_VAAPI,
+    AV_HWDEVICE_TYPE_VDPAU,
+#endif
+    // last resort for Linux Flatpak (w/ NVIDIA)
+    AV_HWDEVICE_TYPE_VULKAN,
+};
+// Format-selection callback installed on hardware-accelerated codec contexts.
+// Returns the hardware pixel format stored in codec_context->pix_fmt when the
+// decoder offers it in pix_fmts (a list terminated by AV_PIX_FMT_NONE). Otherwise
+// drops the hardware device reference and switches the context to PreferredCpuFormat.
+AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
+    const AVPixelFormat wanted = codec_context->pix_fmt;
+    const AVPixelFormat* candidate = pix_fmts;
+    while (*candidate != AV_PIX_FMT_NONE) {
+        if (*candidate == wanted) {
+            return wanted;
+        }
+        ++candidate;
+    }
+
+    // None of the offered formats matched: give up on the hardware path.
+    LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
+    av_buffer_unref(&codec_context->hw_device_ctx);
+    codec_context->pix_fmt = PreferredCpuFormat;
+    return PreferredCpuFormat;
+}
+// Converts an FFmpeg error code into its textual description.
+std::string AVError(int errnum) {
+    // Zero-filled so the result is always terminated, whatever FFmpeg writes.
+    std::array<char, AV_ERROR_MAX_STRING_SIZE> message{};
+    av_make_error_string(message.data(), message.size() - 1, errnum);
+    return std::string{message.data()};
+}
+} // namespace
+// Allocates an AVPacket that points at the caller's bytes; nothing is copied.
+Packet::Packet(std::span<const u8> data) : m_packet{av_packet_alloc()} {
+    // AVPacket::data is a non-const pointer, hence the const_cast.
+    u8* const bytes = const_cast<u8*>(data.data());
+    const s32 byte_count = static_cast<s32>(data.size());
+    m_packet->data = bytes;
+    m_packet->size = byte_count;
+}
+// Frees the AVPacket allocated by the constructor.
+Packet::~Packet() {
+    av_packet_free(&m_packet);
+}
+// Allocates an empty AVFrame.
+Frame::Frame() : m_frame{av_frame_alloc()} {}
+// Frees the AVFrame allocated by the constructor.
+Frame::~Frame() {
+    av_frame_free(&m_frame);
+}
+// Looks up the FFmpeg decoder that corresponds to the given NVDEC codec.
+// An unrecognised codec is reported via UNIMPLEMENTED_MSG and looked up as
+// AV_CODEC_ID_NONE.
+Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
+    using VideoCodec = Tegra::Host1x::NvdecCommon::VideoCodec;
+
+    AVCodecID codec_id = AV_CODEC_ID_NONE;
+    switch (codec) {
+    case VideoCodec::H264:
+        codec_id = AV_CODEC_ID_H264;
+        break;
+    case VideoCodec::VP8:
+        codec_id = AV_CODEC_ID_VP8;
+        break;
+    case VideoCodec::VP9:
+        codec_id = AV_CODEC_ID_VP9;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unknown codec {}", codec);
+        break;
+    }
+    m_codec = avcodec_find_decoder(codec_id);
+}
+// Checks whether this codec can decode through a hardware device context of the
+// given type.
+//
+// On success, writes the hardware pixel format advertised by the matching codec
+// configuration to *out_pix_fmt and returns true. Returns false, leaving
+// *out_pix_fmt untouched, when no configuration matches or when no decoder was
+// found for the requested codec.
+bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
+    if (m_codec == nullptr) {
+        // The constructor stores whatever avcodec_find_decoder returned, which may be null.
+        // Both avcodec_get_hw_config and the log line below need a valid codec.
+        LOG_DEBUG(HW_GPU, "No decoder available, cannot use device type {}",
+                  av_hwdevice_get_type_name(type));
+        return false;
+    }
+    // Walk the codec's hardware configurations until FFmpeg reports there are no more.
+    for (int i = 0;; i++) {
+        const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
+        if (!config) {
+            LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
+                      av_hwdevice_get_type_name(type));
+            break;
+        }
+        // Only configurations driven through hw_device_ctx are usable here.
+        if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
+            config->device_type == type) {
+            LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
+            *out_pix_fmt = config->pix_fmt;
+            return true;
+        }
+    }
+    return false;
+}
+// Collects every hardware device type that av_hwdevice_iterate_types enumerates.
+std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
+    std::vector<AVHWDeviceType> types;
+    // Iteration starts from, and is terminated by, AV_HWDEVICE_TYPE_NONE.
+    for (AVHWDeviceType type = av_hwdevice_iterate_types(AV_HWDEVICE_TYPE_NONE);
+         type != AV_HWDEVICE_TYPE_NONE; type = av_hwdevice_iterate_types(type)) {
+        types.push_back(type);
+    }
+    return types;
+}
+// Releases this object's reference to the hardware device context.
+HardwareContext::~HardwareContext() {
+    av_buffer_unref(&m_gpu_decoder);
+}
+// Tries each entry of PreferredGpuDecoders in order and attaches the first one that
+// is enumerated by FFmpeg, can be created, and is supported by the decoder.
+// Returns true once decoder_context has been set up for hardware decoding, false if
+// no preferred device type worked.
+bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
+                                           const Decoder& decoder) {
+    const std::vector<AVHWDeviceType> available = GetSupportedDeviceTypes();
+    const auto is_available = [&available](AVHWDeviceType wanted) {
+        return std::ranges::find(available, wanted) != available.end();
+    };
+
+    for (const AVHWDeviceType candidate : PreferredGpuDecoders) {
+        if (!is_available(candidate)) {
+            LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(candidate));
+            continue;
+        }
+        if (!InitializeWithType(candidate)) {
+            continue;
+        }
+        AVPixelFormat device_format;
+        if (!decoder.SupportsDecodingOnDevice(&device_format, candidate)) {
+            continue;
+        }
+        decoder_context.InitializeHardwareDecoder(*this, device_format);
+        return true;
+    }
+    return false;
+}
+// Creates a hardware device context of the given type, replacing any context held
+// from an earlier attempt.
+//
+// Returns true when the context was created and is considered usable. Returns false
+// when creation fails or when the device is a VAAPI driver backed by VDPAU; in both
+// cases no device context is left behind in m_gpu_decoder.
+bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
+    // Drop whatever a previously attempted device type left behind.
+    av_buffer_unref(&m_gpu_decoder);
+    if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
+        ret < 0) {
+        LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
+                  AVError(ret));
+        return false;
+    }
+#ifdef LIBVA_FOUND
+    if (type == AV_HWDEVICE_TYPE_VAAPI) {
+        // We need to determine if this is an impersonated VAAPI driver.
+        auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
+        auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
+        const char* vendor_name = vaQueryVendorString(vactx->display);
+        if (vendor_name == nullptr) {
+            // The driver reported no vendor string. strstr and the formatter both need a
+            // valid string, so the driver cannot be identified; accept it as-is.
+            LOG_DEBUG(HW_GPU, "VAAPI driver did not report a vendor string");
+        } else if (strstr(vendor_name, "VDPAU backend")) {
+            // VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
+            LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
+            // Do not keep the rejected device alive or expose it through GetBufferRef().
+            av_buffer_unref(&m_gpu_decoder);
+            return false;
+        } else {
+            // According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
+            // Log the driver name just in case.
+            LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
+        }
+    }
+#endif
+    return true;
+}
+// Allocates a codec context for the decoder and applies the decoding options:
+// the "tune" private option is set to "zerolatency", the thread count is set to 0,
+// and frame-level threading is masked out of the thread type.
+DecoderContext::DecoderContext(const Decoder& decoder)
+    : m_codec_context{avcodec_alloc_context3(decoder.GetCodec())} {
+    av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
+    m_codec_context->thread_type = m_codec_context->thread_type & ~FF_THREAD_FRAME;
+    m_codec_context->thread_count = 0;
+}
+// Drops the codec context's hardware device reference, then frees the codec context.
+DecoderContext::~DecoderContext() {
+    av_buffer_unref(&m_codec_context->hw_device_ctx);
+    avcodec_free_context(&m_codec_context);
+}
+// Configures the codec context for hardware decoding: it takes its own reference to
+// the device context, records the hardware pixel format, and installs GetGpuFormat
+// as the format-selection callback.
+void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
+                                               AVPixelFormat hw_pix_fmt) {
+    AVBufferRef* const device_ref = av_buffer_ref(context.GetBufferRef());
+    m_codec_context->pix_fmt = hw_pix_fmt;
+    m_codec_context->get_format = GetGpuFormat;
+    m_codec_context->hw_device_ctx = device_ref;
+}
+// Opens the codec context for the given decoder.
+// Returns false (after logging) if avcodec_open2 fails. Logs a notice when no
+// hardware device is attached, i.e. decoding will run in software.
+bool DecoderContext::OpenContext(const Decoder& decoder) {
+    const int open_result = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr);
+    if (open_result < 0) {
+        LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(open_result));
+        return false;
+    }
+
+    const bool has_hardware_device = m_codec_context->hw_device_ctx != nullptr;
+    if (!has_hardware_device) {
+        LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
+    }
+    return true;
+}
+// Submits one compressed packet to the decoder.
+// Returns true on success; logs the FFmpeg error and returns false otherwise.
+bool DecoderContext::SendPacket(const Packet& packet) {
+    const int send_result = avcodec_send_packet(m_codec_context, packet.GetPacket());
+    if (send_result >= 0) {
+        return true;
+    }
+    LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(send_result));
+    return false;
+}
+// Retrieves one decoded frame from the decoder.
+//
+// On success, returns the frame and writes whether it is interlaced to
+// *out_is_interlaced. With a hardware device attached, the frame is first received
+// into a temporary frame and then transferred into the returned one, which is
+// requested in PreferredGpuFormat. Returns null (after logging) if receiving or
+// transferring fails; *out_is_interlaced is not written if receiving fails.
+std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
+    auto output = std::make_unique<Frame>();
+
+    // Receives into `target` and reports the interlacing flag of what was received.
+    const auto receive_into = [this, out_is_interlaced](AVFrame* target) -> bool {
+        const int receive_result = avcodec_receive_frame(m_codec_context, target);
+        if (receive_result < 0) {
+            LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(receive_result));
+            return false;
+        }
+        *out_is_interlaced = target->interlaced_frame != 0;
+        return true;
+    };
+
+    if (!m_codec_context->hw_device_ctx) {
+        // Software decoding: the decoder writes straight into the output frame.
+        if (!receive_into(output->GetFrame())) {
+            return {};
+        }
+        return output;
+    }
+
+    // Hardware decoding: receive the hardware result into a separate frame, then
+    // transfer its data into the output frame.
+    Frame hardware_frame;
+    if (!receive_into(hardware_frame.GetFrame())) {
+        return {};
+    }
+    output->SetFormat(PreferredGpuFormat);
+    const int transfer_result =
+        av_hwframe_transfer_data(output->GetFrame(), hardware_frame.GetFrame(), 0);
+    if (transfer_result < 0) {
+        LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(transfer_result));
+        return {};
+    }
+    return output;
+}
+// Builds a filter graph of the form buffer source ("in") -> yadif -> buffer sink ("out"),
+// with the source configured from the size and pixel format of `frame`.
+// m_initialized becomes true only if every setup step succeeds; each failure is logged
+// and leaves the object uninitialized.
+DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
+    const AVFilter* buffer_src = avfilter_get_by_name("buffer");
+    const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
+    AVFilterInOut* inputs = avfilter_inout_alloc();
+    AVFilterInOut* outputs = avfilter_inout_alloc();
+    // Free the in/out descriptors on every exit path, including the early returns below.
+    SCOPE_EXIT({
+        avfilter_inout_free(&inputs);
+        avfilter_inout_free(&outputs);
+    });
+    // Don't know how to get the accurate time_base but it doesn't matter for yadif filter
+    // so just use 1/1 to make buffer filter happy
+    std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
+                                   frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
+    m_filter_graph = avfilter_graph_alloc();
+    // Source end of the graph: where decoded frames are pushed in.
+    int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
+                                           nullptr, m_filter_graph);
+    if (ret < 0) {
+        LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
+        return;
+    }
+    // Sink end of the graph: where filtered frames are pulled out.
+    ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
+                                       m_filter_graph);
+    if (ret < 0) {
+        LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
+        return;
+    }
+    // The parsed chain's output feeds the sink, so "inputs" describes the "out" pad...
+    inputs->name = av_strdup("out");
+    inputs->filter_ctx = m_sink_context;
+    inputs->pad_idx = 0;
+    inputs->next = nullptr;
+    // ...and the chain's input is fed by the source, so "outputs" describes the "in" pad.
+    outputs->name = av_strdup("in");
+    outputs->filter_ctx = m_source_context;
+    outputs->pad_idx = 0;
+    outputs->next = nullptr;
+    // NOTE(review): positional yadif options, presumably mode:parity:deint; confirm the
+    // meaning of 1:-1:0 against the FFmpeg filter documentation.
+    const char* description = "yadif=1:-1:0";
+    ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
+    if (ret < 0) {
+        LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
+        return;
+    }
+    ret = avfilter_graph_config(m_filter_graph, nullptr);
+    if (ret < 0) {
+        LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
+        return;
+    }
+    m_initialized = true;
+}
+// Queues a decoded frame on the filter graph's source.
+// Returns false if the graph was never set up successfully, or (after logging) if
+// FFmpeg rejects the frame.
+bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
+    if (!m_initialized) {
+        // The constructor already logged why setup failed. m_source_context may be null
+        // or belong to an unconfigured graph, so it must not be handed to FFmpeg.
+        return false;
+    }
+    if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
+                                                     AV_BUFFERSRC_FLAG_KEEP_REF);
+        ret < 0) {
+        LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
+        return false;
+    }
+    return true;
+}
+// Pulls one filtered frame from the filter graph's sink.
+// Returns null when no frame is available (the sink needs more input or has reached
+// end of stream), when the graph was never set up successfully, or, after logging,
+// when FFmpeg reports any other error.
+std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
+    if (!m_initialized) {
+        // Setup failed in the constructor; m_sink_context is not usable.
+        return {};
+    }
+    auto dst_frame = std::make_unique<Frame>();
+    const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
+    // AVERROR_EOF is already an error code. Only errno values such as EAGAIN are
+    // wrapped with AVERROR(); wrapping AVERROR_EOF again would negate it so the
+    // comparison could never match.
+    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
+        return {};
+    }
+    if (ret < 0) {
+        LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
+        return {};
+    }
+    return dst_frame;
+}
+// Frees the filter graph allocated by the constructor.
+DeinterlaceFilter::~DeinterlaceFilter() {
+    avfilter_graph_free(&m_filter_graph);
+}
+// Destroys all decoding state, returning the object to its default-constructed state.
+// The order is the reverse of creation: Initialize creates the decoder, the decoder
+// context and then the hardware context, and the deinterlace filter is created last,
+// on demand, by ReceiveFrames.
+void DecodeApi::Reset() {
+    m_deinterlace_filter.reset();
+    m_hardware_context.reset();
+    m_decoder_context.reset();
+    m_decoder.reset();
+}
+// Discards any previous state and sets up decoding for the given codec.
+// When the nvdec_emulation setting selects GPU decoding, a hardware context is
+// created and offered to the decoder context; the outcome of that attempt is not
+// checked here. Returns false, with all state reset, if the codec context cannot be
+// opened.
+bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
+    Reset();
+    m_decoder.emplace(codec);
+    m_decoder_context.emplace(*m_decoder);
+
+    // Enable GPU decoding if requested.
+    const bool gpu_requested =
+        Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu;
+    if (gpu_requested) {
+        m_hardware_context.emplace();
+        m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
+    }
+
+    // Open the decoder context.
+    if (m_decoder_context->OpenContext(*m_decoder)) {
+        return true;
+    }
+    Reset();
+    return false;
+}
+// Wraps packet_data in a Packet and submits it to the decoder.
+// configuration_size is accepted for the caller's convenience but is not used.
+// Returns false if no decoder is initialized (Initialize was never called or failed)
+// or if the decoder rejects the packet.
+bool DecodeApi::SendPacket(std::span<const u8> packet_data,
+                           [[maybe_unused]] size_t configuration_size) {
+    if (!m_decoder_context) {
+        // A failed Initialize resets every member; dereferencing the empty optional
+        // would be undefined behaviour.
+        return false;
+    }
+    FFmpeg::Packet packet(packet_data);
+    return m_decoder_context->SendPacket(packet);
+}
+// Receives one frame from the decoder and appends the resulting output frame(s) to
+// frame_queue. Progressive frames are queued as-is. Interlaced frames are run through
+// the deinterlace filter (created on first use from that frame's parameters) and
+// every frame the filter produces is queued. Nothing is queued if no decoder is
+// initialized, no frame could be received, or the filter rejects the frame.
+void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
+    if (!m_decoder_context) {
+        // A failed Initialize resets every member; there is nothing to receive from.
+        return;
+    }
+    // Receive raw frame from decoder.
+    bool is_interlaced = false;
+    auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
+    if (!frame) {
+        return;
+    }
+    if (!is_interlaced) {
+        // If the frame is not interlaced, we can pend it now.
+        frame_queue.push(std::move(frame));
+        return;
+    }
+    // Create the deinterlacer if needed.
+    if (!m_deinterlace_filter) {
+        m_deinterlace_filter.emplace(*frame);
+    }
+    // Add the frame we just received.
+    if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
+        return;
+    }
+    // Pend output fields until the filter has nothing more to give.
+    while (auto filter_frame = m_deinterlace_filter->DrainSinkFrame()) {
+        frame_queue.push(std::move(filter_frame));
+    }
+}
+} // namespace FFmpeg
diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h
new file mode 100644
index 000000000..1de0bbd83
--- /dev/null
+++ b/src/video_core/host1x/ffmpeg/ffmpeg.h
@@ -0,0 +1,213 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+#pragma once
+#include <memory>
+#include <optional>
+#include <span>
+#include <vector>
+#include <queue>
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/host1x/nvdec_common.h"
+extern "C" {
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
+#endif
+#include <libavcodec/avcodec.h>
+#include <libavfilter/avfilter.h>
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+#include <libavutil/avutil.h>
+#include <libavutil/opt.h>
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+namespace FFmpeg {
+class Packet;
+class Frame;
+class Decoder;
+class HardwareContext;
+class DecoderContext;
+class DeinterlaceFilter;
+// Wraps an AVPacket, a container for compressed bitstream data.
+// The packet points directly at the bytes of the span given to the constructor (no
+// copy is made), so that memory must remain valid while the Packet is in use.
+class Packet {
+public:
+    YUZU_NON_COPYABLE(Packet);
+    YUZU_NON_MOVEABLE(Packet);
+    // Allocates the AVPacket and points it at `data`.
+    explicit Packet(std::span<const u8> data);
+    // Frees the AVPacket.
+    ~Packet();
+    // Returns the wrapped AVPacket.
+    AVPacket* GetPacket() const {
+        return m_packet;
+    }
+private:
+    // Wrapped packet; allocated in the constructor, freed in the destructor.
+    AVPacket* m_packet{};
+};
+// Wraps an AVFrame, a container for audio and video stream data.
+// The accessors below read or write the fields of the wrapped AVFrame directly.
+class Frame {
+public:
+    YUZU_NON_COPYABLE(Frame);
+    YUZU_NON_MOVEABLE(Frame);
+    // Allocates an empty AVFrame.
+    explicit Frame();
+    // Frees the AVFrame.
+    ~Frame();
+    // Returns the frame's width field.
+    int GetWidth() const {
+        return m_frame->width;
+    }
+    // Returns the frame's height field.
+    int GetHeight() const {
+        return m_frame->height;
+    }
+    // Returns the frame's format field, interpreted as a pixel format.
+    AVPixelFormat GetPixelFormat() const {
+        return static_cast<AVPixelFormat>(m_frame->format);
+    }
+    // Returns the line size of the given plane. `plane` is not range-checked.
+    int GetStride(int plane) const {
+        return m_frame->linesize[plane];
+    }
+    // Returns the frame's array of per-plane line sizes.
+    int* GetStrides() const {
+        return m_frame->linesize;
+    }
+    // Returns the data pointer of the given plane. `plane` is not range-checked.
+    u8* GetData(int plane) const {
+        return m_frame->data[plane];
+    }
+    // Returns the frame's array of per-plane data pointers.
+    u8** GetPlanes() const {
+        return m_frame->data;
+    }
+    // Sets the frame's format field.
+    void SetFormat(int format) {
+        m_frame->format = format;
+    }
+    // Returns the wrapped AVFrame.
+    AVFrame* GetFrame() const {
+        return m_frame;
+    }
+private:
+    // Wrapped frame; allocated in the constructor, freed in the destructor.
+    AVFrame* m_frame{};
+};
+// Wraps an AVCodec, a type containing information about a codec.
+// The codec pointer is looked up with avcodec_find_decoder and is not owned by
+// this class (the destructor is defaulted).
+class Decoder {
+public:
+    YUZU_NON_COPYABLE(Decoder);
+    YUZU_NON_MOVEABLE(Decoder);
+    // Looks up the FFmpeg decoder matching the given NVDEC codec.
+    explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
+    ~Decoder() = default;
+    // Returns true and writes the hardware pixel format to *out_pix_fmt if the codec
+    // can decode through a hardware device context of the given type.
+    bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
+    // Returns the wrapped AVCodec, exactly as returned by avcodec_find_decoder.
+    const AVCodec* GetCodec() const {
+        return m_codec;
+    }
+private:
+    // Result of avcodec_find_decoder for the requested codec.
+    const AVCodec* m_codec{};
+};
+// Wraps AVBufferRef for an accelerated decoder.
+// Holds at most one hardware device context at a time; it is replaced each time a new
+// device type is attempted and released on destruction.
+class HardwareContext {
+public:
+    YUZU_NON_COPYABLE(HardwareContext);
+    YUZU_NON_MOVEABLE(HardwareContext);
+    // Returns every hardware device type enumerated by av_hwdevice_iterate_types.
+    static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
+    explicit HardwareContext() = default;
+    ~HardwareContext();
+    // Tries the preferred device types in order and attaches the first usable one to
+    // decoder_context. Returns false if none could be used.
+    bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
+    // Returns the currently held device context reference (null if none).
+    AVBufferRef* GetBufferRef() const {
+        return m_gpu_decoder;
+    }
+private:
+    // Creates a device context of the given type, replacing the one currently held.
+    bool InitializeWithType(AVHWDeviceType type);
+    // Reference to the hardware device context.
+    AVBufferRef* m_gpu_decoder{};
+};
+// Wraps an AVCodecContext.
+// Typical use, as seen in DecodeApi: construct, optionally call
+// InitializeHardwareDecoder, call OpenContext, then alternate SendPacket and
+// ReceiveFrame.
+class DecoderContext {
+public:
+    YUZU_NON_COPYABLE(DecoderContext);
+    YUZU_NON_MOVEABLE(DecoderContext);
+    // Allocates the codec context for the given decoder and applies decoding options.
+    explicit DecoderContext(const Decoder& decoder);
+    // Frees the codec context.
+    ~DecoderContext();
+    // Attaches a reference to the hardware device held by `context` and records the
+    // hardware pixel format to request.
+    void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
+    // Opens the codec context; returns false on failure.
+    bool OpenContext(const Decoder& decoder);
+    // Submits one compressed packet; returns false on failure.
+    bool SendPacket(const Packet& packet);
+    // Returns the next decoded frame (null on failure) and writes whether it is
+    // interlaced to *out_is_interlaced.
+    std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
+    // Returns the wrapped AVCodecContext.
+    AVCodecContext* GetCodecContext() const {
+        return m_codec_context;
+    }
+private:
+    // Wrapped codec context; allocated in the constructor, freed in the destructor.
+    AVCodecContext* m_codec_context{};
+};
+// Wraps an AVFilterGraph.
+// The graph is a buffer source feeding a yadif filter feeding a buffer sink; frames
+// go in through AddSourceFrame and come out through DrainSinkFrame.
+class DeinterlaceFilter {
+public:
+    YUZU_NON_COPYABLE(DeinterlaceFilter);
+    YUZU_NON_MOVEABLE(DeinterlaceFilter);
+    // Builds the graph using the size and pixel format of `frame`.
+    explicit DeinterlaceFilter(const Frame& frame);
+    // Frees the graph.
+    ~DeinterlaceFilter();
+    // Pushes a frame into the graph; returns false on failure.
+    bool AddSourceFrame(const Frame& frame);
+    // Pulls one filtered frame out of the graph; returns null when none is available.
+    std::unique_ptr<Frame> DrainSinkFrame();
+private:
+    // The filter graph; allocated in the constructor, freed in the destructor.
+    AVFilterGraph* m_filter_graph{};
+    // Buffer source filter ("in") created inside m_filter_graph.
+    AVFilterContext* m_source_context{};
+    // Buffer sink filter ("out") created inside m_filter_graph.
+    AVFilterContext* m_sink_context{};
+    // True only if every graph setup step in the constructor succeeded.
+    bool m_initialized{};
+};
+// High-level decoding interface that owns the decoder, its codec context, the optional
+// hardware context and the lazily created deinterlace filter.
+class DecodeApi {
+public:
+    YUZU_NON_COPYABLE(DecodeApi);
+    YUZU_NON_MOVEABLE(DecodeApi);
+    DecodeApi() = default;
+    ~DecodeApi() = default;
+    // Resets any previous state and sets up decoding for `codec`; returns false if the
+    // codec context could not be opened.
+    bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
+    // Destroys all decoding state.
+    void Reset();
+    // Submits one compressed packet to the decoder; returns false on failure.
+    bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
+    // Receives a decoded frame and appends the resulting output frame(s) to frame_queue.
+    void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
+private:
+    // Members are destroyed in reverse declaration order, so the deinterlace filter goes
+    // first and the decoder last, matching the order used by Reset().
+    std::optional<FFmpeg::Decoder> m_decoder;
+    std::optional<FFmpeg::DecoderContext> m_decoder_context;
+    std::optional<FFmpeg::HardwareContext> m_hardware_context;
+    std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
+};
+} // namespace FFmpeg

diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp new file mode 100644 index 000000000..dcd07e6d2 --- /dev/null +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp
@@ -0,0 +1,419 @@
	1	// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
	2	// SPDX-License-Identifier: GPL-2.0-or-later
	3
	4	#include "common/assert.h"
	5	#include "common/logging/log.h"
	6	#include "common/scope_exit.h"
	7	#include "common/settings.h"
	8	#include "video_core/host1x/ffmpeg/ffmpeg.h"
	9
	10	extern "C" {
	11	#ifdef LIBVA_FOUND
	12	// for querying VAAPI driver information
	13	#include <libavutil/hwcontext_vaapi.h>
	14	#endif
	15	}
	16
	17	namespace FFmpeg {
	18
	19	namespace {
	20
	21	constexpr AVPixelFormat PreferredGpuFormat = AV_PIX_FMT_NV12;
	22	constexpr AVPixelFormat PreferredCpuFormat = AV_PIX_FMT_YUV420P;
	23	constexpr std::array PreferredGpuDecoders = {
	24	AV_HWDEVICE_TYPE_CUDA,
	25	#ifdef _WIN32
	26	AV_HWDEVICE_TYPE_D3D11VA,
	27	AV_HWDEVICE_TYPE_DXVA2,
	28	#elif defined(__unix__)
	29	AV_HWDEVICE_TYPE_VAAPI,
	30	AV_HWDEVICE_TYPE_VDPAU,
	31	#endif
	32	// last resort for Linux Flatpak (w/ NVIDIA)
	33	AV_HWDEVICE_TYPE_VULKAN,
	34	};
	35
	36	AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
	37	for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
	38	if (*p == codec_context->pix_fmt) {
	39	return codec_context->pix_fmt;
	40	}
	41	}
	42
	43	LOG_INFO(HW_GPU, "Could not find compatible GPU AV format, falling back to CPU");
	44	av_buffer_unref(&codec_context->hw_device_ctx);
	45
	46	codec_context->pix_fmt = PreferredCpuFormat;
	47	return codec_context->pix_fmt;
	48	}
	49
	50	std::string AVError(int errnum) {
	51	char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
	52	av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
	53	return errbuf;
	54	}
	55
	56	} // namespace
	57
	58	Packet::Packet(std::span<const u8> data) {
	59	m_packet = av_packet_alloc();
	60	m_packet->data = const_cast<u8*>(data.data());
	61	m_packet->size = static_cast<s32>(data.size());
	62	}
	63
	64	Packet::~Packet() {
	65	av_packet_free(&m_packet);
	66	}
	67
	68	Frame::Frame() {
	69	m_frame = av_frame_alloc();
	70	}
	71
	72	Frame::~Frame() {
	73	av_frame_free(&m_frame);
	74	}
	75
	76	Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
	77	const AVCodecID av_codec = [&] {
	78	switch (codec) {
	79	case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
	80	return AV_CODEC_ID_H264;
	81	case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
	82	return AV_CODEC_ID_VP8;
	83	case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
	84	return AV_CODEC_ID_VP9;
	85	default:
	86	UNIMPLEMENTED_MSG("Unknown codec {}", codec);
	87	return AV_CODEC_ID_NONE;
	88	}
	89	}();
	90
	91	m_codec = avcodec_find_decoder(av_codec);
	92	}
	93
	94	bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
	95	for (int i = 0;; i++) {
	96	const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
	97	if (!config) {
	98	LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
	99	av_hwdevice_get_type_name(type));
	100	break;
	101	}
	102	if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0 &&
	103	config->device_type == type) {
	104	LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
	105	*out_pix_fmt = config->pix_fmt;
	106	return true;
	107	}
	108	}
	109
	110	return false;
	111	}
	112
	113	std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
	114	std::vector<AVHWDeviceType> types;
	115	AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
	116
	117	while (true) {
	118	current_device_type = av_hwdevice_iterate_types(current_device_type);
	119	if (current_device_type == AV_HWDEVICE_TYPE_NONE) {
	120	return types;
	121	}
	122
	123	types.push_back(current_device_type);
	124	}
	125	}
	126
	127	HardwareContext::~HardwareContext() {
	128	av_buffer_unref(&m_gpu_decoder);
	129	}
	130
	131	bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
	132	const Decoder& decoder) {
	133	const auto supported_types = GetSupportedDeviceTypes();
	134	for (const auto type : PreferredGpuDecoders) {
	135	AVPixelFormat hw_pix_fmt;
	136
	137	if (std::ranges::find(supported_types, type) == supported_types.end()) {
	138	LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
	139	continue;
	140	}
	141
	142	if (!this->InitializeWithType(type)) {
	143	continue;
	144	}
	145
	146	if (decoder.SupportsDecodingOnDevice(&hw_pix_fmt, type)) {
	147	decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
	148	return true;
	149	}
	150	}
	151
	152	return false;
	153	}
	154
	155	bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
	156	av_buffer_unref(&m_gpu_decoder);
	157
	158	if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
	159	ret < 0) {
	160	LOG_DEBUG(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
	161	AVError(ret));
	162	return false;
	163	}
	164
	165	#ifdef LIBVA_FOUND
	166	if (type == AV_HWDEVICE_TYPE_VAAPI) {
	167	// We need to determine if this is an impersonated VAAPI driver.
	168	auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
	169	auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
	170	const char* vendor_name = vaQueryVendorString(vactx->display);
	171	if (strstr(vendor_name, "VDPAU backend")) {
	172	// VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
	173	LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
	174	return false;
	175	} else {
	176	// According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
	177	// Log the driver name just in case.
	178	LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
	179	}
	180	}
	181	#endif
	182
	183	return true;
	184	}
	185
	186	DecoderContext::DecoderContext(const Decoder& decoder) {
	187	m_codec_context = avcodec_alloc_context3(decoder.GetCodec());
	188	av_opt_set(m_codec_context->priv_data, "tune", "zerolatency", 0);
	189	m_codec_context->thread_count = 0;
	190	m_codec_context->thread_type &= ~FF_THREAD_FRAME;
	191	}
	192
	193	DecoderContext::~DecoderContext() {
	194	av_buffer_unref(&m_codec_context->hw_device_ctx);
	195	avcodec_free_context(&m_codec_context);
	196	}
	197
	198	void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context,
	199	AVPixelFormat hw_pix_fmt) {
	200	m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
	201	m_codec_context->get_format = GetGpuFormat;
	202	m_codec_context->pix_fmt = hw_pix_fmt;
	203	}
	204
	205	bool DecoderContext::OpenContext(const Decoder& decoder) {
	206	if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
	207	LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
	208	return false;
	209	}
	210
	211	if (!m_codec_context->hw_device_ctx) {
	212	LOG_INFO(HW_GPU, "Using FFmpeg software decoding");
	213	}
	214
	215	return true;
	216	}
	217
	218	bool DecoderContext::SendPacket(const Packet& packet) {
	219	if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0) {
	220	LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
	221	return false;
	222	}
	223
	224	return true;
	225	}
	226
	227	std::unique_ptr<Frame> DecoderContext::ReceiveFrame(bool* out_is_interlaced) {
	228	auto dst_frame = std::make_unique<Frame>();
	229
	230	const auto ReceiveImpl = [&](AVFrame* frame) {
	231	if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0) {
	232	LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
	233	return false;
	234	}
	235
	236	*out_is_interlaced = frame->interlaced_frame != 0;
	237	return true;
	238	};
	239
	240	if (m_codec_context->hw_device_ctx) {
	241	// If we have a hardware context, make a separate frame here to receive the
	242	// hardware result before sending it to the output.
	243	Frame intermediate_frame;
	244
	245	if (!ReceiveImpl(intermediate_frame.GetFrame())) {
	246	return {};
	247	}
	248
	249	dst_frame->SetFormat(PreferredGpuFormat);
	250	if (const int ret =
	251	av_hwframe_transfer_data(dst_frame->GetFrame(), intermediate_frame.GetFrame(), 0);
	252	ret < 0) {
	253	LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
	254	return {};
	255	}
	256	} else {
	257	// Otherwise, decode the frame as normal.
	258	if (!ReceiveImpl(dst_frame->GetFrame())) {
	259	return {};
	260	}
	261	}
	262
	263	return dst_frame;
	264	}
	265
	266	DeinterlaceFilter::DeinterlaceFilter(const Frame& frame) {
	267	const AVFilter* buffer_src = avfilter_get_by_name("buffer");
	268	const AVFilter* buffer_sink = avfilter_get_by_name("buffersink");
	269	AVFilterInOut* inputs = avfilter_inout_alloc();
	270	AVFilterInOut* outputs = avfilter_inout_alloc();
	271	SCOPE_EXIT({
	272	avfilter_inout_free(&inputs);
	273	avfilter_inout_free(&outputs);
	274	});
	275
	276	// Don't know how to get the accurate time_base but it doesn't matter for yadif filter
	277	// so just use 1/1 to make buffer filter happy
	278	std::string args = fmt::format("video_size={}x{}:pix_fmt={}:time_base=1/1", frame.GetWidth(),
	279	frame.GetHeight(), static_cast<int>(frame.GetPixelFormat()));
	280
	281	m_filter_graph = avfilter_graph_alloc();
	282	int ret = avfilter_graph_create_filter(&m_source_context, buffer_src, "in", args.c_str(),
	283	nullptr, m_filter_graph);
	284	if (ret < 0) {
	285	LOG_ERROR(HW_GPU, "avfilter_graph_create_filter source error: {}", AVError(ret));
	286	return;
	287	}
	288
	289	ret = avfilter_graph_create_filter(&m_sink_context, buffer_sink, "out", nullptr, nullptr,
	290	m_filter_graph);
	291	if (ret < 0) {
	292	LOG_ERROR(HW_GPU, "avfilter_graph_create_filter sink error: {}", AVError(ret));
	293	return;
	294	}
	295
	296	inputs->name = av_strdup("out");
	297	inputs->filter_ctx = m_sink_context;
	298	inputs->pad_idx = 0;
	299	inputs->next = nullptr;
	300
	301	outputs->name = av_strdup("in");
	302	outputs->filter_ctx = m_source_context;
	303	outputs->pad_idx = 0;
	304	outputs->next = nullptr;
	305
	306	const char* description = "yadif=1:-1:0";
	307	ret = avfilter_graph_parse_ptr(m_filter_graph, description, &inputs, &outputs, nullptr);
	308	if (ret < 0) {
	309	LOG_ERROR(HW_GPU, "avfilter_graph_parse_ptr error: {}", AVError(ret));
	310	return;
	311	}
	312
	313	ret = avfilter_graph_config(m_filter_graph, nullptr);
	314	if (ret < 0) {
	315	LOG_ERROR(HW_GPU, "avfilter_graph_config error: {}", AVError(ret));
	316	return;
	317	}
	318
	319	m_initialized = true;
	320	}
	321
	322	bool DeinterlaceFilter::AddSourceFrame(const Frame& frame) {
	323	if (const int ret = av_buffersrc_add_frame_flags(m_source_context, frame.GetFrame(),
	324	AV_BUFFERSRC_FLAG_KEEP_REF);
	325	ret < 0) {
	326	LOG_ERROR(HW_GPU, "av_buffersrc_add_frame_flags error: {}", AVError(ret));
	327	return false;
	328	}
	329
	330	return true;
	331	}
	332
	333	std::unique_ptr<Frame> DeinterlaceFilter::DrainSinkFrame() {
	334	auto dst_frame = std::make_unique<Frame>();
	335	const int ret = av_buffersink_get_frame(m_sink_context, dst_frame->GetFrame());
	336
	337	if (ret == AVERROR(EAGAIN) || ret == AVERROR(AVERROR_EOF)) {
	338	return {};
	339	}
	340
	341	if (ret < 0) {
	342	LOG_ERROR(HW_GPU, "av_buffersink_get_frame error: {}", AVError(ret));
	343	return {};
	344	}
	345
	346	return dst_frame;
	347	}
	348
	349	DeinterlaceFilter::~DeinterlaceFilter() {
	350	avfilter_graph_free(&m_filter_graph);
	351	}
	352
	353	void DecodeApi::Reset() {
	354	m_deinterlace_filter.reset();
	355	m_hardware_context.reset();
	356	m_decoder_context.reset();
	357	m_decoder.reset();
	358	}
	359
	360	bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
	361	this->Reset();
	362	m_decoder.emplace(codec);
	363	m_decoder_context.emplace(*m_decoder);
	364
	365	// Enable GPU decoding if requested.
	366	if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
	367	m_hardware_context.emplace();
	368	m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
	369	}
	370
	371	// Open the decoder context.
	372	if (!m_decoder_context->OpenContext(*m_decoder)) {
	373	this->Reset();
	374	return false;
	375	}
	376
	377	return true;
	378	}
	379
	380	bool DecodeApi::SendPacket(std::span<const u8> packet_data, size_t configuration_size) {
	381	FFmpeg::Packet packet(packet_data);
	382	return m_decoder_context->SendPacket(packet);
	383	}
	384
	385	void DecodeApi::ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue) {
	386	// Receive raw frame from decoder.
	387	bool is_interlaced;
	388	auto frame = m_decoder_context->ReceiveFrame(&is_interlaced);
	389	if (!frame) {
	390	return;
	391	}
	392
	393	if (!is_interlaced) {
	394	// If the frame is not interlaced, we can pend it now.
	395	frame_queue.push(std::move(frame));
	396	} else {
	397	// Create the deinterlacer if needed.
	398	if (!m_deinterlace_filter) {
	399	m_deinterlace_filter.emplace(*frame);
	400	}
	401
	402	// Add the frame we just received.
	403	if (!m_deinterlace_filter->AddSourceFrame(*frame)) {
	404	return;
	405	}
	406
	407	// Pend output fields.
	408	while (true) {
	409	auto filter_frame = m_deinterlace_filter->DrainSinkFrame();
	410	if (!filter_frame) {
	411	break;
	412	}
	413
	414	frame_queue.push(std::move(filter_frame));
	415	}
	416	}
	417	}
	418
	419	} // namespace FFmpeg


diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h new file mode 100644 index 000000000..1de0bbd83 --- /dev/null +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h
@@ -0,0 +1,213 @@
	1	// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
	2	// SPDX-License-Identifier: GPL-2.0-or-later
	3
	4	#pragma once
	5
	6	#include <memory>
	7	#include <optional>
	8	#include <span>
	9	#include <vector>
	10	#include <queue>
	11
	12	#include "common/common_funcs.h"
	13	#include "common/common_types.h"
	14	#include "video_core/host1x/nvdec_common.h"
	15
	16	extern "C" {
	17	#if defined(__GNUC__) || defined(__clang__)
	18	#pragma GCC diagnostic push
	19	#pragma GCC diagnostic ignored "-Wconversion"
	20	#endif
	21
	22	#include <libavcodec/avcodec.h>
	23	#include <libavfilter/avfilter.h>
	24	#include <libavfilter/buffersink.h>
	25	#include <libavfilter/buffersrc.h>
	26	#include <libavutil/avutil.h>
	27	#include <libavutil/opt.h>
	28
	29	#if defined(__GNUC__) || defined(__clang__)
	30	#pragma GCC diagnostic pop
	31	#endif
	32	}
	33
	34	namespace FFmpeg {
	35
	36	class Packet;
	37	class Frame;
	38	class Decoder;
	39	class HardwareContext;
	40	class DecoderContext;
	41	class DeinterlaceFilter;
	42
	43	// Wraps an AVPacket, a container for compressed bitstream data.
	44	class Packet {
	45	public:
	46	YUZU_NON_COPYABLE(Packet);
	47	YUZU_NON_MOVEABLE(Packet);
	48
	49	explicit Packet(std::span<const u8> data);
	50	~Packet();
	51
	52	AVPacket* GetPacket() const {
	53	return m_packet;
	54	}
	55
	56	private:
	57	AVPacket* m_packet{};
	58	};
	59
	60	// Wraps an AVFrame, a container for audio and video stream data.
	61	class Frame {
	62	public:
	63	YUZU_NON_COPYABLE(Frame);
	64	YUZU_NON_MOVEABLE(Frame);
	65
	66	explicit Frame();
	67	~Frame();
	68
	69	int GetWidth() const {
	70	return m_frame->width;
	71	}
	72
	73	int GetHeight() const {
	74	return m_frame->height;
	75	}
	76
	77	AVPixelFormat GetPixelFormat() const {
	78	return static_cast<AVPixelFormat>(m_frame->format);
	79	}
	80
	81	int GetStride(int plane) const {
	82	return m_frame->linesize[plane];
	83	}
	84
	85	int* GetStrides() const {
	86	return m_frame->linesize;
	87	}
	88
	89	u8* GetData(int plane) const {
	90	return m_frame->data[plane];
	91	}
	92
	93	u8** GetPlanes() const {
	94	return m_frame->data;
	95	}
	96
	97	void SetFormat(int format) {
	98	m_frame->format = format;
	99	}
	100
	101	AVFrame* GetFrame() const {
	102	return m_frame;
	103	}
	104
	105	private:
	106	AVFrame* m_frame{};
	107	};
	108
	109	// Wraps an AVCodec, a type containing information about a codec.
	110	class Decoder {
	111	public:
	112	YUZU_NON_COPYABLE(Decoder);
	113	YUZU_NON_MOVEABLE(Decoder);
	114
	115	explicit Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec);
	116	~Decoder() = default;
	117
	118	bool SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const;
	119
	120	const AVCodec* GetCodec() const {
	121	return m_codec;
	122	}
	123
	124	private:
	125	const AVCodec* m_codec{};
	126	};
	127
	128	// Wraps AVBufferRef for an accelerated decoder.
	129	class HardwareContext {
	130	public:
	131	YUZU_NON_COPYABLE(HardwareContext);
	132	YUZU_NON_MOVEABLE(HardwareContext);
	133
	134	static std::vector<AVHWDeviceType> GetSupportedDeviceTypes();
	135
	136	explicit HardwareContext() = default;
	137	~HardwareContext();
	138
	139	bool InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder);
	140
	141	AVBufferRef* GetBufferRef() const {
	142	return m_gpu_decoder;
	143	}
	144
	145	private:
	146	bool InitializeWithType(AVHWDeviceType type);
	147
	148	AVBufferRef* m_gpu_decoder{};
	149	};
	150
	151	// Wraps an AVCodecContext.
	152	class DecoderContext {
	153	public:
	154	YUZU_NON_COPYABLE(DecoderContext);
	155	YUZU_NON_MOVEABLE(DecoderContext);
	156
	157	explicit DecoderContext(const Decoder& decoder);
	158	~DecoderContext();
	159
	160	void InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt);
	161	bool OpenContext(const Decoder& decoder);
	162	bool SendPacket(const Packet& packet);
	163	std::unique_ptr<Frame> ReceiveFrame(bool* out_is_interlaced);
	164
	165	AVCodecContext* GetCodecContext() const {
	166	return m_codec_context;
	167	}
	168
	169	private:
	170	AVCodecContext* m_codec_context{};
	171	};
	172
	173	// Wraps an AVFilterGraph.
	174	class DeinterlaceFilter {
	175	public:
	176	YUZU_NON_COPYABLE(DeinterlaceFilter);
	177	YUZU_NON_MOVEABLE(DeinterlaceFilter);
	178
	179	explicit DeinterlaceFilter(const Frame& frame);
	180	~DeinterlaceFilter();
	181
	182	bool AddSourceFrame(const Frame& frame);
	183	std::unique_ptr<Frame> DrainSinkFrame();
	184
	185	private:
	186	AVFilterGraph* m_filter_graph{};
	187	AVFilterContext* m_source_context{};
	188	AVFilterContext* m_sink_context{};
	189	bool m_initialized{};
	190	};
	191
	192	class DecodeApi {
	193	public:
	194	YUZU_NON_COPYABLE(DecodeApi);
	195	YUZU_NON_MOVEABLE(DecodeApi);
	196
	197	DecodeApi() = default;
	198	~DecodeApi() = default;
	199
	200	bool Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec);
	201	void Reset();
	202
	203	bool SendPacket(std::span<const u8> packet_data, size_t configuration_size);
	204	void ReceiveFrames(std::queue<std::unique_ptr<Frame>>& frame_queue);
	205
	206	private:
	207	std::optional<FFmpeg::Decoder> m_decoder;
	208	std::optional<FFmpeg::DecoderContext> m_decoder_context;
	209	std::optional<FFmpeg::HardwareContext> m_hardware_context;
	210	std::optional<FFmpeg::DeinterlaceFilter> m_deinterlace_filter;
	211	};
	212
	213	} // namespace FFmpeg