author     yzct12345  2021-08-04 03:43:11 +0000
committer  GitHub     2021-08-03 23:43:11 -0400
commit     2868d4ba84f43c9bf3c7b6997ddcafb6e65c4a02 (patch)
tree       7cb19f8de5b5b37db87fa331d9b3c951ce372b4b /src
parent     Merge pull request #6805 from lat9nq/fix-user-profiles (diff)
download   yuzu-2868d4ba84f43c9bf3c7b6997ddcafb6e65c4a02.tar.gz
           yuzu-2868d4ba84f43c9bf3c7b6997ddcafb6e65c4a02.tar.xz
           yuzu-2868d4ba84f43c9bf3c7b6997ddcafb6e65c4a02.zip
nvdec: Implement VA-API hardware video acceleration (#6713)
* nvdec: VA-API
* Verify formatting
* Forgot a semicolon for Windows
* Clarify comment about AV_PIX_FMT_NV12
* Fix assert log spam from missing negation
* vic: Remove forgotten debug code
* Address lioncash's review
* Mention VA-API is Intel/AMD
* Address v1993's review
* Hopefully fix CMakeLists style this time
* vic: Improve cache locality
* vic: Fix off-by-one error
* codec: Async
* codec: Forgot the GetValue()
* nvdec: Address ameerj's review
* codec: Fallback to CPU without VA-API support
* cmake: Address lat9nq's review
* cmake: Make VA-API optional
* vaapi: Multiple GPU
* Apply suggestions from code review

Co-authored-by: Ameer J <52414509+ameerj@users.noreply.github.com>

* nvdec: Address ameerj's review
* codec: Use anonymous instead of static
* nvdec: Remove enum and fix memory leak
* nvdec: Address ameerj's review
* codec: Remove preparation for threading

Co-authored-by: Ameer J <52414509+ameerj@users.noreply.github.com>
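For readers unfamiliar with FFmpeg's hardware-decode flow, the VA-API path added by this commit follows the pattern from FFmpeg/doc/examples/hw_decode.c: create a VAAPI hardware device context, attach it to the codec context, and install a get_format callback that selects AV_PIX_FMT_VAAPI when the decoder offers it (otherwise decoding stays on the CPU). A minimal standalone sketch of that pattern, using illustrative names (PickVaapiFormat, SetupVaapiDecoder) rather than the yuzu functions in the diff below:

    // Sketch only: standard FFmpeg VA-API decoder setup, error handling trimmed.
    extern "C" {
    #include <libavcodec/avcodec.h>
    #include <libavutil/hwcontext.h>
    }

    static AVPixelFormat PickVaapiFormat(AVCodecContext*, const AVPixelFormat* formats) {
        for (const AVPixelFormat* p = formats; *p != AV_PIX_FMT_NONE; ++p) {
            if (*p == AV_PIX_FMT_VAAPI) {
                return AV_PIX_FMT_VAAPI; // decoder outputs GPU-side VAAPI surfaces
            }
        }
        return formats[0]; // VA-API not offered; fall back to a software format
    }

    static bool SetupVaapiDecoder(AVCodecContext* ctx, AVBufferRef** hw_device) {
        // Passing nullptr for the device string lets libva pick a DRM render node.
        if (av_hwdevice_ctx_create(hw_device, AV_HWDEVICE_TYPE_VAAPI, nullptr, nullptr, 0) < 0) {
            return false; // no usable VA-API device; caller keeps software decoding
        }
        ctx->hw_device_ctx = av_buffer_ref(*hw_device);
        ctx->get_format = PickVaapiFormat;
        return true;
    }

The commit's CreateVaapiHwdevice additionally passes connection_type=drm and kernel_driver=i915/amdgpu options, which (per the code comment) prioritizes an integrated Intel or AMD GPU on multi-GPU systems.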
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/CMakeLists.txt                    |   5
-rw-r--r--  src/video_core/command_classes/codecs/codec.cpp  | 144
-rw-r--r--  src/video_core/command_classes/codecs/codec.h    |   4
-rw-r--r--  src/video_core/command_classes/vic.cpp           |  87
-rw-r--r--  src/video_core/command_classes/vic.h             |   7
5 files changed, 175 insertions, 72 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 333f6f35f..1eb67c051 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,5 +1,10 @@
 add_subdirectory(host_shaders)
 
+if(LIBVA_FOUND)
+    set_source_files_properties(command_classes/codecs/codec.cpp
+        PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1)
+endif()
+
 add_library(video_core STATIC
     buffer_cache/buffer_base.h
     buffer_cache/buffer_cache.cpp
diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp
index 1b4bbc8ac..f798a0053 100644
--- a/src/video_core/command_classes/codecs/codec.cpp
+++ b/src/video_core/command_classes/codecs/codec.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <cstring>
 #include <fstream>
 #include <vector>
 #include "common/assert.h"
@@ -17,10 +16,47 @@ extern "C" {
 }
 
 namespace Tegra {
+#if defined(LIBVA_FOUND)
+// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
+namespace {
+constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
+    "i915",
+    "amdgpu",
+};
+
+AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
+    for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
+        if (*p == AV_PIX_FMT_VAAPI) {
+            return AV_PIX_FMT_VAAPI;
+        }
+    }
+    LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
+    return *pix_fmts;
+}
+
+bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
+    AVDictionary* hwdevice_options = nullptr;
+    av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
+    for (const auto& driver : VAAPI_DRIVERS) {
+        av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
+        const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
+                                                          nullptr, hwdevice_options, 0);
+        if (hwdevice_error >= 0) {
+            LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
+            av_dict_free(&hwdevice_options);
+            return true;
+        }
+        LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
+    }
+    LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
+    av_dict_free(&hwdevice_options);
+    return false;
+}
+} // namespace
+#endif
 
 void AVFrameDeleter(AVFrame* ptr) {
-    av_frame_unref(ptr);
-    av_free(ptr);
+    av_frame_free(&ptr);
 }
 
 Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs)
@@ -32,19 +68,31 @@ Codec::~Codec() {
         return;
     }
     // Free libav memory
-    AVFrame* av_frame{nullptr};
     avcodec_send_packet(av_codec_ctx, nullptr);
-    av_frame = av_frame_alloc();
+    AVFrame* av_frame = av_frame_alloc();
     avcodec_receive_frame(av_codec_ctx, av_frame);
     avcodec_flush_buffers(av_codec_ctx);
-
-    av_frame_unref(av_frame);
-    av_free(av_frame);
+    av_frame_free(&av_frame);
     avcodec_close(av_codec_ctx);
+    av_buffer_unref(&av_hw_device);
+}
+
+void Codec::InitializeHwdec() {
+    // Prioritize integrated GPU to mitigate bandwidth bottlenecks
+#if defined(LIBVA_FOUND)
+    if (CreateVaapiHwdevice(&av_hw_device)) {
+        const auto hw_device_ctx = av_buffer_ref(av_hw_device);
+        ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
+        av_codec_ctx->hw_device_ctx = hw_device_ctx;
+        av_codec_ctx->get_format = GetHwFormat;
+        return;
+    }
+#endif
+    // TODO more GPU accelerated decoders
 }
 
 void Codec::Initialize() {
-    AVCodecID codec{AV_CODEC_ID_NONE};
+    AVCodecID codec;
     switch (current_codec) {
     case NvdecCommon::VideoCodec::H264:
         codec = AV_CODEC_ID_H264;
@@ -53,22 +101,24 @@ void Codec::Initialize() {
         codec = AV_CODEC_ID_VP9;
         break;
     default:
+        UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
         return;
     }
     av_codec = avcodec_find_decoder(codec);
     av_codec_ctx = avcodec_alloc_context3(av_codec);
     av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
-
-    // TODO(ameerj): libavcodec gpu hw acceleration
-
+    InitializeHwdec();
+    if (!av_codec_ctx->hw_device_ctx) {
+        LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
+    }
     const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
     if (av_error < 0) {
         LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
         avcodec_close(av_codec_ctx);
+        av_buffer_unref(&av_hw_device);
         return;
     }
     initialized = true;
-    return;
 }
 
 void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
@@ -80,36 +130,64 @@ void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) {
 
 void Codec::Decode() {
     const bool is_first_frame = !initialized;
-    if (!initialized) {
+    if (is_first_frame) {
         Initialize();
     }
-
     bool vp9_hidden_frame = false;
-    AVPacket packet{};
-    av_init_packet(&packet);
     std::vector<u8> frame_data;
-
     if (current_codec == NvdecCommon::VideoCodec::H264) {
         frame_data = h264_decoder->ComposeFrameHeader(state, is_first_frame);
     } else if (current_codec == NvdecCommon::VideoCodec::Vp9) {
         frame_data = vp9_decoder->ComposeFrameHeader(state);
         vp9_hidden_frame = vp9_decoder->WasFrameHidden();
     }
-
+    AVPacket packet{};
+    av_init_packet(&packet);
     packet.data = frame_data.data();
     packet.size = static_cast<s32>(frame_data.size());
-
-    avcodec_send_packet(av_codec_ctx, &packet);
-
-    if (!vp9_hidden_frame) {
-        // Only receive/store visible frames
-        AVFramePtr frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
-        avcodec_receive_frame(av_codec_ctx, frame.get());
-        av_frames.push(std::move(frame));
-        // Limit queue to 10 frames. Workaround for ZLA decode and queue spam
-        if (av_frames.size() > 10) {
-            av_frames.pop();
-        }
+    if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
+        LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
+        return;
+    }
+    // Only receive/store visible frames
+    if (vp9_hidden_frame) {
+        return;
+    }
+    AVFrame* hw_frame = av_frame_alloc();
+    AVFrame* sw_frame = hw_frame;
+    ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
+    if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
+        LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
+        av_frame_free(&hw_frame);
+        return;
+    }
+    if (!hw_frame->width || !hw_frame->height) {
+        LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
+        av_frame_free(&hw_frame);
+        return;
+    }
+#if defined(LIBVA_FOUND)
+    // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
+    if (hw_frame->format == AV_PIX_FMT_VAAPI) {
+        sw_frame = av_frame_alloc();
+        ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
+        // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
+        // because Intel drivers crash unless using AV_PIX_FMT_NV12
+        sw_frame->format = AV_PIX_FMT_NV12;
+        const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
+        ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
+        av_frame_free(&hw_frame);
+    }
+#endif
+    if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
+        UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
+        av_frame_free(&sw_frame);
+        return;
+    }
+    av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
+    if (av_frames.size() > 10) {
+        LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
+        av_frames.pop();
     }
 }
 
@@ -119,7 +197,6 @@ AVFramePtr Codec::GetCurrentFrame() {
     if (av_frames.empty()) {
         return AVFramePtr{nullptr, AVFrameDeleter};
     }
-
     AVFramePtr frame = std::move(av_frames.front());
     av_frames.pop();
     return frame;
@@ -144,6 +221,5 @@ std::string_view Codec::GetCurrentCodecName() const {
     default:
         return "Unknown";
     }
-};
-
+}
 } // namespace Tegra
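One point worth noting about the Decode() changes above: when the get_format callback has selected AV_PIX_FMT_VAAPI, avcodec_receive_frame() returns a frame whose pixels still live in GPU memory, so the code downloads it into a CPU-side NV12 frame with av_hwframe_transfer_data() before queueing it; a software decode already yields a CPU frame (YUV420P) and skips that step. A condensed sketch of just that receive-and-transfer step, with the hypothetical helper name ReceiveSoftwareFrame and the same FFmpeg headers as the sketch near the top of this page:

    // Sketch only: receive one frame and, if it is a VA-API surface, copy it to NV12.
    static AVFrame* ReceiveSoftwareFrame(AVCodecContext* ctx) {
        AVFrame* hw_frame = av_frame_alloc();
        if (avcodec_receive_frame(ctx, hw_frame) < 0) {
            av_frame_free(&hw_frame);
            return nullptr; // EAGAIN/EOF or decode error
        }
        if (hw_frame->format != AV_PIX_FMT_VAAPI) {
            return hw_frame; // software decode: already a CPU-side (YUV420P) frame
        }
        AVFrame* sw_frame = av_frame_alloc();
        sw_frame->format = AV_PIX_FMT_NV12; // NV12 rather than YUV420P (see comment in the diff)
        if (av_hwframe_transfer_data(sw_frame, hw_frame, 0) < 0) { // GPU -> CPU copy
            av_frame_free(&sw_frame);
            av_frame_free(&hw_frame);
            return nullptr;
        }
        av_frame_free(&hw_frame);
        return sw_frame;
    }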
diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h
index 96c823c76..71936203f 100644
--- a/src/video_core/command_classes/codecs/codec.h
+++ b/src/video_core/command_classes/codecs/codec.h
@@ -22,7 +22,6 @@ extern "C" {
 
 namespace Tegra {
 class GPU;
-struct VicRegisters;
 
 void AVFrameDeleter(AVFrame* ptr);
 using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
@@ -55,10 +54,13 @@ public:
     [[nodiscard]] std::string_view GetCurrentCodecName() const;
 
 private:
+    void InitializeHwdec();
+
     bool initialized{};
     NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
 
     AVCodec* av_codec{nullptr};
+    AVBufferRef* av_hw_device{nullptr};
     AVCodecContext* av_codec_ctx{nullptr};
 
     GPU& gpu;
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp
index ffb7c82a1..d5e77941c 100644
--- a/src/video_core/command_classes/vic.cpp
+++ b/src/video_core/command_classes/vic.cpp
@@ -46,11 +46,8 @@ void Vic::ProcessMethod(Method method, u32 argument) {
     case Method::SetOutputSurfaceLumaOffset:
         output_surface_luma_address = arg;
         break;
-    case Method::SetOutputSurfaceChromaUOffset:
-        output_surface_chroma_u_address = arg;
-        break;
-    case Method::SetOutputSurfaceChromaVOffset:
-        output_surface_chroma_v_address = arg;
+    case Method::SetOutputSurfaceChromaOffset:
+        output_surface_chroma_address = arg;
         break;
     default:
         break;
@@ -65,11 +62,10 @@ void Vic::Execute() {
     const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
     const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
     const auto* frame = frame_ptr.get();
-    if (!frame || frame->width == 0 || frame->height == 0) {
+    if (!frame) {
         return;
     }
-    const VideoPixelFormat pixel_format =
-        static_cast<VideoPixelFormat>(config.pixel_format.Value());
+    const auto pixel_format = static_cast<VideoPixelFormat>(config.pixel_format.Value());
     switch (pixel_format) {
     case VideoPixelFormat::BGRA8:
     case VideoPixelFormat::RGBA8: {
@@ -83,16 +79,18 @@ void Vic::Execute() {
             sws_freeContext(scaler_ctx);
             scaler_ctx = nullptr;
 
-            // FFmpeg returns all frames in YUV420, convert it into expected format
-            scaler_ctx =
-                sws_getContext(frame->width, frame->height, AV_PIX_FMT_YUV420P, frame->width,
-                               frame->height, target_format, 0, nullptr, nullptr, nullptr);
+            // Frames are decoded into either YUV420 or NV12 formats. Convert to desired format
+            scaler_ctx = sws_getContext(frame->width, frame->height,
+                                        static_cast<AVPixelFormat>(frame->format), frame->width,
+                                        frame->height, target_format, 0, nullptr, nullptr, nullptr);
 
             scaler_width = frame->width;
             scaler_height = frame->height;
         }
         // Get Converted frame
-        const std::size_t linear_size = frame->width * frame->height * 4;
+        const u32 width = static_cast<u32>(frame->width);
+        const u32 height = static_cast<u32>(frame->height);
+        const std::size_t linear_size = width * height * 4;
 
         // Only allocate frame_buffer once per stream, as the size is not expected to change
         if (!converted_frame_buffer) {
@@ -109,11 +107,10 @@ void Vic::Execute() {
         if (blk_kind != 0) {
             // swizzle pitch linear to block linear
             const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
-            const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
-                                                            block_height, 0);
+            const auto size =
+                Tegra::Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
             luma_buffer.resize(size);
-            Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
-                                           frame->width, 4, luma_buffer.data(),
+            Tegra::Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
                                            converted_frame_buffer.get(), block_height, 0, 0);
 
             gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
@@ -131,41 +128,65 @@ void Vic::Execute() {
         const std::size_t surface_height = config.surface_height_minus1 + 1;
         const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
         const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
-        const std::size_t half_width = frame_width / 2;
-        const std::size_t half_height = frame_height / 2;
-        const std::size_t aligned_width = (surface_width + 0xff) & ~0xff;
+        const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
 
-        const auto* luma_ptr = frame->data[0];
-        const auto* chroma_b_ptr = frame->data[1];
-        const auto* chroma_r_ptr = frame->data[2];
         const auto stride = static_cast<size_t>(frame->linesize[0]);
-        const auto half_stride = static_cast<size_t>(frame->linesize[1]);
 
         luma_buffer.resize(aligned_width * surface_height);
         chroma_buffer.resize(aligned_width * surface_height / 2);
 
         // Populate luma buffer
+        const u8* luma_src = frame->data[0];
         for (std::size_t y = 0; y < frame_height; ++y) {
             const std::size_t src = y * stride;
             const std::size_t dst = y * aligned_width;
             for (std::size_t x = 0; x < frame_width; ++x) {
-                luma_buffer[dst + x] = luma_ptr[src + x];
+                luma_buffer[dst + x] = luma_src[src + x];
             }
         }
         gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
                                        luma_buffer.size());
 
-        // Populate chroma buffer from both channels with interleaving.
-        for (std::size_t y = 0; y < half_height; ++y) {
-            const std::size_t src = y * half_stride;
-            const std::size_t dst = y * aligned_width;
+        // Chroma
+        const std::size_t half_height = frame_height / 2;
+        const auto half_stride = static_cast<size_t>(frame->linesize[1]);
 
-            for (std::size_t x = 0; x < half_width; ++x) {
-                chroma_buffer[dst + x * 2] = chroma_b_ptr[src + x];
-                chroma_buffer[dst + x * 2 + 1] = chroma_r_ptr[src + x];
+        switch (frame->format) {
+        case AV_PIX_FMT_YUV420P: {
+            // Frame from FFmpeg software
+            // Populate chroma buffer from both channels with interleaving.
+            const std::size_t half_width = frame_width / 2;
+            const u8* chroma_b_src = frame->data[1];
+            const u8* chroma_r_src = frame->data[2];
+            for (std::size_t y = 0; y < half_height; ++y) {
+                const std::size_t src = y * half_stride;
+                const std::size_t dst = y * aligned_width;
+
+                for (std::size_t x = 0; x < half_width; ++x) {
+                    chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
+                    chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
+                }
+            }
+            break;
+        }
+        case AV_PIX_FMT_NV12: {
+            // Frame from VA-API hardware
+            // This is already interleaved so just copy
+            const u8* chroma_src = frame->data[1];
+            for (std::size_t y = 0; y < half_height; ++y) {
+                const std::size_t src = y * stride;
+                const std::size_t dst = y * aligned_width;
+                for (std::size_t x = 0; x < frame_width; ++x) {
+                    chroma_buffer[dst + x] = chroma_src[src + x];
+                }
             }
+            break;
+        }
+        default:
+            UNREACHABLE();
+            break;
         }
-        gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(),
+        gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
                                        chroma_buffer.size());
         break;
     }
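The chroma handling added to vic.cpp above exists because the two pixel formats lay out chroma differently: YUV420P (software decode) stores U and V in two separate planes that must be interleaved into the output surface, while NV12 (VA-API) already stores U and V interleaved in a single plane, so a plain row copy suffices. A small standalone helper with the hypothetical name InterleaveChroma, showing the interleaving the YUV420P branch performs:

    #include <cstddef>
    #include <cstdint>

    // Sketch only: interleave planar U/V rows (YUV420P) into an NV12-style UV plane,
    // mirroring the YUV420P branch of the chroma loop in vic.cpp above.
    void InterleaveChroma(const std::uint8_t* u_plane, const std::uint8_t* v_plane,
                          std::size_t src_stride, std::uint8_t* uv_out, std::size_t dst_stride,
                          std::size_t half_width, std::size_t half_height) {
        for (std::size_t y = 0; y < half_height; ++y) {
            const std::uint8_t* u_row = u_plane + y * src_stride;
            const std::uint8_t* v_row = v_plane + y * src_stride;
            std::uint8_t* dst_row = uv_out + y * dst_stride;
            for (std::size_t x = 0; x < half_width; ++x) {
                dst_row[x * 2] = u_row[x];     // U sample
                dst_row[x * 2 + 1] = v_row[x]; // V sample
            }
        }
    }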
diff --git a/src/video_core/command_classes/vic.h b/src/video_core/command_classes/vic.h
index f5a2ed100..74246e08c 100644
--- a/src/video_core/command_classes/vic.h
+++ b/src/video_core/command_classes/vic.h
@@ -22,8 +22,8 @@ public:
         SetControlParams = 0x1c1,
         SetConfigStructOffset = 0x1c2,
         SetOutputSurfaceLumaOffset = 0x1c8,
-        SetOutputSurfaceChromaUOffset = 0x1c9,
-        SetOutputSurfaceChromaVOffset = 0x1ca
+        SetOutputSurfaceChromaOffset = 0x1c9,
+        SetOutputSurfaceChromaUnusedOffset = 0x1ca
     };
 
     explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
@@ -64,8 +64,7 @@ private:
 
     GPUVAddr config_struct_address{};
     GPUVAddr output_surface_luma_address{};
-    GPUVAddr output_surface_chroma_u_address{};
-    GPUVAddr output_surface_chroma_v_address{};
+    GPUVAddr output_surface_chroma_address{};
 
     SwsContext* scaler_ctx{};
     s32 scaler_width{};