summaryrefslogtreecommitdiff
path: root/src/video_core/host1x
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2022-01-30 10:31:13 +0100
committerGravatar Fernando Sahmkow2022-10-06 21:00:52 +0200
commit668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 (patch)
treea1c668d6c3d00eade849b1d31dba4116095e4c12 /src/video_core/host1x
parentTexture Cache: Fix GC and GPU Modified on Joins. (diff)
downloadyuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.gz
yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.xz
yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.zip
VideoCore: Refactor syncing.
Diffstat (limited to 'src/video_core/host1x')
-rw-r--r--src/video_core/host1x/codecs/codec.cpp310
-rw-r--r--src/video_core/host1x/codecs/codec.h81
-rw-r--r--src/video_core/host1x/codecs/h264.cpp277
-rw-r--r--src/video_core/host1x/codecs/h264.h173
-rw-r--r--src/video_core/host1x/codecs/vp8.cpp53
-rw-r--r--src/video_core/host1x/codecs/vp8.h74
-rw-r--r--src/video_core/host1x/codecs/vp9.cpp946
-rw-r--r--src/video_core/host1x/codecs/vp9.h194
-rw-r--r--src/video_core/host1x/codecs/vp9_types.h306
-rw-r--r--src/video_core/host1x/control.cpp35
-rw-r--r--src/video_core/host1x/control.h41
-rw-r--r--src/video_core/host1x/host1x.h33
-rw-r--r--src/video_core/host1x/nvdec.cpp47
-rw-r--r--src/video_core/host1x/nvdec.h38
-rw-r--r--src/video_core/host1x/nvdec_common.h97
-rw-r--r--src/video_core/host1x/sync_manager.cpp51
-rw-r--r--src/video_core/host1x/sync_manager.h53
-rw-r--r--src/video_core/host1x/syncpoint_manager.cpp93
-rw-r--r--src/video_core/host1x/syncpoint_manager.h99
-rw-r--r--src/video_core/host1x/vic.cpp243
-rw-r--r--src/video_core/host1x/vic.h66
21 files changed, 3310 insertions, 0 deletions
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
new file mode 100644
index 000000000..70c47ae03
--- /dev/null
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -0,0 +1,310 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm>
5#include <fstream>
6#include <vector>
7#include "common/assert.h"
8#include "common/settings.h"
9#include "video_core/gpu.h"
10#include "video_core/host1x/codecs/codec.h"
11#include "video_core/host1x/codecs/h264.h"
12#include "video_core/host1x/codecs/vp8.h"
13#include "video_core/host1x/codecs/vp9.h"
14#include "video_core/memory_manager.h"
15
16extern "C" {
17#include <libavutil/opt.h>
18#ifdef LIBVA_FOUND
19// for querying VAAPI driver information
20#include <libavutil/hwcontext_vaapi.h>
21#endif
22}
23
24namespace Tegra {
25namespace {
// Pixel format requested for frames copied back from a hardware decoder (see Codec::Decode).
26constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
// Pixel format selected when GetGpuFormat falls back to CPU decoding.
27constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
// Hardware device types tried by Codec::CreateGpuAvDevice, in this order.
// The platform-specific entries are selected at compile time.
28constexpr std::array PREFERRED_GPU_DECODERS = {
29 AV_HWDEVICE_TYPE_CUDA,
30#ifdef _WIN32
31 AV_HWDEVICE_TYPE_D3D11VA,
32 AV_HWDEVICE_TYPE_DXVA2,
33#elif defined(__unix__)
34 AV_HWDEVICE_TYPE_VAAPI,
35 AV_HWDEVICE_TYPE_VDPAU,
36#endif
37 // last resort for Linux Flatpak (w/ NVIDIA)
38 AV_HWDEVICE_TYPE_VULKAN,
39};
40
// Deleter for AVPacketPtr: releases a packet obtained from av_packet_alloc.
41void AVPacketDeleter(AVPacket* ptr) {
42 av_packet_free(&ptr);
43}
44
// Owning handle for an AVPacket that frees it through AVPacketDeleter.
45using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
46
47AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
48 for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
49 if (*p == av_codec_ctx->pix_fmt) {
50 return av_codec_ctx->pix_fmt;
51 }
52 }
53 LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
54 av_buffer_unref(&av_codec_ctx->hw_device_ctx);
55 av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
56 return PREFERRED_CPU_FMT;
57}
58
59// List all the currently available hwcontext in ffmpeg
60std::vector<AVHWDeviceType> ListSupportedContexts() {
61 std::vector<AVHWDeviceType> contexts{};
62 AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
63 do {
64 current_device_type = av_hwdevice_iterate_types(current_device_type);
65 contexts.push_back(current_device_type);
66 } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
67 return contexts;
68}
69
70} // namespace
71
// Deleter for AVFramePtr: releases a frame obtained from av_frame_alloc.
72void AVFrameDeleter(AVFrame* ptr) {
73 av_frame_free(&ptr);
74}
75
// Stores references to the GPU and the NVDEC register block and creates one bitstream
// composer per supported codec. No FFmpeg state is created here; that happens in Initialize().
76Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs)
77 : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)),
78 vp8_decoder(std::make_unique<Decoder::VP8>(gpu)),
79 vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {}
80
81Codec::~Codec() {
82 if (!initialized) {
83 return;
84 }
85 // Free libav memory
86 avcodec_free_context(&av_codec_ctx);
87 av_buffer_unref(&av_gpu_decoder);
88}
89
90bool Codec::CreateGpuAvDevice() {
91 static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
92 static const auto supported_contexts = ListSupportedContexts();
93 for (const auto& type : PREFERRED_GPU_DECODERS) {
94 if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
95 [&type](const auto& context) { return context == type; })) {
96 LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
97 continue;
98 }
99 // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
100 av_buffer_unref(&av_gpu_decoder);
101 const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
102 if (hwdevice_res < 0) {
103 LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
104 av_hwdevice_get_type_name(type), hwdevice_res);
105 continue;
106 }
107#ifdef LIBVA_FOUND
108 if (type == AV_HWDEVICE_TYPE_VAAPI) {
109 // we need to determine if this is an impersonated VAAPI driver
110 AVHWDeviceContext* hwctx =
111 static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
112 AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
113 const char* vendor_name = vaQueryVendorString(vactx->display);
114 if (strstr(vendor_name, "VDPAU backend")) {
115 // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
116 LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
117 continue;
118 } else {
119 // according to some user testing, certain vaapi driver (Intel?) could be buggy
120 // so let's log the driver name which may help the developers/supporters
121 LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
122 }
123 }
124#endif
125 for (int i = 0;; i++) {
126 const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
127 if (!config) {
128 LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
129 av_codec->name, av_hwdevice_get_type_name(type));
130 break;
131 }
132 if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
133#if defined(__unix__)
134 // Some linux decoding backends are reported to crash with this config method
135 // TODO(ameerj): Properly support this method
136 if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
137 // skip zero-copy decoders, we don't currently support them
138 LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
139 av_hwdevice_get_type_name(type), config->methods);
140 continue;
141 }
142#endif
143 LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
144 av_codec_ctx->pix_fmt = config->pix_fmt;
145 return true;
146 }
147 }
148 }
149 return false;
150}
151
152void Codec::InitializeAvCodecContext() {
153 av_codec_ctx = avcodec_alloc_context3(av_codec);
154 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
155}
156
157void Codec::InitializeGpuDecoder() {
158 if (!CreateGpuAvDevice()) {
159 av_buffer_unref(&av_gpu_decoder);
160 return;
161 }
162 auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
163 ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
164 av_codec_ctx->hw_device_ctx = hw_device_ctx;
165 av_codec_ctx->get_format = GetGpuFormat;
166}
167
168void Codec::Initialize() {
169 const AVCodecID codec = [&] {
170 switch (current_codec) {
171 case Host1x::NvdecCommon::VideoCodec::H264:
172 return AV_CODEC_ID_H264;
173 case Host1x::NvdecCommon::VideoCodec::VP8:
174 return AV_CODEC_ID_VP8;
175 case Host1x::NvdecCommon::VideoCodec::VP9:
176 return AV_CODEC_ID_VP9;
177 default:
178 UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
179 return AV_CODEC_ID_NONE;
180 }
181 }();
182 av_codec = avcodec_find_decoder(codec);
183
184 InitializeAvCodecContext();
185 if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
186 InitializeGpuDecoder();
187 }
188 if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
189 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
190 avcodec_free_context(&av_codec_ctx);
191 av_buffer_unref(&av_gpu_decoder);
192 return;
193 }
194 if (!av_codec_ctx->hw_device_ctx) {
195 LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
196 }
197 initialized = true;
198}
199
200void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
201 if (current_codec != codec) {
202 current_codec = codec;
203 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
204 }
205}
206
207void Codec::Decode() {
208 const bool is_first_frame = !initialized;
209 if (is_first_frame) {
210 Initialize();
211 }
212 if (!initialized) {
213 return;
214 }
215 bool vp9_hidden_frame = false;
216 const auto& frame_data = [&]() {
217 switch (current_codec) {
218 case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
219 return h264_decoder->ComposeFrame(state, is_first_frame);
220 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
221 return vp8_decoder->ComposeFrame(state);
222 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
223 vp9_decoder->ComposeFrame(state);
224 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
225 return vp9_decoder->GetFrameBytes();
226 default:
227 ASSERT(false);
228 return std::vector<u8>{};
229 }
230 }();
231 AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
232 if (!packet) {
233 LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
234 return;
235 }
236 packet->data = const_cast<u8*>(frame_data.data());
237 packet->size = static_cast<s32>(frame_data.size());
238 if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
239 LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
240 return;
241 }
242 // Only receive/store visible frames
243 if (vp9_hidden_frame) {
244 return;
245 }
246 AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
247 AVFramePtr final_frame{nullptr, AVFrameDeleter};
248 ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
249 if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
250 LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
251 return;
252 }
253 if (initial_frame->width == 0 || initial_frame->height == 0) {
254 LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
255 return;
256 }
257 if (av_codec_ctx->hw_device_ctx) {
258 final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
259 ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
260 // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
261 // because Intel drivers crash unless using AV_PIX_FMT_NV12
262 final_frame->format = PREFERRED_GPU_FMT;
263 const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
264 ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
265 } else {
266 final_frame = std::move(initial_frame);
267 }
268 if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
269 UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
270 return;
271 }
272 av_frames.push(std::move(final_frame));
273 if (av_frames.size() > 10) {
274 LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
275 av_frames.pop();
276 }
277}
278
279AVFramePtr Codec::GetCurrentFrame() {
280 // Sometimes VIC will request more frames than have been decoded.
281 // in this case, return a nullptr and don't overwrite previous frame data
282 if (av_frames.empty()) {
283 return AVFramePtr{nullptr, AVFrameDeleter};
284 }
285 AVFramePtr frame = std::move(av_frames.front());
286 av_frames.pop();
287 return frame;
288}
289
// Returns the codec last set through SetTargetCodec (VideoCodec::None until then).
290Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
291 return current_codec;
292}
293
294std::string_view Codec::GetCurrentCodecName() const {
295 switch (current_codec) {
296 case Host1x::NvdecCommon::VideoCodec::None:
297 return "None";
298 case Host1x::NvdecCommon::VideoCodec::H264:
299 return "H264";
300 case Host1x::NvdecCommon::VideoCodec::VP8:
301 return "VP8";
302 case Host1x::NvdecCommon::VideoCodec::H265:
303 return "H265";
304 case Host1x::NvdecCommon::VideoCodec::VP9:
305 return "VP9";
306 default:
307 return "Unknown";
308 }
309}
310} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
new file mode 100644
index 000000000..117cb3ccd
--- /dev/null
+++ b/src/video_core/host1x/codecs/codec.h
@@ -0,0 +1,81 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <string_view>
8#include <queue>
9#include "common/common_types.h"
10#include "video_core/host1x/nvdec_common.h"
11
12extern "C" {
13#if defined(__GNUC__) || defined(__clang__)
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#if defined(__GNUC__) || defined(__clang__)
19#pragma GCC diagnostic pop
20#endif
21}
22
23namespace Tegra {
24class GPU;
25
26void AVFrameDeleter(AVFrame* ptr);
27using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
28
29namespace Decoder {
30class H264;
31class VP8;
32class VP9;
33} // namespace Decoder
34
/// Wraps an FFmpeg decoder for the NVDEC video stream: composes the bitstream with the
/// codec-specific helpers, decodes it and queues the resulting frames.
35class Codec {
36public:
37 explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs);
38 ~Codec();
39
40 /// Initialize the FFmpeg decoder for the current codec. Returns nothing; success is
 /// recorded in `initialized`, which stays false when initialization fails.
41 void Initialize();
42
43 /// Sets NVDEC video stream codec
44 void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
45
46 /// Call decoders to construct headers, decode AVFrame with ffmpeg
47 void Decode();
48
49 /// Returns next decoded frame, or a null AVFramePtr when no frame is queued
50 [[nodiscard]] AVFramePtr GetCurrentFrame();
51
52 /// Returns the value of current_codec
53 [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
54
55 /// Return name of the current codec
56 [[nodiscard]] std::string_view GetCurrentCodecName() const;
57
58private:
 /// Allocates av_codec_ctx for av_codec
59 void InitializeAvCodecContext();
60
 /// Attaches a hardware device to av_codec_ctx when CreateGpuAvDevice succeeds
61 void InitializeGpuDecoder();
62
 /// Tries to create a hardware device usable by av_codec; returns whether one was found
63 bool CreateGpuAvDevice();
64
 /// True once Initialize() has completed successfully
65 bool initialized{};
66 Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
67
 // FFmpeg state: the decoder, its context, and the hardware device reference (if any)
68 const AVCodec* av_codec{nullptr};
69 AVCodecContext* av_codec_ctx{nullptr};
70 AVBufferRef* av_gpu_decoder{nullptr};
71
72 GPU& gpu;
 /// NVDEC register block passed to the ComposeFrame helpers
73 const Host1x::NvdecCommon::NvdecRegisters& state;
74 std::unique_ptr<Decoder::H264> h264_decoder;
75 std::unique_ptr<Decoder::VP8> vp8_decoder;
76 std::unique_ptr<Decoder::VP9> vp9_decoder;
77
 /// Decoded frames waiting to be fetched through GetCurrentFrame()
78 std::queue<AVFramePtr> av_frames{};
79};
80
81} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
new file mode 100644
index 000000000..95534bc85
--- /dev/null
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -0,0 +1,277 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#include <array>
5#include <bit>
6
7#include "common/settings.h"
8#include "video_core/gpu.h"
9#include "video_core/host1x/codecs/h264.h"
10#include "video_core/memory_manager.h"
11
12namespace Tegra::Decoder {
13namespace {
14// ZigZag LUTs from libavcodec.
// zig_zag_direct: 64-entry scan order, used by WriteScalingList for any count other than 16.
15constexpr std::array<u8, 64> zig_zag_direct{
16 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
17 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
18 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
19};
20
// zig_zag_scan: 16-entry scan order written as x + y * 4, used by WriteScalingList
// when count == 16.
21constexpr std::array<u8, 16> zig_zag_scan{
22 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
23 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
24};
25} // Anonymous namespace
26
// Keeps a reference to the GPU; its memory manager is used by ComposeFrame to read guest data.
27H264::H264(GPU& gpu_) : gpu(gpu_) {}
28
29H264::~H264() = default;
30
31const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
32 bool is_first_frame) {
33 H264DecoderContext context;
34 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
35
36 const s64 frame_number = context.h264_parameter_set.frame_number.Value();
37 if (!is_first_frame && frame_number != 0) {
38 frame.resize(context.stream_len);
39 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
40 return frame;
41 }
42
43 // Encode header
44 H264BitWriter writer{};
45 writer.WriteU(1, 24);
46 writer.WriteU(0, 1);
47 writer.WriteU(3, 2);
48 writer.WriteU(7, 5);
49 writer.WriteU(100, 8);
50 writer.WriteU(0, 8);
51 writer.WriteU(31, 8);
52 writer.WriteUe(0);
53 const u32 chroma_format_idc =
54 static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
55 writer.WriteUe(chroma_format_idc);
56 if (chroma_format_idc == 3) {
57 writer.WriteBit(false);
58 }
59
60 writer.WriteUe(0);
61 writer.WriteUe(0);
62 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
63 writer.WriteBit(false); // Scaling matrix present flag
64
65 writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
66
67 const auto order_cnt_type =
68 static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
69 writer.WriteUe(order_cnt_type);
70 if (order_cnt_type == 0) {
71 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
72 } else if (order_cnt_type == 1) {
73 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
74
75 writer.WriteSe(0);
76 writer.WriteSe(0);
77 writer.WriteUe(0);
78 }
79
80 const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
81 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
82
83 // TODO (ameerj): Where do we get this number, it seems to be particular for each stream
84 const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue();
85 const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::GPU;
86 const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u;
87 writer.WriteUe(max_num_ref_frames);
88 writer.WriteBit(false);
89 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
90 writer.WriteUe(pic_height - 1);
91 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
92
93 if (!context.h264_parameter_set.frame_mbs_only_flag) {
94 writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
95 }
96
97 writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
98 writer.WriteBit(false); // Frame cropping flag
99 writer.WriteBit(false); // VUI parameter present flag
100
101 writer.End();
102
103 // H264 PPS
104 writer.WriteU(1, 24);
105 writer.WriteU(0, 1);
106 writer.WriteU(3, 2);
107 writer.WriteU(8, 5);
108
109 writer.WriteUe(0);
110 writer.WriteUe(0);
111
112 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
113 writer.WriteBit(false);
114 writer.WriteUe(0);
115 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
116 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
117 writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
118 writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
119 s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
120 writer.WriteSe(pic_init_qp);
121 writer.WriteSe(0);
122 s32 chroma_qp_index_offset =
123 static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
124
125 writer.WriteSe(chroma_qp_index_offset);
126 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
127 writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
128 writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
129 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
130
131 writer.WriteBit(true);
132
133 for (s32 index = 0; index < 6; index++) {
134 writer.WriteBit(true);
135 std::span<const u8> matrix{context.weight_scale};
136 writer.WriteScalingList(matrix, index * 16, 16);
137 }
138
139 if (context.h264_parameter_set.transform_8x8_mode_flag) {
140 for (s32 index = 0; index < 2; index++) {
141 writer.WriteBit(true);
142 std::span<const u8> matrix{context.weight_scale_8x8};
143 writer.WriteScalingList(matrix, index * 64, 64);
144 }
145 }
146
147 s32 chroma_qp_index_offset2 =
148 static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
149
150 writer.WriteSe(chroma_qp_index_offset2);
151
152 writer.End();
153
154 const auto& encoded_header = writer.GetByteArray();
155 frame.resize(encoded_header.size() + context.stream_len);
156 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
157
158 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset,
159 frame.data() + encoded_header.size(), context.stream_len);
160
161 return frame;
162}
163
// Default construction/destruction: the bit buffer and byte array rely on their
// in-class initializers.
164H264BitWriter::H264BitWriter() = default;
165
166H264BitWriter::~H264BitWriter() = default;
167
// Writes the low value_sz bits of value, most significant bit first.
168void H264BitWriter::WriteU(s32 value, s32 value_sz) {
169 WriteBits(value, value_sz);
170}
171
// Writes value as a signed Exp-Golomb code.
172void H264BitWriter::WriteSe(s32 value) {
173 WriteExpGolombCodedInt(value);
174}
175
// Writes value as an unsigned Exp-Golomb code.
176void H264BitWriter::WriteUe(u32 value) {
177 WriteExpGolombCodedUInt(value);
178}
179
// Terminates the current unit: appends a single 1 bit, then flushes the partially
// filled byte (unused low bits stay 0) into the byte array.
180void H264BitWriter::End() {
181 WriteBit(true);
182 Flush();
183}
184
// Appends one bit: 1 when state is true, 0 otherwise.
185void H264BitWriter::WriteBit(bool state) {
186 WriteBits(state ? 1 : 0, 1);
187}
188
189void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
190 std::vector<u8> scan(count);
191 if (count == 16) {
192 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
193 } else {
194 std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
195 }
196 u8 last_scale = 8;
197
198 for (s32 index = 0; index < count; index++) {
199 const u8 value = list[start + scan[index]];
200 const s32 delta_scale = static_cast<s32>(value - last_scale);
201
202 WriteSe(delta_scale);
203
204 last_scale = value;
205 }
206}
207
// Returns the bytes flushed so far. Bits still held in the partial-byte buffer are not
// included until Flush() or End() has run.
208std::vector<u8>& H264BitWriter::GetByteArray() {
209 return byte_array;
210}
211
// Const overload of the accessor above.
212const std::vector<u8>& H264BitWriter::GetByteArray() const {
213 return byte_array;
214}
215
216void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
217 s32 value_pos = 0;
218
219 s32 remaining = bit_count;
220
221 while (remaining > 0) {
222 s32 copy_size = remaining;
223
224 const s32 free_bits = GetFreeBufferBits();
225
226 if (copy_size > free_bits) {
227 copy_size = free_bits;
228 }
229
230 const s32 mask = (1 << copy_size) - 1;
231
232 const s32 src_shift = (bit_count - value_pos) - copy_size;
233 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
234
235 buffer |= ((value >> src_shift) & mask) << dst_shift;
236
237 value_pos += copy_size;
238 buffer_pos += copy_size;
239 remaining -= copy_size;
240 }
241}
242
243void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
244 const s32 sign = value <= 0 ? 0 : 1;
245 if (value < 0) {
246 value = -value;
247 }
248 value = (value << 1) - sign;
249 WriteExpGolombCodedUInt(value);
250}
251
252void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
253 const s32 size = 32 - std::countl_zero(value + 1);
254 WriteBits(1, size);
255
256 value -= (1U << (size - 1)) - 1;
257 WriteBits(static_cast<s32>(value), size - 1);
258}
259
260s32 H264BitWriter::GetFreeBufferBits() {
261 if (buffer_pos == buffer_size) {
262 Flush();
263 }
264
265 return buffer_size - buffer_pos;
266}
267
268void H264BitWriter::Flush() {
269 if (buffer_pos == 0) {
270 return;
271 }
272 byte_array.push_back(static_cast<u8>(buffer));
273
274 buffer = 0;
275 buffer_pos = 0;
276}
277} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
new file mode 100644
index 000000000..a98730474
--- /dev/null
+++ b/src/video_core/host1x/codecs/h264.h
@@ -0,0 +1,173 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#pragma once
5
6#include <span>
7#include <vector>
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/host1x/nvdec_common.h"
12
13namespace Tegra {
14class GPU;
15namespace Decoder {
16
/// Bit-level writer that accumulates an H.264 header byte stream, one byte at a time.
17class H264BitWriter {
18public:
19 H264BitWriter();
20 ~H264BitWriter();
21
22 /// The following Write methods are based on clause 9.1 in the H.264 specification.
23 /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
 /// WriteU writes the low value_sz bits of value as a plain fixed-width field.
24 void WriteU(s32 value, s32 value_sz);
25 void WriteSe(s32 value);
26 void WriteUe(u32 value);
27
28 /// Finalize the bitstream: appends a 1 bit and flushes the pending byte
29 void End();
30
31 /// Append a bit to the stream, equivalent value to the state parameter
32 void WriteBit(bool state);
33
34 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
35 /// Writes the scaling matrices of the stream
 /// Writes count entries of list starting at offset start; count == 16 uses the
 /// 16-entry zig-zag order, any other count uses the 64-entry order.
36 void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
37
38 /// Return the bitstream as a vector (only bytes that have already been flushed).
39 [[nodiscard]] std::vector<u8>& GetByteArray();
40 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
41
42private:
 /// Appends the low bit_count bits of value, most significant bit first
43 void WriteBits(s32 value, s32 bit_count);
44 void WriteExpGolombCodedInt(s32 value);
45 void WriteExpGolombCodedUInt(u32 value);
 /// Free bits in the staging byte; flushes it first when it is full
46 [[nodiscard]] s32 GetFreeBufferBits();
 /// Moves the staging byte into byte_array when it holds any bits
47 void Flush();
48
 /// Capacity of the staging buffer in bits
49 s32 buffer_size{8};
50
 /// Staging buffer holding the bits of the byte currently being assembled
51 s32 buffer{};
 /// Number of bits currently held in the staging buffer
52 s32 buffer_pos{};
 /// Completed bytes of the stream
53 std::vector<u8> byte_array;
54};
55
/// Composes H.264 byte streams for FFmpeg from the decoder context the guest places in
/// GPU memory.
56class H264 {
57public:
58 explicit H264(GPU& gpu);
59 ~H264();
60
61 /// Compose the H264 frame for FFmpeg decoding
 /// The returned reference points at an internal buffer that the next call overwrites.
62 [[nodiscard]] const std::vector<u8>& ComposeFrame(
63 const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
64
65private:
 /// Output buffer reused by every ComposeFrame call
66 std::vector<u8> frame;
67 GPU& gpu;
68
 /// Picture/sequence parameters as laid out inside H264DecoderContext.
 /// The trailing ///< comments give each field's byte offset; the static_asserts below
 /// verify them.
69 struct H264ParameterSet {
70 s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
71 s32 delta_pic_order_always_zero_flag; ///< 0x04
72 s32 frame_mbs_only_flag; ///< 0x08
73 u32 pic_width_in_mbs; ///< 0x0C
74 u32 frame_height_in_map_units; ///< 0x10
75 union { ///< 0x14
76 BitField<0, 2, u32> tile_format;
77 BitField<2, 3, u32> gob_height;
78 };
79 u32 entropy_coding_mode_flag; ///< 0x18
80 s32 pic_order_present_flag; ///< 0x1C
81 s32 num_refidx_l0_default_active; ///< 0x20
82 s32 num_refidx_l1_default_active; ///< 0x24
83 s32 deblocking_filter_control_present_flag; ///< 0x28
84 s32 redundant_pic_cnt_present_flag; ///< 0x2C
85 u32 transform_8x8_mode_flag; ///< 0x30
86 u32 pitch_luma; ///< 0x34
87 u32 pitch_chroma; ///< 0x38
88 u32 luma_top_offset; ///< 0x3C
89 u32 luma_bot_offset; ///< 0x40
90 u32 luma_frame_offset; ///< 0x44
91 u32 chroma_top_offset; ///< 0x48
92 u32 chroma_bot_offset; ///< 0x4C
93 u32 chroma_frame_offset; ///< 0x50
94 u32 hist_buffer_size; ///< 0x54
 // 64-bit word of packed fields; `flags` occupies its lowest 8 bits.
95 union { ///< 0x58
96 union {
97 BitField<0, 1, u64> mbaff_frame;
98 BitField<1, 1, u64> direct_8x8_inference;
99 BitField<2, 1, u64> weighted_pred;
100 BitField<3, 1, u64> constrained_intra_pred;
101 BitField<4, 1, u64> ref_pic;
102 BitField<5, 1, u64> field_pic;
103 BitField<6, 1, u64> bottom_field;
104 BitField<7, 1, u64> second_field;
105 } flags;
106 BitField<8, 4, u64> log2_max_frame_num_minus4;
107 BitField<12, 2, u64> chroma_format_idc;
108 BitField<14, 2, u64> pic_order_cnt_type;
109 BitField<16, 6, s64> pic_init_qp_minus26;
110 BitField<22, 5, s64> chroma_qp_index_offset;
111 BitField<27, 5, s64> second_chroma_qp_index_offset;
112 BitField<32, 2, u64> weighted_bipred_idc;
113 BitField<34, 7, u64> curr_pic_idx;
114 BitField<41, 5, u64> curr_col_idx;
115 BitField<46, 16, u64> frame_number;
116 BitField<62, 1, u64> frame_surfaces;
117 BitField<63, 1, u64> output_memory_layout;
118 };
119 };
120 static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
121
 /// Block that ComposeFrame reads from GPU memory at state.picture_info_offset.
 /// Only stream_len, the parameter set and the two scaling tables are named; the rest
 /// is padding.
122 struct H264DecoderContext {
123 INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
124 u32 stream_len; ///< 0x0048
125 INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
126 H264ParameterSet h264_parameter_set; ///< 0x0058
127 INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
128 std::array<u8, 0x60> weight_scale; ///< 0x01C0
129 std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
130 };
131 static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
132
 // Compile-time checks that H264ParameterSet fields sit at the documented offsets.
133#define ASSERT_POSITION(field_name, position) \
134 static_assert(offsetof(H264ParameterSet, field_name) == position, \
135 "Field " #field_name " has invalid position")
136
137 ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
138 ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
139 ASSERT_POSITION(frame_mbs_only_flag, 0x08);
140 ASSERT_POSITION(pic_width_in_mbs, 0x0C);
141 ASSERT_POSITION(frame_height_in_map_units, 0x10);
142 ASSERT_POSITION(tile_format, 0x14);
143 ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
144 ASSERT_POSITION(pic_order_present_flag, 0x1C);
145 ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
146 ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
147 ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
148 ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
149 ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
150 ASSERT_POSITION(pitch_luma, 0x34);
151 ASSERT_POSITION(pitch_chroma, 0x38);
152 ASSERT_POSITION(luma_top_offset, 0x3C);
153 ASSERT_POSITION(luma_bot_offset, 0x40);
154 ASSERT_POSITION(luma_frame_offset, 0x44);
155 ASSERT_POSITION(chroma_top_offset, 0x48);
156 ASSERT_POSITION(chroma_bot_offset, 0x4C);
157 ASSERT_POSITION(chroma_frame_offset, 0x50);
158 ASSERT_POSITION(hist_buffer_size, 0x54);
159 ASSERT_POSITION(flags, 0x58);
160#undef ASSERT_POSITION
161
 // Same check for the named fields of H264DecoderContext.
162#define ASSERT_POSITION(field_name, position) \
163 static_assert(offsetof(H264DecoderContext, field_name) == position, \
164 "Field " #field_name " has invalid position")
165
166 ASSERT_POSITION(stream_len, 0x48);
167 ASSERT_POSITION(h264_parameter_set, 0x58);
168 ASSERT_POSITION(weight_scale, 0x1C0);
169#undef ASSERT_POSITION
170};
171
172} // namespace Decoder
173} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
new file mode 100644
index 000000000..aac026e17
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -0,0 +1,53 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <vector>
5
6#include "video_core/gpu.h"
7#include "video_core/host1x/codecs/vp8.h"
8#include "video_core/memory_manager.h"
9
10namespace Tegra::Decoder {
// Keeps a reference to the GPU; its memory manager is used by ComposeFrame to read guest data.
11VP8::VP8(GPU& gpu_) : gpu(gpu_) {}
12
13VP8::~VP8() = default;
14
15const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
16 VP8PictureInfo info;
17 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
18
19 const bool is_key_frame = info.key_frame == 1u;
20 const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
21 const size_t header_size = is_key_frame ? 10u : 3u;
22 frame.resize(header_size + bitstream_size);
23
24 // Based on page 30 of the VP8 specification.
25 // https://datatracker.ietf.org/doc/rfc6386/
26 frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
27 frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number
28 frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
29
30 // The next 19-bits are the first partition size
31 frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u);
32 frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u);
33 frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u);
34
35 if (is_key_frame) {
36 frame[3] = 0x9du;
37 frame[4] = 0x01u;
38 frame[5] = 0x2au;
39 // TODO(ameerj): Horizontal/Vertical Scale
40 // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
41 frame[6] = static_cast<u8>(info.frame_width & 0xff);
42 frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f));
43 // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits)
44 frame[8] = static_cast<u8>(info.frame_height & 0xff);
45 frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
46 }
47 const u64 bitstream_offset = state.frame_bitstream_offset;
48 gpu.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
49
50 return frame;
51}
52
53} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
new file mode 100644
index 000000000..a1dfa5f03
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -0,0 +1,74 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/host1x/nvdec_common.h"
12
13namespace Tegra {
14class GPU;
15namespace Decoder {
16
17class VP8 {
18public:
19 explicit VP8(GPU& gpu);
20 ~VP8();
21
22 /// Compose the VP8 frame for FFmpeg decoding
23 [[nodiscard]] const std::vector<u8>& ComposeFrame(
24 const Host1x::NvdecCommon::NvdecRegisters& state);
25
26private:
27 std::vector<u8> frame;
28 GPU& gpu;
29
30 struct VP8PictureInfo {
31 INSERT_PADDING_WORDS_NOINIT(14);
32 u16 frame_width; // actual frame width
33 u16 frame_height; // actual frame height
34 u8 key_frame;
35 u8 version;
36 union {
37 u8 raw;
38 BitField<0, 2, u8> tile_format;
39 BitField<2, 3, u8> gob_height;
40 BitField<5, 3, u8> reserverd_surface_format;
41 };
42 u8 error_conceal_on; // 1: error conceal on; 0: off
43 u32 first_part_size; // the size of first partition(frame header and mb header partition)
44 u32 hist_buffer_size; // in units of 256
45 u32 vld_buffer_size; // in units of 1
46 // Current frame buffers
47 std::array<u32, 2> frame_stride; // [y_c]
48 u32 luma_top_offset; // offset of luma top field in units of 256
49 u32 luma_bot_offset; // offset of luma bottom field in units of 256
50 u32 luma_frame_offset; // offset of luma frame in units of 256
51 u32 chroma_top_offset; // offset of chroma top field in units of 256
52 u32 chroma_bot_offset; // offset of chroma bottom field in units of 256
53 u32 chroma_frame_offset; // offset of chroma frame in units of 256
54
55 INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams
56
57 // Decode picture buffer related
58 s8 current_output_memory_layout;
59 // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last
60 std::array<s8, 3> output_memory_layout;
61
62 u8 segmentation_feature_data_update;
63 INSERT_PADDING_BYTES_NOINIT(3);
64
65 // ucode return result
66 u32 result_value;
67 std::array<u32, 8> partition_offset;
68 INSERT_PADDING_WORDS_NOINIT(3);
69 };
70 static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
71};
72
73} // namespace Decoder
74} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
new file mode 100644
index 000000000..bc50c6ba4
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -0,0 +1,946 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm> // for std::copy
5#include <numeric>
6#include "common/assert.h"
7#include "video_core/gpu.h"
8#include "video_core/host1x/codecs/vp9.h"
9#include "video_core/memory_manager.h"
10
11namespace Tegra::Decoder {
12namespace {
// Probability handed to the range encoder for every "this probability changed" flag
// (see WriteProbabilityUpdate and WriteMvProbabilityUpdate).
13constexpr u32 diff_update_probability = 252;
// 24-bit sync code written into the uncompressed header of key frames and intra-only frames.
14constexpr u32 frame_sync_code = 0x498342;
15
16// Default compressed header probabilities once frame context resets
// ComposeUncompressedHeader copies this table into prev_frame_probs and into every frame
// context when a key frame is encountered.
// Layout notes, as consumed by the writers in this file:
//  - partition_prob and inter_mode_prob are stored in groups of four of which only the first
//    three entries are coded (WriteProbabilityUpdateAligned4); the fourth is zero padding.
//  - coef_probs is stored as rows of 6 contexts x 3 nodes; for the first band of every group
//    only the first three contexts are coded (WriteCoefProbabilityUpdate), hence the rows that
//    end in nine zeros.
17constexpr Vp9EntropyProbs default_probs{
18 .y_mode_prob{
19 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
20 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
21 },
22 .partition_prob{
23 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
24 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
25 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
26 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
27 },
28 .coef_probs{
29 195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
31 40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
32 69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
33 102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
34 156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
35 191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36 29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
37 41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
38 59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
39 114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
40 149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
41 214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
43 89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
44 108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
45 124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
46 141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
47 229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
49 83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
50 99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
51 127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
52 157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
53 125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
55 31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
56 23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
57 29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
58 57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
59 202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
61 33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
62 30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
63 32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
64 57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
65 212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66 74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
67 71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
68 64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
69 65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
70 65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
71 225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72 85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
73 75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
74 73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
75 62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
76 61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
77 7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78 50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
79 43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
80 23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
81 18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
82 15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
83 19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
85 35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
86 28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
87 20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
88 13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
89 211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
91 71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
92 57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
93 47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
94 26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
95 233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
97 85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
98 65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
99 40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
100 16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
101 17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
103 50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
104 40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
105 30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
106 10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
107 36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108 85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
109 85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
110 72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
111 55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
112 15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
113 181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114 47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
115 62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
116 55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
117 49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
118 13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
119 197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120 126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
121 111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
122 104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
123 84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
124 43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
125 },
126 .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
127 .inter_mode_prob{
128 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
129 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
130 },
131 .intra_inter_prob{9, 102, 187, 225},
132 .comp_inter_prob{9, 102, 187, 225, 0},
133 .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
134 .comp_ref_prob{50, 126, 123, 221, 226},
135 .tx_32x32_prob{3, 136, 37, 5, 52, 13},
136 .tx_16x16_prob{20, 152, 15, 101},
137 .tx_8x8_prob{100, 66},
138 .skip_probs{192, 128, 64},
139 .joints{32, 64, 96},
140 .sign{128, 128},
141 .classes{
142 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
143 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
144 },
145 .class_0{216, 208},
146 .prob_bits{
147 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
148 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
149 },
150 .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
151 .fr{64, 96, 64, 64, 96, 64},
152 .class_0_hp{160, 160},
153 .high_precision{128, 128},
154};
155
// 256-entry lookup table indexed by an 8-bit value. Entry 0 is 0; for i >= 1 the entry equals
// 7 - floor(log2(i)), i.e. the left shift that moves the highest set bit of i to bit 7.
// NOTE(review): presumably the renormalisation shift table of the range encoder; it is not
// referenced in this part of the file, so confirm at its use site.
156constexpr std::array<u8, 256> norm_lut{
157 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
164 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165};
166
// Remapping table used by RemapProbability to turn a recentred probability difference into the
// value that is actually coded. It holds 254 entries (valid indices 0..253): the positions
// 6, 19, 32, ... (every 13th entry starting at 6) contain 0..19, and all remaining positions
// contain 20..253 in ascending order.
167constexpr std::array<u8, 254> map_lut{
168 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
169 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
170 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
171 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
172 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
173 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
174 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
175 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
176 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
177 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
178 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
179 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
180 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
181 248, 249, 250, 251, 252, 253, 19,
182};
183
184// 6.2.14 Tile size calculation
185
/// Returns the smallest log2 tile-column count allowed for a frame of the given width, i.e. the
/// first exponent for which 64 << exponent covers the number of 64-pixel superblock columns.
[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
    // Width expressed in 64-pixel superblock columns, rounded up.
    const s32 superblock_columns = (frame_width + 63) / 64;

    s32 exponent = 0;
    for (; (64 << exponent) < superblock_columns; ++exponent) {
    }
    return exponent;
}
196
/// Returns the largest log2 tile-column count allowed for a frame of the given width, i.e. the
/// last exponent for which the next larger one would still leave at least four 64-pixel
/// superblock columns per tile (0 when even two tiles would be too narrow).
[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
    // Width expressed in 64-pixel superblock columns, rounded up.
    const s32 superblock_columns = (frame_width + 63) / 64;

    s32 exponent = 0;
    while ((superblock_columns >> (exponent + 1)) >= 4) {
        ++exponent;
    }
    return exponent;
}
207
208// Recenters probability. Based on section 6.3.6 of VP9 Specification
/// Recentres new_prob around old_prob. Values further than old_prob above it are returned
/// unchanged; otherwise non-negative differences map to even results and negative differences
/// to odd results.
[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
    const s32 doubled_old = old_prob * 2;
    if (new_prob > doubled_old) {
        return new_prob;
    }

    return new_prob >= old_prob ? (new_prob - old_prob) * 2 : (old_prob - new_prob) * 2 - 1;
}
220
221// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
222[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
223 new_prob--;
224 old_prob--;
225
226 std::size_t index{};
227
228 if (old_prob * 2 <= 0xff) {
229 index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
230 } else {
231 index = static_cast<std::size_t>(
232 std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
233 }
234
235 return static_cast<s32>(map_lut[index]);
236}
237} // Anonymous namespace
238
239VP9::VP9(GPU& gpu_) : gpu{gpu_} {}
240
241VP9::~VP9() = default;
242
243void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
244 const bool update = new_prob != old_prob;
245
246 writer.Write(update, diff_update_probability);
247
248 if (update) {
249 WriteProbabilityDelta(writer, new_prob, old_prob);
250 }
251}
252template <typename T, std::size_t N>
253void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
254 const std::array<T, N>& old_prob) {
255 for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
256 WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
257 }
258}
259
260template <typename T, std::size_t N>
261void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
262 const std::array<T, N>& old_prob) {
263 for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
264 WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
265 WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
266 WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
267 }
268}
269
270void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
271 const int delta = RemapProbability(new_prob, old_prob);
272
273 EncodeTermSubExp(writer, delta);
274}
275
276void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
277 if (WriteLessThan(writer, value, 16)) {
278 writer.Write(value, 4);
279 } else if (WriteLessThan(writer, value, 32)) {
280 writer.Write(value - 16, 4);
281 } else if (WriteLessThan(writer, value, 64)) {
282 writer.Write(value - 32, 5);
283 } else {
284 value -= 64;
285
286 constexpr s32 size = 8;
287
288 const s32 mask = (1 << size) - 191;
289
290 const s32 delta = value - mask;
291
292 if (delta < 0) {
293 writer.Write(value, size - 1);
294 } else {
295 writer.Write(delta / 2 + mask, size - 1);
296 writer.Write(delta & 1, 1);
297 }
298 }
299}
300
301bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
302 const bool is_lt = value < test;
303 writer.Write(!is_lt);
304 return is_lt;
305}
306
307void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
308 const std::array<u8, 1728>& new_prob,
309 const std::array<u8, 1728>& old_prob) {
310 constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3;
311
312 const auto needs_update = [&](u32 base_index) {
313 return !std::equal(new_prob.begin() + base_index,
314 new_prob.begin() + base_index + block_bytes,
315 old_prob.begin() + base_index);
316 };
317
318 for (u32 block_index = 0; block_index < 4; block_index++) {
319 const u32 base_index = block_index * block_bytes;
320 const bool update = needs_update(base_index);
321 writer.Write(update);
322
323 if (update) {
324 u32 index = base_index;
325 for (s32 i = 0; i < 2; i++) {
326 for (s32 j = 0; j < 2; j++) {
327 for (s32 k = 0; k < 6; k++) {
328 for (s32 l = 0; l < 6; l++) {
329 if (k != 0 || l < 3) {
330 WriteProbabilityUpdate(writer, new_prob[index + 0],
331 old_prob[index + 0]);
332 WriteProbabilityUpdate(writer, new_prob[index + 1],
333 old_prob[index + 1]);
334 WriteProbabilityUpdate(writer, new_prob[index + 2],
335 old_prob[index + 2]);
336 }
337 index += 3;
338 }
339 }
340 }
341 }
342 }
343 if (block_index == static_cast<u32>(tx_mode)) {
344 break;
345 }
346 }
347}
348
349void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
350 const bool update = new_prob != old_prob;
351 writer.Write(update, diff_update_probability);
352
353 if (update) {
354 writer.Write(new_prob >> 1, 7);
355 }
356}
357
358Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
359 PictureInfo picture_info;
360 gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
361 Vp9PictureInfo vp9_info = picture_info.Convert();
362
363 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
364
365 // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
366 // order: last, golden, altref, current.
367 std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
368 vp9_info.frame_offsets.begin());
369
370 return vp9_info;
371}
372
373void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
374 EntropyProbs entropy;
375 gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
376 entropy.Convert(dst);
377}
378
379Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
380 Vp9FrameContainer current_frame{};
381 {
382 gpu.SyncGuestHost();
383 current_frame.info = GetVp9PictureInfo(state);
384 current_frame.bit_stream.resize(current_frame.info.bitstream_size);
385 gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, current_frame.bit_stream.data(),
386 current_frame.info.bitstream_size);
387 }
388 if (!next_frame.bit_stream.empty()) {
389 Vp9FrameContainer temp{
390 .info = current_frame.info,
391 .bit_stream = std::move(current_frame.bit_stream),
392 };
393 next_frame.info.show_frame = current_frame.info.last_frame_shown;
394 current_frame.info = next_frame.info;
395 current_frame.bit_stream = std::move(next_frame.bit_stream);
396 next_frame = std::move(temp);
397 } else {
398 next_frame.info = current_frame.info;
399 next_frame.bit_stream = current_frame.bit_stream;
400 }
401 return current_frame;
402}
403
404std::vector<u8> VP9::ComposeCompressedHeader() {
405 VpxRangeEncoder writer{};
406 const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame;
407 if (!current_frame_info.lossless) {
408 if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
409 writer.Write(3, 2);
410 writer.Write(current_frame_info.transform_mode == 4);
411 } else {
412 writer.Write(current_frame_info.transform_mode, 2);
413 }
414 }
415
416 if (current_frame_info.transform_mode == 4) {
417 // tx_mode_probs() in the spec
418 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
419 prev_frame_probs.tx_8x8_prob);
420 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
421 prev_frame_probs.tx_16x16_prob);
422 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
423 prev_frame_probs.tx_32x32_prob);
424 if (update_probs) {
425 prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
426 prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
427 prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
428 }
429 }
430 // read_coef_probs() in the spec
431 WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
432 current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
433 // read_skip_probs() in the spec
434 WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
435 prev_frame_probs.skip_probs);
436
437 if (update_probs) {
438 prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
439 prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
440 }
441
442 if (!current_frame_info.intra_only) {
443 // read_inter_probs() in the spec
444 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
445 prev_frame_probs.inter_mode_prob);
446
447 if (current_frame_info.interp_filter == 4) {
448 // read_interp_filter_probs() in the spec
449 WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
450 prev_frame_probs.switchable_interp_prob);
451 if (update_probs) {
452 prev_frame_probs.switchable_interp_prob =
453 current_frame_info.entropy.switchable_interp_prob;
454 }
455 }
456
457 // read_is_inter_probs() in the spec
458 WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
459 prev_frame_probs.intra_inter_prob);
460
461 // frame_reference_mode() in the spec
462 if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
463 (current_frame_info.ref_frame_sign_bias[2] & 1) ||
464 (current_frame_info.ref_frame_sign_bias[1] & 1) !=
465 (current_frame_info.ref_frame_sign_bias[3] & 1)) {
466 if (current_frame_info.reference_mode >= 1) {
467 writer.Write(1, 1);
468 writer.Write(current_frame_info.reference_mode == 2);
469 } else {
470 writer.Write(0, 1);
471 }
472 }
473
474 // frame_reference_mode_probs() in the spec
475 if (current_frame_info.reference_mode == 2) {
476 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
477 prev_frame_probs.comp_inter_prob);
478 if (update_probs) {
479 prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
480 }
481 }
482
483 if (current_frame_info.reference_mode != 1) {
484 WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
485 prev_frame_probs.single_ref_prob);
486 if (update_probs) {
487 prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
488 }
489 }
490
491 if (current_frame_info.reference_mode != 0) {
492 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
493 prev_frame_probs.comp_ref_prob);
494 if (update_probs) {
495 prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
496 }
497 }
498
499 // read_y_mode_probs
500 for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
501 ++index) {
502 WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
503 prev_frame_probs.y_mode_prob[index]);
504 }
505
506 // read_partition_probs
507 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
508 prev_frame_probs.partition_prob);
509
510 // mv_probs
511 for (s32 i = 0; i < 3; i++) {
512 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
513 prev_frame_probs.joints[i]);
514 }
515 if (update_probs) {
516 prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
517 prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
518 prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
519 prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
520 prev_frame_probs.joints = current_frame_info.entropy.joints;
521 }
522
523 for (s32 i = 0; i < 2; i++) {
524 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
525 prev_frame_probs.sign[i]);
526 for (s32 j = 0; j < 10; j++) {
527 const int index = i * 10 + j;
528 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
529 prev_frame_probs.classes[index]);
530 }
531 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
532 prev_frame_probs.class_0[i]);
533
534 for (s32 j = 0; j < 10; j++) {
535 const int index = i * 10 + j;
536 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
537 prev_frame_probs.prob_bits[index]);
538 }
539 }
540
541 for (s32 i = 0; i < 2; i++) {
542 for (s32 j = 0; j < 2; j++) {
543 for (s32 k = 0; k < 3; k++) {
544 const int index = i * 2 * 3 + j * 3 + k;
545 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
546 prev_frame_probs.class_0_fr[index]);
547 }
548 }
549
550 for (s32 j = 0; j < 3; j++) {
551 const int index = i * 3 + j;
552 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
553 prev_frame_probs.fr[index]);
554 }
555 }
556
557 if (current_frame_info.allow_high_precision_mv) {
558 for (s32 index = 0; index < 2; index++) {
559 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
560 prev_frame_probs.class_0_hp[index]);
561 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
562 prev_frame_probs.high_precision[index]);
563 }
564 }
565
566 // save previous probs
567 if (update_probs) {
568 prev_frame_probs.sign = current_frame_info.entropy.sign;
569 prev_frame_probs.classes = current_frame_info.entropy.classes;
570 prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
571 prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
572 prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
573 prev_frame_probs.fr = current_frame_info.entropy.fr;
574 prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
575 prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
576 }
577 }
578 writer.End();
579 return writer.GetBuffer();
580}
581
582VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
583 VpxBitStreamWriter uncomp_writer{};
584
585 uncomp_writer.WriteU(2, 2); // Frame marker.
586 uncomp_writer.WriteU(0, 2); // Profile.
587 uncomp_writer.WriteBit(false); // Show existing frame.
588 uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
589 uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
590 uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
591
592 if (current_frame_info.is_key_frame) {
593 uncomp_writer.WriteU(frame_sync_code, 24);
594 uncomp_writer.WriteU(0, 3); // Color space.
595 uncomp_writer.WriteU(0, 1); // Color range.
596 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
597 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
598 uncomp_writer.WriteBit(false); // Render and frame size different.
599
600 // Reset context
601 prev_frame_probs = default_probs;
602 swap_ref_indices = false;
603 loop_filter_ref_deltas.fill(0);
604 loop_filter_mode_deltas.fill(0);
605 frame_ctxs.fill(default_probs);
606
607 // intra only, meaning the frame can be recreated with no other references
608 current_frame_info.intra_only = true;
609 } else {
610 if (!current_frame_info.show_frame) {
611 uncomp_writer.WriteBit(current_frame_info.intra_only);
612 } else {
613 current_frame_info.intra_only = false;
614 }
615 if (!current_frame_info.error_resilient_mode) {
616 uncomp_writer.WriteU(0, 2); // Reset frame context.
617 }
618 const auto& curr_offsets = current_frame_info.frame_offsets;
619 const auto& next_offsets = next_frame.info.frame_offsets;
620 const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
621 const bool next_references_swap =
622 (next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
623 const bool needs_ref_swap = ref_frames_different && next_references_swap;
624 if (needs_ref_swap) {
625 swap_ref_indices = !swap_ref_indices;
626 }
627 union {
628 u32 raw;
629 BitField<0, 1, u32> refresh_last;
630 BitField<1, 2, u32> refresh_golden;
631 BitField<2, 1, u32> refresh_alt;
632 } refresh_frame_flags;
633
634 refresh_frame_flags.raw = 0;
635 for (u32 index = 0; index < 3; ++index) {
636 // Refresh indices that use the current frame as an index
637 if (curr_offsets[3] == next_offsets[index]) {
638 refresh_frame_flags.raw |= 1u << index;
639 }
640 }
641 if (swap_ref_indices) {
642 const u32 temp = refresh_frame_flags.refresh_golden;
643 refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
644 refresh_frame_flags.refresh_alt.Assign(temp);
645 }
646 if (current_frame_info.intra_only) {
647 uncomp_writer.WriteU(frame_sync_code, 24);
648 uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
649 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
650 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
651 uncomp_writer.WriteBit(false); // Render and frame size different.
652 } else {
653 const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
654 const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
655 uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
656 for (size_t index = 1; index < 4; index++) {
657 uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
658 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
659 }
660 uncomp_writer.WriteBit(true); // Frame size with refs.
661 uncomp_writer.WriteBit(false); // Render and frame size different.
662 uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
663 uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
664
665 if (current_frame_info.interp_filter != 4) {
666 uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
667 }
668 }
669 }
670
671 if (!current_frame_info.error_resilient_mode) {
672 uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
673 uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
674 }
675
676 int frame_ctx_idx = 0;
677 if (!current_frame_info.show_frame) {
678 frame_ctx_idx = 1;
679 }
680
681 uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
682 prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
683 frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
684
685 uncomp_writer.WriteU(current_frame_info.first_level, 6);
686 uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
687 uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
688
689 if (current_frame_info.mode_ref_delta_enabled) {
690 // check if ref deltas are different, update accordingly
691 std::array<bool, 4> update_loop_filter_ref_deltas;
692 std::array<bool, 2> update_loop_filter_mode_deltas;
693
694 bool loop_filter_delta_update = false;
695
696 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
697 const s8 old_deltas = loop_filter_ref_deltas[index];
698 const s8 new_deltas = current_frame_info.ref_deltas[index];
699 const bool differing_delta = old_deltas != new_deltas;
700
701 update_loop_filter_ref_deltas[index] = differing_delta;
702 loop_filter_delta_update |= differing_delta;
703 }
704
705 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
706 const s8 old_deltas = loop_filter_mode_deltas[index];
707 const s8 new_deltas = current_frame_info.mode_deltas[index];
708 const bool differing_delta = old_deltas != new_deltas;
709
710 update_loop_filter_mode_deltas[index] = differing_delta;
711 loop_filter_delta_update |= differing_delta;
712 }
713
714 uncomp_writer.WriteBit(loop_filter_delta_update);
715
716 if (loop_filter_delta_update) {
717 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
718 uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
719
720 if (update_loop_filter_ref_deltas[index]) {
721 uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
722 }
723 }
724
725 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
726 uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
727
728 if (update_loop_filter_mode_deltas[index]) {
729 uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
730 }
731 }
732 // save new deltas
733 loop_filter_ref_deltas = current_frame_info.ref_deltas;
734 loop_filter_mode_deltas = current_frame_info.mode_deltas;
735 }
736 }
737
738 uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
739
740 uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
741 uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
742 uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
743
744 ASSERT(!current_frame_info.segment_enabled);
745 uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
746
747 const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
748 const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
749
750 const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
751 const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
752
753 // If it's less than the maximum, we need to add an extra 0 on the bitstream
754 // to indicate that it should stop reading.
755 if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
756 uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
757 } else {
758 uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
759 }
760
761 const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
762
763 uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
764
765 if (tile_rows_log2_is_nonzero) {
766 uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
767 }
768
769 return uncomp_writer;
770}
771
772void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
773 std::vector<u8> bitstream;
774 {
775 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
776 current_frame_info = curr_frame.info;
777 bitstream = std::move(curr_frame.bit_stream);
778 }
779 // The uncompressed header routine sets PrevProb parameters needed for the compressed header
780 auto uncomp_writer = ComposeUncompressedHeader();
781 std::vector<u8> compressed_header = ComposeCompressedHeader();
782
783 uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
784 uncomp_writer.Flush();
785 std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
786
787 // Write headers and frame to buffer
788 frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
789 std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin());
790 std::copy(compressed_header.begin(), compressed_header.end(),
791 frame.begin() + uncompressed_header.size());
792 std::copy(bitstream.begin(), bitstream.end(),
793 frame.begin() + uncompressed_header.size() + compressed_header.size());
794}
795
796VpxRangeEncoder::VpxRangeEncoder() {
797 Write(false);
798}
799
/// Defaulted destructor, defined out of line.
800VpxRangeEncoder::~VpxRangeEncoder() = default;
801
802void VpxRangeEncoder::Write(s32 value, s32 value_size) {
803 for (s32 bit = value_size - 1; bit >= 0; bit--) {
804 Write(((value >> bit) & 1) != 0);
805 }
806}
807
808void VpxRangeEncoder::Write(bool bit) {
809 Write(bit, half_probability);
810}
811
/// Range-encodes one bit into base_stream.
///
/// `probability` is scaled by 1/256 to split the current range: a false bit keeps the lower
/// `split` part of the range, a true bit advances low_value by `split` and keeps the rest.
/// Whenever enough bits have accumulated (count >= 0) one byte is written to base_stream.
812void VpxRangeEncoder::Write(bool bit, s32 probability) {
813 u32 local_range = range;
814 const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
815 local_range = split;
816
817 if (bit) {
818 low_value += split;
819 local_range = range - split;
820 }
821
    // norm_lut is defined outside this chunk; presumably it yields the left shift that
    // renormalises the range -- TODO confirm against its definition.
822 s32 shift = static_cast<s32>(norm_lut[local_range]);
823 local_range <<= shift;
824 count += shift;
825
826 if (count >= 0) {
827 const s32 offset = shift - count;
828
    // The tested bit of low_value signals a carry into bytes that were already written.
829 if (((low_value << (offset - 1)) >> 31) != 0) {
830 const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
831 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
    // Walk backwards over 0xff bytes, turning each into 0 (WriteByte moves forward by
    // one, so seeking -2 lands on the previous byte).
832 while (PeekByte() == 0xff) {
833 base_stream.WriteByte(0);
834
835 base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
836 }
    // Add the carry to the first byte that is not 0xff, then restore the write position.
837 base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
838 base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
839 }
    // Emit the next completed byte of low_value.
840 base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
841
842 low_value <<= offset;
843 shift = count;
    // Keep only the low 24 bits; the byte above them has just been written.
844 low_value &= 0xffffff;
845 count -= 8;
846 }
847
848 low_value <<= shift;
849 range = local_range;
850}
851
852void VpxRangeEncoder::End() {
853 for (std::size_t index = 0; index < 32; ++index) {
854 Write(false);
855 }
856}
857
858u8 VpxRangeEncoder::PeekByte() {
859 const u8 value = base_stream.ReadByte();
860 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
861
862 return value;
863}
864
/// Defaulted constructor, defined out of line.
865VpxBitStreamWriter::VpxBitStreamWriter() = default;
866
/// Defaulted destructor, defined out of line.
867VpxBitStreamWriter::~VpxBitStreamWriter() = default;
868
869void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
870 WriteBits(value, value_size);
871}
872
873void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
874 const bool sign = value < 0;
875 if (sign) {
876 value = -value;
877 }
878
879 WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
880}
881
882void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
883 const bool delta_coded = value != 0;
884 WriteBit(delta_coded);
885
886 if (delta_coded) {
887 WriteBits(value, 4);
888 }
889}
890
891void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
892 s32 value_pos = 0;
893 s32 remaining = bit_count;
894
895 while (remaining > 0) {
896 s32 copy_size = remaining;
897
898 const s32 free = GetFreeBufferBits();
899
900 if (copy_size > free) {
901 copy_size = free;
902 }
903
904 const s32 mask = (1 << copy_size) - 1;
905
906 const s32 src_shift = (bit_count - value_pos) - copy_size;
907 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
908
909 buffer |= ((value >> src_shift) & mask) << dst_shift;
910
911 value_pos += copy_size;
912 buffer_pos += copy_size;
913 remaining -= copy_size;
914 }
915}
916
917void VpxBitStreamWriter::WriteBit(bool state) {
918 WriteBits(state ? 1 : 0, 1);
919}
920
921s32 VpxBitStreamWriter::GetFreeBufferBits() {
922 if (buffer_pos == buffer_size) {
923 Flush();
924 }
925
926 return buffer_size - buffer_pos;
927}
928
929void VpxBitStreamWriter::Flush() {
930 if (buffer_pos == 0) {
931 return;
932 }
933 byte_array.push_back(static_cast<u8>(buffer));
934 buffer = 0;
935 buffer_pos = 0;
936}
937
938std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
939 return byte_array;
940}
941
942const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
943 return byte_array;
944}
945
946} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
new file mode 100644
index 000000000..a425c0fa4
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -0,0 +1,194 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8
9#include "common/common_types.h"
10#include "common/stream.h"
11#include "video_core/host1x/codecs/vp9_types.h"
12#include "video_core/host1x/nvdec_common.h"
13
14namespace Tegra {
15class GPU;
16namespace Decoder {
17
18/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
19/// VP9 header bitstreams.
20
/// Range (boolean) encoder that writes probability-coded bits into an internal
/// Common::Stream. Movable but not copyable.
21class VpxRangeEncoder {
22public:
    /// Constructing the encoder already writes one false bit (see the definition).
23 VpxRangeEncoder();
24 ~VpxRangeEncoder();
25
26 VpxRangeEncoder(const VpxRangeEncoder&) = delete;
27 VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
28
29 VpxRangeEncoder(VpxRangeEncoder&&) = default;
30 VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
31
32 /// Writes the rightmost value_size bits from value into the stream
33 void Write(s32 value, s32 value_size);
34
35 /// Writes a single bit with half probability
36 void Write(bool bit);
37
38 /// Writes a bit to the base_stream encoded with probability
39 void Write(bool bit, s32 probability);
40
41 /// Signal the end of the bitstream
42 void End();
43
    /// Returns the buffer of the underlying stream.
44 [[nodiscard]] std::vector<u8>& GetBuffer() {
45 return base_stream.GetBuffer();
46 }
47
    /// Returns the buffer of the underlying stream (read-only).
48 [[nodiscard]] const std::vector<u8>& GetBuffer() const {
49 return base_stream.GetBuffer();
50 }
51
52private:
    /// Reads one byte from base_stream and seeks back by one byte afterwards.
53 u8 PeekByte();
    // Destination of the encoded bytes.
54 Common::Stream base_stream{};
    // Encoder state updated by Write(bool, s32): pending low bits, current range and the
    // bit counter that decides when a byte is emitted.
55 u32 low_value{};
56 u32 range{0xff};
57 s32 count{-24};
    // Probability used by Write(bool); 128 out of 256.
58 s32 half_probability{128};
59};
60
/// Bit-level writer: bits are collected MSB-first in a small staging buffer and appended
/// to byte_array one byte at a time. Movable but not copyable.
61class VpxBitStreamWriter {
62public:
63 VpxBitStreamWriter();
64 ~VpxBitStreamWriter();
65
66 VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
67 VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
68
69 VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
70 VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
71
72 /// Write an unsigned integer value using value_size bits
73 void WriteU(u32 value, u32 value_size);
74
75 /// Write a signed integer value as value_size magnitude bits followed by a sign bit
76 void WriteS(s32 value, u32 value_size);
77
78 /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
79 void WriteDeltaQ(u32 value);
80
81 /// Write a single bit.
82 void WriteBit(bool state);
83
84 /// Pushes current buffer into byte_array, resets buffer
85 void Flush();
86
87 /// Returns byte_array
88 [[nodiscard]] std::vector<u8>& GetByteArray();
89
90 /// Returns const byte_array
91 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
92
93private:
94 /// Write bit_count bits from value into buffer
95 void WriteBits(u32 value, u32 bit_count);
96
97 /// Returns the number of free bits in buffer, invokes Flush() first if buffer is full
98 s32 GetFreeBufferBits();
99
    // Capacity of the staging buffer in bits.
100 s32 buffer_size{8};
101
    // Staging buffer and the number of bits currently stored in it.
102 s32 buffer{};
103 s32 buffer_pos{};
    // Bytes flushed so far.
104 std::vector<u8> byte_array;
105};
106
/// Composes a VP9 frame (uncompressed header, compressed header and bitstream) from the
/// NVDEC register state. Move-constructible only.
107class VP9 {
108public:
109 explicit VP9(GPU& gpu_);
110 ~VP9();
111
112 VP9(const VP9&) = delete;
113 VP9& operator=(const VP9&) = delete;
114
115 VP9(VP9&&) = default;
116 VP9& operator=(VP9&&) = delete;
117
118 /// Composes the VP9 frame from the GPU state information.
119 /// Based on the official VP9 spec documentation
120 void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
121
122 /// Returns true if the most recent frame was a hidden frame.
123 [[nodiscard]] bool WasFrameHidden() const {
124 return !current_frame_info.show_frame;
125 }
126
127 /// Returns a const reference to the composed frame data.
128 [[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
129 return frame;
130 }
131
132private:
133 /// Generates compressed header probability updates in the bitstream writer
134 template <typename T, std::size_t N>
135 void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
136 const std::array<T, N>& old_prob);
137
138 /// Generates compressed header probability updates in the bitstream writer
139 /// If probs are not equal, WriteProbabilityDelta is invoked
140 void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
141
142 /// Generates compressed header probability deltas in the bitstream writer
143 void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
144
145 /// Inverse of 6.3.4 Decode term subexp
146 void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
147
148 /// Writes if the value is less than the test value
149 bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
150
151 /// Writes probability updates for the Coef probabilities
152 void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
153 const std::array<u8, 1728>& new_prob,
154 const std::array<u8, 1728>& old_prob);
155
156 /// Write probabilities for 4-byte aligned structures
157 template <typename T, std::size_t N>
158 void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
159 const std::array<T, N>& old_prob);
160
161 /// Write motion vector probability updates. 6.3.17 in the spec
162 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
163
164 /// Returns VP9 information from NVDEC provided offset and size
165 [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
166 const Host1x::NvdecCommon::NvdecRegisters& state);
167
168 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
169 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
170
171 /// Returns frame to be decoded after buffering
172 [[nodiscard]] Vp9FrameContainer GetCurrentFrame(
173 const Host1x::NvdecCommon::NvdecRegisters& state);
174
175 /// Use NVDEC provided information to compose the headers for the current frame
176 [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
177 [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
178
179 GPU& gpu;
    // Output of ComposeFrame(): both headers followed by the frame bitstream.
180 std::vector<u8> frame;
181
    // Loop filter deltas most recently written to a header; compared against the current
    // frame's deltas to decide which ones need to be sent again.
182 std::array<s8, 4> loop_filter_ref_deltas{};
183 std::array<s8, 2> loop_filter_mode_deltas{};
184
185 Vp9FrameContainer next_frame{};
    // Stored entropy contexts, indexed by the frame context index written to the header.
186 std::array<Vp9EntropyProbs, 4> frame_ctxs{};
187 bool swap_ref_indices{};
188
    // Picture parameters of the frame currently being composed.
189 Vp9PictureInfo current_frame_info{};
    // Reference probabilities for the compressed header, set while composing the
    // uncompressed header.
190 Vp9EntropyProbs prev_frame_probs{};
191};
192
193} // namespace Decoder
194} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
new file mode 100644
index 000000000..bb3d8df6e
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -0,0 +1,306 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra {
12class GPU;
13
14namespace Decoder {
/// Frame dimensions and pitches as four 16-bit values (8 bytes in total, enforced below).
15struct Vp9FrameDimensions {
16 s16 width;
17 s16 height;
18 s16 luma_pitch;
19 s16 chroma_pitch;
20};
21static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
22
/// Bit flags stored in PictureInfo::vp9_flags; PictureInfo::Convert() tests them one by one.
23enum class FrameFlags : u32 {
24 IsKeyFrame = 1 << 0,
25 LastFrameIsKeyFrame = 1 << 1,
26 FrameSizeChanged = 1 << 2,
27 ErrorResilientMode = 1 << 3,
28 LastShowFrame = 1 << 4,
29 IntraOnly = 1 << 5,
30};
// Enables the bitwise operators used to combine and test the flags.
31DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)
32
/// Transform block sizes; TxSizes is the number of sizes, not a size itself.
33enum class TxSize {
34 Tx4x4 = 0, // 4x4 transform
35 Tx8x8 = 1, // 8x8 transform
36 Tx16x16 = 2, // 16x16 transform
37 Tx32x32 = 3, // 32x32 transform
38 TxSizes = 4
39};
40
/// Frame-level transform modes; TxModes is the number of modes, not a mode itself.
41enum class TxMode {
42 Only4X4 = 0, // Only 4x4 transform used
43 Allow8X8 = 1, // Allow block transform size up to 8x8
44 Allow16X16 = 2, // Allow block transform size up to 16x16
45 Allow32X32 = 3, // Allow block transform size up to 32x32
46 TxModeSelect = 4, // Transform specified for each block
47 TxModes = 5
48};
49
/// Segmentation parameters embedded in PictureInfo at offset 0x80. The layout is fixed at
/// 0x64 bytes (enforced below); feature_mask and feature_data hold 8 entries each.
50struct Segmentation {
51 u8 enabled;
52 u8 update_map;
53 u8 temporal_update;
54 u8 abs_delta;
55 std::array<u32, 8> feature_mask;
56 std::array<std::array<s16, 4>, 8> feature_data;
57};
58static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
59
/// Loop filter delta parameters embedded in PictureInfo at offset 0xE4 (7 bytes, enforced
/// below).
60struct LoopFilter {
61 u8 mode_ref_delta_enabled;
62 std::array<s8, 4> ref_deltas;
63 std::array<s8, 2> mode_deltas;
64};
65static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
66
/// Tightly packed entropy probability tables; this is the destination type of
/// EntropyProbs::Convert(). The trailing comments give each field's byte offset and the
/// total size is fixed at 0x7B4 bytes (enforced below).
67struct Vp9EntropyProbs {
68 std::array<u8, 36> y_mode_prob; ///< 0x0000
69 std::array<u8, 64> partition_prob; ///< 0x0024
70 std::array<u8, 1728> coef_probs; ///< 0x0064
71 std::array<u8, 8> switchable_interp_prob; ///< 0x0724
72 std::array<u8, 28> inter_mode_prob; ///< 0x072C
73 std::array<u8, 4> intra_inter_prob; ///< 0x0748
74 std::array<u8, 5> comp_inter_prob; ///< 0x074C
75 std::array<u8, 10> single_ref_prob; ///< 0x0751
76 std::array<u8, 5> comp_ref_prob; ///< 0x075B
77 std::array<u8, 6> tx_32x32_prob; ///< 0x0760
78 std::array<u8, 4> tx_16x16_prob; ///< 0x0766
79 std::array<u8, 2> tx_8x8_prob; ///< 0x076A
80 std::array<u8, 3> skip_probs; ///< 0x076C
81 std::array<u8, 3> joints; ///< 0x076F
82 std::array<u8, 2> sign; ///< 0x0772
83 std::array<u8, 20> classes; ///< 0x0774
84 std::array<u8, 2> class_0; ///< 0x0788
85 std::array<u8, 20> prob_bits; ///< 0x078A
86 std::array<u8, 12> class_0_fr; ///< 0x079E
87 std::array<u8, 6> fr; ///< 0x07AA
88 std::array<u8, 2> class_0_hp; ///< 0x07B0
89 std::array<u8, 2> high_precision; ///< 0x07B2
90};
91static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
92
/// Picture parameters in the form used while composing the VP9 headers. Instances are
/// produced by PictureInfo::Convert(), which leaves frame_offsets and entropy
/// value-initialized for the caller to fill in.
93struct Vp9PictureInfo {
94 u32 bitstream_size;
95 std::array<u64, 4> frame_offsets;
96 std::array<s8, 4> ref_frame_sign_bias;
97 s32 base_q_index;
98 s32 y_dc_delta_q;
99 s32 uv_dc_delta_q;
100 s32 uv_ac_delta_q;
101 s32 transform_mode;
102 s32 interp_filter;
103 s32 reference_mode;
104 s32 log2_tile_cols;
105 s32 log2_tile_rows;
106 std::array<s8, 4> ref_deltas;
107 std::array<s8, 2> mode_deltas;
108 Vp9EntropyProbs entropy;
109 Vp9FrameDimensions frame_size;
110 u8 first_level;
111 u8 sharpness_level;
112 bool is_key_frame;
113 bool intra_only;
114 bool last_frame_was_key;
115 bool error_resilient_mode;
116 bool last_frame_shown;
117 bool show_frame;
118 bool lossless;
119 bool allow_high_precision_mv;
120 bool segment_enabled;
121 bool mode_ref_delta_enabled;
122};
123
/// Pairs the picture parameters of one frame with its raw bitstream bytes.
124struct Vp9FrameContainer {
125 Vp9PictureInfo info{};
126 std::vector<u8> bit_stream;
127};
128
/// Fixed-layout picture info block (0x100 bytes, enforced below). The trailing comments
/// give each field's byte offset. Convert() translates it into a Vp9PictureInfo.
129struct PictureInfo {
130 INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
131 u32 bitstream_size; ///< 0x30
132 INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
133 Vp9FrameDimensions last_frame_size; ///< 0x48
134 Vp9FrameDimensions golden_frame_size; ///< 0x50
135 Vp9FrameDimensions alt_frame_size; ///< 0x58
136 Vp9FrameDimensions current_frame_size; ///< 0x60
137 FrameFlags vp9_flags; ///< 0x68
138 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
139 u8 first_level; ///< 0x70
140 u8 sharpness_level; ///< 0x71
141 u8 base_q_index; ///< 0x72
142 u8 y_dc_delta_q; ///< 0x73
143 u8 uv_ac_delta_q; ///< 0x74
144 u8 uv_dc_delta_q; ///< 0x75
145 u8 lossless; ///< 0x76
146 u8 tx_mode; ///< 0x77
147 u8 allow_high_precision_mv; ///< 0x78
148 u8 interp_filter; ///< 0x79
149 u8 reference_mode; ///< 0x7A
150 INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B
151 u8 log2_tile_cols; ///< 0x7E
152 u8 log2_tile_rows; ///< 0x7F
153 Segmentation segmentation; ///< 0x80
154 LoopFilter loop_filter; ///< 0xE4
155 INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB
156
    /// Builds a Vp9PictureInfo from this block. frame_offsets and entropy are left
    /// value-initialized, the boolean fields are derived from vp9_flags and the u8 switches,
    /// and show_frame is always set to true here.
157 [[nodiscard]] Vp9PictureInfo Convert() const {
158 return {
159 .bitstream_size = bitstream_size,
160 .frame_offsets{},
161 .ref_frame_sign_bias = ref_frame_sign_bias,
162 .base_q_index = base_q_index,
163 .y_dc_delta_q = y_dc_delta_q,
164 .uv_dc_delta_q = uv_dc_delta_q,
165 .uv_ac_delta_q = uv_ac_delta_q,
166 .transform_mode = tx_mode,
167 .interp_filter = interp_filter,
168 .reference_mode = reference_mode,
169 .log2_tile_cols = log2_tile_cols,
170 .log2_tile_rows = log2_tile_rows,
171 .ref_deltas = loop_filter.ref_deltas,
172 .mode_deltas = loop_filter.mode_deltas,
173 .entropy{},
174 .frame_size = current_frame_size,
175 .first_level = first_level,
176 .sharpness_level = sharpness_level,
177 .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
178 .intra_only = True(vp9_flags & FrameFlags::IntraOnly),
179 .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
180 .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
181 .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
182 .show_frame = true,
183 .lossless = lossless != 0,
184 .allow_high_precision_mv = allow_high_precision_mv != 0,
185 .segment_enabled = segmentation.enabled != 0,
186 .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
187 };
188 }
189};
190static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
191
192struct EntropyProbs {
193 INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
194 std::array<u8, 28> inter_mode_prob; ///< 0x0400
195 std::array<u8, 4> intra_inter_prob; ///< 0x041C
196 INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
197 std::array<u8, 2> tx_8x8_prob; ///< 0x0470
198 std::array<u8, 4> tx_16x16_prob; ///< 0x0472
199 std::array<u8, 6> tx_32x32_prob; ///< 0x0476
200 std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
201 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
202 INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
203 std::array<u8, 64> partition_prob; ///< 0x04E0
204 INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
205 std::array<u8, 8> switchable_interp_prob; ///< 0x052A
206 std::array<u8, 5> comp_inter_prob; ///< 0x0532
207 std::array<u8, 3> skip_probs; ///< 0x0537
208 INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
209 std::array<u8, 3> joints; ///< 0x053B
210 std::array<u8, 2> sign; ///< 0x053E
211 std::array<u8, 2> class_0; ///< 0x0540
212 std::array<u8, 6> fr; ///< 0x0542
213 std::array<u8, 2> class_0_hp; ///< 0x0548
214 std::array<u8, 2> high_precision; ///< 0x054A
215 std::array<u8, 20> classes; ///< 0x054C
216 std::array<u8, 12> class_0_fr; ///< 0x0560
217 std::array<u8, 20> pred_bits; ///< 0x056C
218 std::array<u8, 10> single_ref_prob; ///< 0x0580
219 std::array<u8, 5> comp_ref_prob; ///< 0x058A
220 INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
221 std::array<u8, 2304> coef_probs; ///< 0x05A0
222
223 void Convert(Vp9EntropyProbs& fc) {
224 fc.inter_mode_prob = inter_mode_prob;
225 fc.intra_inter_prob = intra_inter_prob;
226 fc.tx_8x8_prob = tx_8x8_prob;
227 fc.tx_16x16_prob = tx_16x16_prob;
228 fc.tx_32x32_prob = tx_32x32_prob;
229
230 for (std::size_t i = 0; i < 4; i++) {
231 for (std::size_t j = 0; j < 9; j++) {
232 fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
233 }
234 }
235
236 fc.partition_prob = partition_prob;
237 fc.switchable_interp_prob = switchable_interp_prob;
238 fc.comp_inter_prob = comp_inter_prob;
239 fc.skip_probs = skip_probs;
240 fc.joints = joints;
241 fc.sign = sign;
242 fc.class_0 = class_0;
243 fc.fr = fr;
244 fc.class_0_hp = class_0_hp;
245 fc.high_precision = high_precision;
246 fc.classes = classes;
247 fc.class_0_fr = class_0_fr;
248 fc.prob_bits = pred_bits;
249 fc.single_ref_prob = single_ref_prob;
250 fc.comp_ref_prob = comp_ref_prob;
251
252 // Skip the 4th element as it goes unused
253 for (std::size_t i = 0; i < coef_probs.size(); i += 4) {
254 const std::size_t j = i - i / 4;
255 fc.coef_probs[j] = coef_probs[i];
256 fc.coef_probs[j + 1] = coef_probs[i + 1];
257 fc.coef_probs[j + 2] = coef_probs[i + 2];
258 }
259 }
260};
261static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
262
/// Identifies one of the three reference frame slots.
263enum class Ref { Last, Golden, AltRef };
264
/// One entry of a reference pool: a frame value, the reference slot it belongs to and a
/// refresh flag. All members are value-initialized by default.
265struct RefPoolElement {
266 s64 frame{};
267 Ref ref{};
268 bool refresh{};
269};
270
// Compile-time layout checks. ASSERT_POSITION is defined three times, once per struct, and
// undefined again after each group so the helper macro does not leak out of this header.

// Field offsets of Vp9EntropyProbs.
271#define ASSERT_POSITION(field_name, position) \
272 static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
273 "Field " #field_name " has invalid position")
274
275ASSERT_POSITION(partition_prob, 0x0024);
276ASSERT_POSITION(switchable_interp_prob, 0x0724);
277ASSERT_POSITION(sign, 0x0772);
278ASSERT_POSITION(class_0_fr, 0x079E);
279ASSERT_POSITION(high_precision, 0x07B2);
280#undef ASSERT_POSITION
281
// Field offsets of PictureInfo.
282#define ASSERT_POSITION(field_name, position) \
283 static_assert(offsetof(PictureInfo, field_name) == position, \
284 "Field " #field_name " has invalid position")
285
286ASSERT_POSITION(bitstream_size, 0x30);
287ASSERT_POSITION(last_frame_size, 0x48);
288ASSERT_POSITION(first_level, 0x70);
289ASSERT_POSITION(segmentation, 0x80);
290ASSERT_POSITION(loop_filter, 0xE4);
291#undef ASSERT_POSITION
292
// Field offsets of EntropyProbs.
293#define ASSERT_POSITION(field_name, position) \
294 static_assert(offsetof(EntropyProbs, field_name) == position, \
295 "Field " #field_name " has invalid position")
296
297ASSERT_POSITION(inter_mode_prob, 0x400);
298ASSERT_POSITION(tx_8x8_prob, 0x470);
299ASSERT_POSITION(partition_prob, 0x4E0);
300ASSERT_POSITION(class_0, 0x540);
301ASSERT_POSITION(class_0_fr, 0x560);
302ASSERT_POSITION(coef_probs, 0x5A0);
303#undef ASSERT_POSITION
304
305}; // namespace Decoder
306}; // namespace Tegra
diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp
new file mode 100644
index 000000000..b72b01aa3
--- /dev/null
+++ b/src/video_core/host1x/control.cpp
@@ -0,0 +1,35 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "video_core/gpu.h"
7#include "video_core/host1x/control.h"
8#include "video_core/host1x/host1x.h"
9
10namespace Tegra::Host1x {
11
/// Stores a reference to the GPU; syncpoint_value keeps its in-class initializer.
12Control::Control(GPU& gpu_) : gpu(gpu_) {}
13
/// Defaulted destructor, defined out of line.
14Control::~Control() = default;
15
16void Control::ProcessMethod(Method method, u32 argument) {
17 switch (method) {
18 case Method::LoadSyncptPayload32:
19 syncpoint_value = argument;
20 break;
21 case Method::WaitSyncpt:
22 case Method::WaitSyncpt32:
23 Execute(argument);
24 break;
25 default:
26 UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method));
27 break;
28 }
29}
30
31void Control::Execute(u32 data) {
32 gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value);
33}
34
35} // namespace Tegra::Host1x
diff --git a/src/video_core/host1x/control.h b/src/video_core/host1x/control.h
new file mode 100644
index 000000000..04dac7d51
--- /dev/null
+++ b/src/video_core/host1x/control.h
@@ -0,0 +1,41 @@
1// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors
2// (https://github.com/skyline-emu/)
3// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3
4// or any later version Refer to the license.txt file included.
5
6#pragma once
7
8#include "common/common_types.h"
9
10namespace Tegra {
11class GPU;
12
13namespace Host1x {
14
15class Nvdec;
16
/// Handles Host1x control methods: remembers a syncpoint payload and waits on syncpoints
/// through the GPU's Host1x syncpoint manager.
17class Control {
18public:
    /// Method identifiers handled by ProcessMethod().
19 enum class Method : u32 {
20 WaitSyncpt = 0x8,
21 LoadSyncptPayload32 = 0x4e,
22 WaitSyncpt32 = 0x50,
23 };
24
25 explicit Control(GPU& gpu);
26 ~Control();
27
28 /// Writes the method into the state, Invoke Execute() if encountered
29 void ProcessMethod(Method method, u32 argument);
30
31private:
32 /// For Host1x, execute is waiting on a syncpoint previously written into the state
33 void Execute(u32 data);
34
    // Value stored by LoadSyncptPayload32 and passed to WaitHost() by Execute().
35 u32 syncpoint_value{};
36 GPU& gpu;
37};
39} // namespace Host1x
40
41} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
new file mode 100644
index 000000000..2971be286
--- /dev/null
+++ b/src/video_core/host1x/host1x.h
@@ -0,0 +1,33 @@
1// Copyright 2022 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9#include "video_core/host1x/syncpoint_manager.h"
10
11namespace Tegra {
12
13namespace Host1x {
14
/// Owns the SyncpointManager and hands out references to it.
15class Host1x {
16public:
    /// Value-initializes the syncpoint manager.
17 Host1x() : syncpoint_manager{} {}
18
    /// Returns the owned syncpoint manager.
19 SyncpointManager& GetSyncpointManager() {
20 return syncpoint_manager;
21 }
22
    /// Returns the owned syncpoint manager (read-only).
23 const SyncpointManager& GetSyncpointManager() const {
24 return syncpoint_manager;
25 }
26
27private:
28 SyncpointManager syncpoint_manager;
29};
30
31} // namespace Host1x
32
33} // namespace Tegra
diff --git a/src/video_core/host1x/nvdec.cpp b/src/video_core/host1x/nvdec.cpp
new file mode 100644
index 000000000..5f6decd0d
--- /dev/null
+++ b/src/video_core/host1x/nvdec.cpp
@@ -0,0 +1,47 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/assert.h"
5#include "video_core/gpu.h"
6#include "video_core/host1x/nvdec.h"
7
8namespace Tegra::Host1x {
9
10#define NVDEC_REG_INDEX(field_name) \
11 (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64))
12
/// Value-initializes the register state and creates the codec from the `gpu` and `state`
/// members; both are declared before `codec` in the class, so they are initialized first.
13Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {}
14
/// Defaulted destructor, defined out of line.
15Nvdec::~Nvdec() = default;
16
17void Nvdec::ProcessMethod(u32 method, u32 argument) {
18 state.reg_array[method] = static_cast<u64>(argument) << 8;
19
20 switch (method) {
21 case NVDEC_REG_INDEX(set_codec_id):
22 codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument));
23 break;
24 case NVDEC_REG_INDEX(execute):
25 Execute();
26 break;
27 }
28}
29
30AVFramePtr Nvdec::GetFrame() {
31 return codec->GetCurrentFrame();
32}
33
34void Nvdec::Execute() {
35 switch (codec->GetCurrentCodec()) {
36 case NvdecCommon::VideoCodec::H264:
37 case NvdecCommon::VideoCodec::VP8:
38 case NvdecCommon::VideoCodec::VP9:
39 codec->Decode();
40 break;
41 default:
42 UNIMPLEMENTED_MSG("Codec {}", codec->GetCurrentCodecName());
43 break;
44 }
45}
46
47} // namespace Tegra::Host1x
diff --git a/src/video_core/host1x/nvdec.h b/src/video_core/host1x/nvdec.h
new file mode 100644
index 000000000..41ba1f7a0
--- /dev/null
+++ b/src/video_core/host1x/nvdec.h
@@ -0,0 +1,38 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <vector>
8#include "common/common_types.h"
9#include "video_core/host1x/codecs/codec.h"
10
11namespace Tegra {
12class GPU;
13
14namespace Host1x {
15
/// Front end of the NVDEC engine: records method writes in the register state and drives
/// the codec that performs the decoding.
16class Nvdec {
17public:
18 explicit Nvdec(GPU& gpu);
19 ~Nvdec();
20
21 /// Writes the method into the state, Invoke Execute() if encountered
22 void ProcessMethod(u32 method, u32 argument);
23
24 /// Return most recently decoded frame
25 [[nodiscard]] AVFramePtr GetFrame();
26
27private:
28 /// Invoke codec to decode a frame
29 void Execute();
30
    // Declaration order matters: the constructor passes gpu and state to the codec.
31 GPU& gpu;
32 NvdecCommon::NvdecRegisters state;
33 std::unique_ptr<Codec> codec;
34};
35
36} // namespace Host1x
37
38} // namespace Tegra
diff --git a/src/video_core/host1x/nvdec_common.h b/src/video_core/host1x/nvdec_common.h
new file mode 100644
index 000000000..49d67ebbe
--- /dev/null
+++ b/src/video_core/host1x/nvdec_common.h
@@ -0,0 +1,97 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/bit_field.h"
7#include "common/common_funcs.h"
8#include "common/common_types.h"
9
10namespace Tegra::Host1x::NvdecCommon {
11
12enum class VideoCodec : u64 {
13 None = 0x0,
14 H264 = 0x3,
15 VP8 = 0x5,
16 H265 = 0x7,
17 VP9 = 0x9,
18};
19
20// NVDEC should use a 32-bit address space, but is mapped to 64-bit,
21// doubling the sizes here is compensating for that.
22struct NvdecRegisters {
23 static constexpr std::size_t NUM_REGS = 0x178;
24
25 union {
26 struct {
27 INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000
28 VideoCodec set_codec_id; ///< 0x0400
29 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408
30 u64 execute; ///< 0x0600
31 INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608
32 struct { ///< 0x0800
33 union {
34 BitField<0, 3, VideoCodec> codec;
35 BitField<4, 1, u64> gp_timer_on;
36 BitField<13, 1, u64> mb_timer_on;
37 BitField<14, 1, u64> intra_frame_pslc;
38 BitField<17, 1, u64> all_intra_frame;
39 };
40 } control_params;
41 u64 picture_info_offset; ///< 0x0808
42 u64 frame_bitstream_offset; ///< 0x0810
43 u64 frame_number; ///< 0x0818
44 u64 h264_slice_data_offsets; ///< 0x0820
45 u64 h264_mv_dump_offset; ///< 0x0828
46 INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830
47 u64 frame_stats_offset; ///< 0x0848
48 u64 h264_last_surface_luma_offset; ///< 0x0850
49 u64 h264_last_surface_chroma_offset; ///< 0x0858
50 std::array<u64, 17> surface_luma_offset; ///< 0x0860
51 std::array<u64, 17> surface_chroma_offset; ///< 0x08E8
52 INSERT_PADDING_WORDS_NOINIT(68); ///< 0x0970
53 u64 vp8_prob_data_offset; ///< 0x0A80
54 u64 vp8_header_partition_buf_offset; ///< 0x0A88
55 INSERT_PADDING_WORDS_NOINIT(60); ///< 0x0A90
56 u64 vp9_entropy_probs_offset; ///< 0x0B80
57 u64 vp9_backward_updates_offset; ///< 0x0B88
58 u64 vp9_last_frame_segmap_offset; ///< 0x0B90
59 u64 vp9_curr_frame_segmap_offset; ///< 0x0B98
60 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0
61 u64 vp9_last_frame_mvs_offset; ///< 0x0BA8
62 u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0
63 INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8
64 };
65 std::array<u64, NUM_REGS> reg_array;
66 };
67};
68static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size");
69
70#define ASSERT_REG_POSITION(field_name, position) \
71 static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \
72 "Field " #field_name " has invalid position")
73
74ASSERT_REG_POSITION(set_codec_id, 0x80);
75ASSERT_REG_POSITION(execute, 0xC0);
76ASSERT_REG_POSITION(control_params, 0x100);
77ASSERT_REG_POSITION(picture_info_offset, 0x101);
78ASSERT_REG_POSITION(frame_bitstream_offset, 0x102);
79ASSERT_REG_POSITION(frame_number, 0x103);
80ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104);
81ASSERT_REG_POSITION(frame_stats_offset, 0x109);
82ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A);
83ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B);
84ASSERT_REG_POSITION(surface_luma_offset, 0x10C);
85ASSERT_REG_POSITION(surface_chroma_offset, 0x11D);
86ASSERT_REG_POSITION(vp8_prob_data_offset, 0x150);
87ASSERT_REG_POSITION(vp8_header_partition_buf_offset, 0x151);
88ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170);
89ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171);
90ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172);
91ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173);
92ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175);
93ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176);
94
95#undef ASSERT_REG_POSITION
96
97} // namespace Tegra::Host1x::NvdecCommon
diff --git a/src/video_core/host1x/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp
new file mode 100644
index 000000000..8694f77e2
--- /dev/null
+++ b/src/video_core/host1x/sync_manager.cpp
@@ -0,0 +1,51 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#include <algorithm>
5#include "sync_manager.h"
6#include "video_core/gpu.h"
7#include "video_core/host1x/host1x.h"
8#include "video_core/host1x/syncpoint_manager.h"
9
10namespace Tegra {
11namespace Host1x {
12
13SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {}
14SyncptIncrManager::~SyncptIncrManager() = default;
15
16void SyncptIncrManager::Increment(u32 id) {
17 increments.emplace_back(0, 0, id, true);
18 IncrementAllDone();
19}
20
21u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
22 const u32 handle = current_id++;
23 increments.emplace_back(handle, class_id, id);
24 return handle;
25}
26
27void SyncptIncrManager::SignalDone(u32 handle) {
28 const auto done_incr =
29 std::find_if(increments.begin(), increments.end(),
30 [handle](const SyncptIncr& incr) { return incr.id == handle; });
31 if (done_incr != increments.cend()) {
32 done_incr->complete = true;
33 }
34 IncrementAllDone();
35}
36
37void SyncptIncrManager::IncrementAllDone() {
38 std::size_t done_count = 0;
39 for (; done_count < increments.size(); ++done_count) {
40 if (!increments[done_count].complete) {
41 break;
42 }
43 auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
44 syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id);
45 syncpoint_manager.IncrementHost(increments[done_count].syncpt_id);
46 }
47 increments.erase(increments.begin(), increments.begin() + done_count);
48}
49
50} // namespace Host1x
51} // namespace Tegra
diff --git a/src/video_core/host1x/sync_manager.h b/src/video_core/host1x/sync_manager.h
new file mode 100644
index 000000000..aba72d5c5
--- /dev/null
+++ b/src/video_core/host1x/sync_manager.h
@@ -0,0 +1,53 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#pragma once
5
6#include <mutex>
7#include <vector>
8#include "common/common_types.h"
9
10namespace Tegra {
11
12class GPU;
13
14namespace Host1x {
15
16struct SyncptIncr {
17 u32 id;
18 u32 class_id;
19 u32 syncpt_id;
20 bool complete;
21
22 SyncptIncr(u32 id_, u32 class_id_, u32 syncpt_id_, bool done = false)
23 : id(id_), class_id(class_id_), syncpt_id(syncpt_id_), complete(done) {}
24};
25
26class SyncptIncrManager {
27public:
28 explicit SyncptIncrManager(GPU& gpu);
29 ~SyncptIncrManager();
30
31 /// Add syncpoint id and increment all
32 void Increment(u32 id);
33
34 /// Returns a handle to increment later
35 u32 IncrementWhenDone(u32 class_id, u32 id);
36
37 /// IncrememntAllDone, including handle
38 void SignalDone(u32 handle);
39
40 /// Increment all sequential pending increments that are already done.
41 void IncrementAllDone();
42
43private:
44 std::vector<SyncptIncr> increments;
45 std::mutex increment_lock;
46 u32 current_id{};
47
48 GPU& gpu;
49};
50
51} // namespace Host1x
52
53} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp
new file mode 100644
index 000000000..c606b8bd0
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.cpp
@@ -0,0 +1,93 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/host1x/syncpoint_manager.h"
6
7namespace Tegra {
8
9namespace Host1x {
10
11SyncpointManager::ActionHandle SyncpointManager::RegisterAction(
12 std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value,
13 std::function<void(void)>& action) {
14 if (syncpoint.load(std::memory_order_acquire) >= expected_value) {
15 action();
16 return {};
17 }
18
19 std::unique_lock<std::mutex> lk(guard);
20 if (syncpoint.load(std::memory_order_relaxed) >= expected_value) {
21 action();
22 return {};
23 }
24 auto it = action_storage.begin();
25 while (it != action_storage.end()) {
26 if (it->expected_value >= expected_value) {
27 break;
28 }
29 ++it;
30 }
31 return action_storage.emplace(it, expected_value, action);
32}
33
34void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage,
35 ActionHandle& handle) {
36 std::unique_lock<std::mutex> lk(guard);
37 action_storage.erase(handle);
38}
39
40void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) {
41 DeregisterAction(guest_action_storage[syncpoint_id], handle);
42}
43
44void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) {
45 DeregisterAction(host_action_storage[syncpoint_id], handle);
46}
47
48void SyncpointManager::IncrementGuest(u32 syncpoint_id) {
49 Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]);
50}
51
52void SyncpointManager::IncrementHost(u32 syncpoint_id) {
53 Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]);
54}
55
56void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) {
57 Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value);
58}
59
60void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) {
61 Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value);
62}
63
64void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
65 std::list<RegisteredAction>& action_storage) {
66 auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1};
67
68 std::unique_lock<std::mutex> lk(guard);
69 auto it = action_storage.begin();
70 while (it != action_storage.end()) {
71 if (it->expected_value > new_value) {
72 break;
73 }
74 it->action();
75 it = action_storage.erase(it);
76 }
77 wait_cv.notify_all();
78}
79
80void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
81 u32 expected_value) {
82 const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; };
83 if (pred()) {
84 return;
85 }
86
87 std::unique_lock<std::mutex> lk(guard);
88 wait_cv.wait(lk, pred);
89}
90
91} // namespace Host1x
92
93} // namespace Tegra
diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h
new file mode 100644
index 000000000..0ecc040ab
--- /dev/null
+++ b/src/video_core/host1x/syncpoint_manager.h
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv3 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <condition_variable>
10#include <functional>
11#include <list>
12#include <mutex>
13
14#include "common/common_types.h"
15
16namespace Tegra {
17
18namespace Host1x {
19
20class SyncpointManager {
21public:
22 u32 GetGuestSyncpointValue(u32 id) {
23 return syncpoints_guest[id].load(std::memory_order_acquire);
24 }
25
26 u32 GetHostSyncpointValue(u32 id) {
27 return syncpoints_host[id].load(std::memory_order_acquire);
28 }
29
30 struct RegisteredAction {
31 RegisteredAction(u32 expected_value_, std::function<void(void)>& action_)
32 : expected_value{expected_value_}, action{action_} {}
33 u32 expected_value;
34 std::function<void(void)> action;
35 };
36 using ActionHandle = std::list<RegisteredAction>::iterator;
37
38 template <typename Func>
39 ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
40 std::function<void(void)> func(action);
41 return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id],
42 expected_value, func);
43 }
44
45 template <typename Func>
46 ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) {
47 std::function<void(void)> func(action);
48 return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id],
49 expected_value, func);
50 }
51
52 void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle);
53
54 void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle);
55
56 void IncrementGuest(u32 syncpoint_id);
57
58 void IncrementHost(u32 syncpoint_id);
59
60 void WaitGuest(u32 syncpoint_id, u32 expected_value);
61
62 void WaitHost(u32 syncpoint_id, u32 expected_value);
63
64 bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) {
65 return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
66 }
67
68 bool IsReadyHost(u32 syncpoint_id, u32 expected_value) {
69 return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value;
70 }
71
72private:
73 void Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv,
74 std::list<RegisteredAction>& action_storage);
75
76 ActionHandle RegisterAction(std::atomic<u32>& syncpoint,
77 std::list<RegisteredAction>& action_storage, u32 expected_value,
78 std::function<void(void)>& action);
79
80 void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle);
81
82 void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value);
83
84 static constexpr size_t NUM_MAX_SYNCPOINTS = 192;
85
86 std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{};
87 std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{};
88
89 std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage;
90 std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage;
91
92 std::mutex guard;
93 std::condition_variable wait_guest_cv;
94 std::condition_variable wait_host_cv;
95};
96
97} // namespace Host1x
98
99} // namespace Tegra
diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp
new file mode 100644
index 000000000..a9422670a
--- /dev/null
+++ b/src/video_core/host1x/vic.cpp
@@ -0,0 +1,243 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <array>
5
6extern "C" {
7#if defined(__GNUC__) || defined(__clang__)
8#pragma GCC diagnostic push
9#pragma GCC diagnostic ignored "-Wconversion"
10#endif
11#include <libswscale/swscale.h>
12#if defined(__GNUC__) || defined(__clang__)
13#pragma GCC diagnostic pop
14#endif
15}
16
17#include "common/assert.h"
18#include "common/bit_field.h"
19#include "common/logging/log.h"
20
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/gpu.h"
23#include "video_core/host1x/nvdec.h"
24#include "video_core/host1x/vic.h"
25#include "video_core/memory_manager.h"
26#include "video_core/textures/decoders.h"
27
28namespace Tegra {
29
30namespace Host1x {
31
32namespace {
33enum class VideoPixelFormat : u64_le {
34 RGBA8 = 0x1f,
35 BGRA8 = 0x20,
36 RGBX8 = 0x23,
37 YUV420 = 0x44,
38};
39} // Anonymous namespace
40
41union VicConfig {
42 u64_le raw{};
43 BitField<0, 7, VideoPixelFormat> pixel_format;
44 BitField<7, 2, u64_le> chroma_loc_horiz;
45 BitField<9, 2, u64_le> chroma_loc_vert;
46 BitField<11, 4, u64_le> block_linear_kind;
47 BitField<15, 4, u64_le> block_linear_height_log2;
48 BitField<32, 14, u64_le> surface_width_minus1;
49 BitField<46, 14, u64_le> surface_height_minus1;
50};
51
52Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
53 : gpu(gpu_),
54 nvdec_processor(std::move(nvdec_processor_)), converted_frame_buffer{nullptr, av_free} {}
55
56Vic::~Vic() = default;
57
58void Vic::ProcessMethod(Method method, u32 argument) {
59 LOG_DEBUG(HW_GPU, "Vic method 0x{:X}", static_cast<u32>(method));
60 const u64 arg = static_cast<u64>(argument) << 8;
61 switch (method) {
62 case Method::Execute:
63 Execute();
64 break;
65 case Method::SetConfigStructOffset:
66 config_struct_address = arg;
67 break;
68 case Method::SetOutputSurfaceLumaOffset:
69 output_surface_luma_address = arg;
70 break;
71 case Method::SetOutputSurfaceChromaOffset:
72 output_surface_chroma_address = arg;
73 break;
74 default:
75 break;
76 }
77}
78
79void Vic::Execute() {
80 if (output_surface_luma_address == 0) {
81 LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
82 return;
83 }
84 const VicConfig config{gpu.MemoryManager().Read<u64>(config_struct_address + 0x20)};
85 const AVFramePtr frame_ptr = nvdec_processor->GetFrame();
86 const auto* frame = frame_ptr.get();
87 if (!frame) {
88 return;
89 }
90 const u64 surface_width = config.surface_width_minus1 + 1;
91 const u64 surface_height = config.surface_height_minus1 + 1;
92 if (static_cast<u64>(frame->width) != surface_width ||
93 static_cast<u64>(frame->height) != surface_height) {
94 // TODO: Properly support multiple video streams with differing frame dimensions
95 LOG_WARNING(Service_NVDRV, "Frame dimensions {}x{} don't match surface dimensions {}x{}",
96 frame->width, frame->height, surface_width, surface_height);
97 }
98 switch (config.pixel_format) {
99 case VideoPixelFormat::RGBA8:
100 case VideoPixelFormat::BGRA8:
101 case VideoPixelFormat::RGBX8:
102 WriteRGBFrame(frame, config);
103 break;
104 case VideoPixelFormat::YUV420:
105 WriteYUVFrame(frame, config);
106 break;
107 default:
108 UNIMPLEMENTED_MSG("Unknown video pixel format {:X}", config.pixel_format.Value());
109 break;
110 }
111}
112
113void Vic::WriteRGBFrame(const AVFrame* frame, const VicConfig& config) {
114 LOG_TRACE(Service_NVDRV, "Writing RGB Frame");
115
116 if (!scaler_ctx || frame->width != scaler_width || frame->height != scaler_height) {
117 const AVPixelFormat target_format = [pixel_format = config.pixel_format]() {
118 switch (pixel_format) {
119 case VideoPixelFormat::RGBA8:
120 return AV_PIX_FMT_RGBA;
121 case VideoPixelFormat::BGRA8:
122 return AV_PIX_FMT_BGRA;
123 case VideoPixelFormat::RGBX8:
124 return AV_PIX_FMT_RGB0;
125 default:
126 return AV_PIX_FMT_RGBA;
127 }
128 }();
129
130 sws_freeContext(scaler_ctx);
131 // Frames are decoded into either YUV420 or NV12 formats. Convert to desired RGB format
132 scaler_ctx = sws_getContext(frame->width, frame->height,
133 static_cast<AVPixelFormat>(frame->format), frame->width,
134 frame->height, target_format, 0, nullptr, nullptr, nullptr);
135 scaler_width = frame->width;
136 scaler_height = frame->height;
137 converted_frame_buffer.reset();
138 }
139 if (!converted_frame_buffer) {
140 const size_t frame_size = frame->width * frame->height * 4;
141 converted_frame_buffer = AVMallocPtr{static_cast<u8*>(av_malloc(frame_size)), av_free};
142 }
143 const std::array<int, 4> converted_stride{frame->width * 4, frame->height * 4, 0, 0};
144 u8* const converted_frame_buf_addr{converted_frame_buffer.get()};
145 sws_scale(scaler_ctx, frame->data, frame->linesize, 0, frame->height, &converted_frame_buf_addr,
146 converted_stride.data());
147
148 // Use the minimum of surface/frame dimensions to avoid buffer overflow.
149 const u32 surface_width = static_cast<u32>(config.surface_width_minus1) + 1;
150 const u32 surface_height = static_cast<u32>(config.surface_height_minus1) + 1;
151 const u32 width = std::min(surface_width, static_cast<u32>(frame->width));
152 const u32 height = std::min(surface_height, static_cast<u32>(frame->height));
153 const u32 blk_kind = static_cast<u32>(config.block_linear_kind);
154 if (blk_kind != 0) {
155 // swizzle pitch linear to block linear
156 const u32 block_height = static_cast<u32>(config.block_linear_height_log2);
157 const auto size = Texture::CalculateSize(true, 4, width, height, 1, block_height, 0);
158 luma_buffer.resize(size);
159 Texture::SwizzleSubrect(width, height, width * 4, width, 4, luma_buffer.data(),
160 converted_frame_buf_addr, block_height, 0, 0);
161
162 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
163 } else {
164 // send pitch linear frame
165 const size_t linear_size = width * height * 4;
166 gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
167 linear_size);
168 }
169}
170
171void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) {
172 LOG_TRACE(Service_NVDRV, "Writing YUV420 Frame");
173
174 const std::size_t surface_width = config.surface_width_minus1 + 1;
175 const std::size_t surface_height = config.surface_height_minus1 + 1;
176 const std::size_t aligned_width = (surface_width + 0xff) & ~0xffUL;
177 // Use the minimum of surface/frame dimensions to avoid buffer overflow.
178 const auto frame_width = std::min(surface_width, static_cast<size_t>(frame->width));
179 const auto frame_height = std::min(surface_height, static_cast<size_t>(frame->height));
180
181 const auto stride = static_cast<size_t>(frame->linesize[0]);
182
183 luma_buffer.resize(aligned_width * surface_height);
184 chroma_buffer.resize(aligned_width * surface_height / 2);
185
186 // Populate luma buffer
187 const u8* luma_src = frame->data[0];
188 for (std::size_t y = 0; y < frame_height; ++y) {
189 const std::size_t src = y * stride;
190 const std::size_t dst = y * aligned_width;
191 for (std::size_t x = 0; x < frame_width; ++x) {
192 luma_buffer[dst + x] = luma_src[src + x];
193 }
194 }
195 gpu.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(),
196 luma_buffer.size());
197
198 // Chroma
199 const std::size_t half_height = frame_height / 2;
200 const auto half_stride = static_cast<size_t>(frame->linesize[1]);
201
202 switch (frame->format) {
203 case AV_PIX_FMT_YUV420P: {
204 // Frame from FFmpeg software
205 // Populate chroma buffer from both channels with interleaving.
206 const std::size_t half_width = frame_width / 2;
207 const u8* chroma_b_src = frame->data[1];
208 const u8* chroma_r_src = frame->data[2];
209 for (std::size_t y = 0; y < half_height; ++y) {
210 const std::size_t src = y * half_stride;
211 const std::size_t dst = y * aligned_width;
212
213 for (std::size_t x = 0; x < half_width; ++x) {
214 chroma_buffer[dst + x * 2] = chroma_b_src[src + x];
215 chroma_buffer[dst + x * 2 + 1] = chroma_r_src[src + x];
216 }
217 }
218 break;
219 }
220 case AV_PIX_FMT_NV12: {
221 // Frame from VA-API hardware
222 // This is already interleaved so just copy
223 const u8* chroma_src = frame->data[1];
224 for (std::size_t y = 0; y < half_height; ++y) {
225 const std::size_t src = y * stride;
226 const std::size_t dst = y * aligned_width;
227 for (std::size_t x = 0; x < frame_width; ++x) {
228 chroma_buffer[dst + x] = chroma_src[src + x];
229 }
230 }
231 break;
232 }
233 default:
234 ASSERT(false);
235 break;
236 }
237 gpu.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
238 chroma_buffer.size());
239}
240
241} // namespace Host1x
242
243} // namespace Tegra
diff --git a/src/video_core/host1x/vic.h b/src/video_core/host1x/vic.h
new file mode 100644
index 000000000..c51f8af7e
--- /dev/null
+++ b/src/video_core/host1x/vic.h
@@ -0,0 +1,66 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <vector>
8#include "common/common_types.h"
9
10struct SwsContext;
11
12namespace Tegra {
13class GPU;
14
15namespace Host1x {
16
17class Nvdec;
18union VicConfig;
19
20class Vic {
21public:
22 enum class Method : u32 {
23 Execute = 0xc0,
24 SetControlParams = 0x1c1,
25 SetConfigStructOffset = 0x1c2,
26 SetOutputSurfaceLumaOffset = 0x1c8,
27 SetOutputSurfaceChromaOffset = 0x1c9,
28 SetOutputSurfaceChromaUnusedOffset = 0x1ca
29 };
30
31 explicit Vic(GPU& gpu, std::shared_ptr<Nvdec> nvdec_processor);
32
33 ~Vic();
34
35 /// Write to the device state.
36 void ProcessMethod(Method method, u32 argument);
37
38private:
39 void Execute();
40
41 void WriteRGBFrame(const AVFrame* frame, const VicConfig& config);
42
43 void WriteYUVFrame(const AVFrame* frame, const VicConfig& config);
44
45 GPU& gpu;
46 std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor;
47
48 /// Avoid reallocation of the following buffers every frame, as their
49 /// size does not change during a stream
50 using AVMallocPtr = std::unique_ptr<u8, decltype(&av_free)>;
51 AVMallocPtr converted_frame_buffer;
52 std::vector<u8> luma_buffer;
53 std::vector<u8> chroma_buffer;
54
55 GPUVAddr config_struct_address{};
56 GPUVAddr output_surface_luma_address{};
57 GPUVAddr output_surface_chroma_address{};
58
59 SwsContext* scaler_ctx{};
60 s32 scaler_width{};
61 s32 scaler_height{};
62};
63
64} // namespace Host1x
65
66} // namespace Tegra