summaryrefslogtreecommitdiff
path: root/src/video_core/host1x/codecs
diff options
context:
space:
mode:
authorGravatar Fernando S2022-10-06 21:29:53 +0200
committerGravatar GitHub2022-10-06 21:29:53 +0200
commit1effa578f12f79d7816e3543291f302f126cc1d2 (patch)
tree14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/host1x/codecs
parentMerge pull request #9025 from FernandoS27/slava-ukrayini (diff)
parentvulkan_blitter: Fix pool allocation double free. (diff)
downloadyuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.gz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.xz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.zip
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/host1x/codecs')
-rw-r--r--src/video_core/host1x/codecs/codec.cpp310
-rw-r--r--src/video_core/host1x/codecs/codec.h84
-rw-r--r--src/video_core/host1x/codecs/h264.cpp278
-rw-r--r--src/video_core/host1x/codecs/h264.h177
-rw-r--r--src/video_core/host1x/codecs/vp8.cpp53
-rw-r--r--src/video_core/host1x/codecs/vp8.h78
-rw-r--r--src/video_core/host1x/codecs/vp9.cpp947
-rw-r--r--src/video_core/host1x/codecs/vp9.h198
-rw-r--r--src/video_core/host1x/codecs/vp9_types.h305
9 files changed, 2430 insertions, 0 deletions
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
new file mode 100644
index 000000000..42e7d6e4f
--- /dev/null
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -0,0 +1,310 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm>
5#include <fstream>
6#include <vector>
7#include "common/assert.h"
8#include "common/settings.h"
9#include "video_core/host1x/codecs/codec.h"
10#include "video_core/host1x/codecs/h264.h"
11#include "video_core/host1x/codecs/vp8.h"
12#include "video_core/host1x/codecs/vp9.h"
13#include "video_core/host1x/host1x.h"
14#include "video_core/memory_manager.h"
15
16extern "C" {
17#include <libavutil/opt.h>
18#ifdef LIBVA_FOUND
19// for querying VAAPI driver information
20#include <libavutil/hwcontext_vaapi.h>
21#endif
22}
23
24namespace Tegra {
25namespace {
26constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12;
27constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P;
28constexpr std::array PREFERRED_GPU_DECODERS = {
29 AV_HWDEVICE_TYPE_CUDA,
30#ifdef _WIN32
31 AV_HWDEVICE_TYPE_D3D11VA,
32 AV_HWDEVICE_TYPE_DXVA2,
33#elif defined(__unix__)
34 AV_HWDEVICE_TYPE_VAAPI,
35 AV_HWDEVICE_TYPE_VDPAU,
36#endif
37 // last resort for Linux Flatpak (w/ NVIDIA)
38 AV_HWDEVICE_TYPE_VULKAN,
39};
40
41void AVPacketDeleter(AVPacket* ptr) {
42 av_packet_free(&ptr);
43}
44
45using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>;
46
47AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
48 for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
49 if (*p == av_codec_ctx->pix_fmt) {
50 return av_codec_ctx->pix_fmt;
51 }
52 }
53 LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
54 av_buffer_unref(&av_codec_ctx->hw_device_ctx);
55 av_codec_ctx->pix_fmt = PREFERRED_CPU_FMT;
56 return PREFERRED_CPU_FMT;
57}
58
59// List all the currently available hwcontext in ffmpeg
60std::vector<AVHWDeviceType> ListSupportedContexts() {
61 std::vector<AVHWDeviceType> contexts{};
62 AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
63 do {
64 current_device_type = av_hwdevice_iterate_types(current_device_type);
65 contexts.push_back(current_device_type);
66 } while (current_device_type != AV_HWDEVICE_TYPE_NONE);
67 return contexts;
68}
69
70} // namespace
71
72void AVFrameDeleter(AVFrame* ptr) {
73 av_frame_free(&ptr);
74}
75
76Codec::Codec(Host1x::Host1x& host1x_, const Host1x::NvdecCommon::NvdecRegisters& regs)
77 : host1x(host1x_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(host1x)),
78 vp8_decoder(std::make_unique<Decoder::VP8>(host1x)),
79 vp9_decoder(std::make_unique<Decoder::VP9>(host1x)) {}
80
81Codec::~Codec() {
82 if (!initialized) {
83 return;
84 }
85 // Free libav memory
86 avcodec_free_context(&av_codec_ctx);
87 av_buffer_unref(&av_gpu_decoder);
88}
89
90bool Codec::CreateGpuAvDevice() {
91 static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
92 static const auto supported_contexts = ListSupportedContexts();
93 for (const auto& type : PREFERRED_GPU_DECODERS) {
94 if (std::none_of(supported_contexts.begin(), supported_contexts.end(),
95 [&type](const auto& context) { return context == type; })) {
96 LOG_DEBUG(Service_NVDRV, "{} explicitly unsupported", av_hwdevice_get_type_name(type));
97 continue;
98 }
99 // Avoid memory leak from not cleaning up after av_hwdevice_ctx_create
100 av_buffer_unref(&av_gpu_decoder);
101 const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
102 if (hwdevice_res < 0) {
103 LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
104 av_hwdevice_get_type_name(type), hwdevice_res);
105 continue;
106 }
107#ifdef LIBVA_FOUND
108 if (type == AV_HWDEVICE_TYPE_VAAPI) {
109 // we need to determine if this is an impersonated VAAPI driver
110 AVHWDeviceContext* hwctx =
111 static_cast<AVHWDeviceContext*>(static_cast<void*>(av_gpu_decoder->data));
112 AVVAAPIDeviceContext* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
113 const char* vendor_name = vaQueryVendorString(vactx->display);
114 if (strstr(vendor_name, "VDPAU backend")) {
115 // VDPAU impersonated VAAPI impl's are super buggy, we need to skip them
116 LOG_DEBUG(Service_NVDRV, "Skipping vdapu impersonated VAAPI driver");
117 continue;
118 } else {
119 // according to some user testing, certain vaapi driver (Intel?) could be buggy
120 // so let's log the driver name which may help the developers/supporters
121 LOG_DEBUG(Service_NVDRV, "Using VAAPI driver: {}", vendor_name);
122 }
123 }
124#endif
125 for (int i = 0;; i++) {
126 const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
127 if (!config) {
128 LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
129 av_codec->name, av_hwdevice_get_type_name(type));
130 break;
131 }
132 if ((config->methods & HW_CONFIG_METHOD) != 0 && config->device_type == type) {
133#if defined(__unix__)
134 // Some linux decoding backends are reported to crash with this config method
135 // TODO(ameerj): Properly support this method
136 if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) != 0) {
137 // skip zero-copy decoders, we don't currently support them
138 LOG_DEBUG(Service_NVDRV, "Skipping decoder {} with unsupported capability {}.",
139 av_hwdevice_get_type_name(type), config->methods);
140 continue;
141 }
142#endif
143 LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
144 av_codec_ctx->pix_fmt = config->pix_fmt;
145 return true;
146 }
147 }
148 }
149 return false;
150}
151
152void Codec::InitializeAvCodecContext() {
153 av_codec_ctx = avcodec_alloc_context3(av_codec);
154 av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
155}
156
157void Codec::InitializeGpuDecoder() {
158 if (!CreateGpuAvDevice()) {
159 av_buffer_unref(&av_gpu_decoder);
160 return;
161 }
162 auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
163 ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
164 av_codec_ctx->hw_device_ctx = hw_device_ctx;
165 av_codec_ctx->get_format = GetGpuFormat;
166}
167
168void Codec::Initialize() {
169 const AVCodecID codec = [&] {
170 switch (current_codec) {
171 case Host1x::NvdecCommon::VideoCodec::H264:
172 return AV_CODEC_ID_H264;
173 case Host1x::NvdecCommon::VideoCodec::VP8:
174 return AV_CODEC_ID_VP8;
175 case Host1x::NvdecCommon::VideoCodec::VP9:
176 return AV_CODEC_ID_VP9;
177 default:
178 UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
179 return AV_CODEC_ID_NONE;
180 }
181 }();
182 av_codec = avcodec_find_decoder(codec);
183
184 InitializeAvCodecContext();
185 if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
186 InitializeGpuDecoder();
187 }
188 if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
189 LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
190 avcodec_free_context(&av_codec_ctx);
191 av_buffer_unref(&av_gpu_decoder);
192 return;
193 }
194 if (!av_codec_ctx->hw_device_ctx) {
195 LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
196 }
197 initialized = true;
198}
199
200void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) {
201 if (current_codec != codec) {
202 current_codec = codec;
203 LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName());
204 }
205}
206
207void Codec::Decode() {
208 const bool is_first_frame = !initialized;
209 if (is_first_frame) {
210 Initialize();
211 }
212 if (!initialized) {
213 return;
214 }
215 bool vp9_hidden_frame = false;
216 const auto& frame_data = [&]() {
217 switch (current_codec) {
218 case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
219 return h264_decoder->ComposeFrame(state, is_first_frame);
220 case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
221 return vp8_decoder->ComposeFrame(state);
222 case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
223 vp9_decoder->ComposeFrame(state);
224 vp9_hidden_frame = vp9_decoder->WasFrameHidden();
225 return vp9_decoder->GetFrameBytes();
226 default:
227 ASSERT(false);
228 return std::vector<u8>{};
229 }
230 }();
231 AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
232 if (!packet) {
233 LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
234 return;
235 }
236 packet->data = const_cast<u8*>(frame_data.data());
237 packet->size = static_cast<s32>(frame_data.size());
238 if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
239 LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
240 return;
241 }
242 // Only receive/store visible frames
243 if (vp9_hidden_frame) {
244 return;
245 }
246 AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter};
247 AVFramePtr final_frame{nullptr, AVFrameDeleter};
248 ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed");
249 if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) {
250 LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
251 return;
252 }
253 if (initial_frame->width == 0 || initial_frame->height == 0) {
254 LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
255 return;
256 }
257 if (av_codec_ctx->hw_device_ctx) {
258 final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
259 ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
260 // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
261 // because Intel drivers crash unless using AV_PIX_FMT_NV12
262 final_frame->format = PREFERRED_GPU_FMT;
263 const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0);
264 ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret);
265 } else {
266 final_frame = std::move(initial_frame);
267 }
268 if (final_frame->format != PREFERRED_CPU_FMT && final_frame->format != PREFERRED_GPU_FMT) {
269 UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
270 return;
271 }
272 av_frames.push(std::move(final_frame));
273 if (av_frames.size() > 10) {
274 LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
275 av_frames.pop();
276 }
277}
278
279AVFramePtr Codec::GetCurrentFrame() {
280 // Sometimes VIC will request more frames than have been decoded.
281 // in this case, return a nullptr and don't overwrite previous frame data
282 if (av_frames.empty()) {
283 return AVFramePtr{nullptr, AVFrameDeleter};
284 }
285 AVFramePtr frame = std::move(av_frames.front());
286 av_frames.pop();
287 return frame;
288}
289
290Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const {
291 return current_codec;
292}
293
294std::string_view Codec::GetCurrentCodecName() const {
295 switch (current_codec) {
296 case Host1x::NvdecCommon::VideoCodec::None:
297 return "None";
298 case Host1x::NvdecCommon::VideoCodec::H264:
299 return "H264";
300 case Host1x::NvdecCommon::VideoCodec::VP8:
301 return "VP8";
302 case Host1x::NvdecCommon::VideoCodec::H265:
303 return "H265";
304 case Host1x::NvdecCommon::VideoCodec::VP9:
305 return "VP9";
306 default:
307 return "Unknown";
308 }
309}
310} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/codec.h b/src/video_core/host1x/codecs/codec.h
new file mode 100644
index 000000000..0d45fb7fe
--- /dev/null
+++ b/src/video_core/host1x/codecs/codec.h
@@ -0,0 +1,84 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <memory>
7#include <string_view>
8#include <queue>
9#include "common/common_types.h"
10#include "video_core/host1x/nvdec_common.h"
11
12extern "C" {
13#if defined(__GNUC__) || defined(__clang__)
14#pragma GCC diagnostic push
15#pragma GCC diagnostic ignored "-Wconversion"
16#endif
17#include <libavcodec/avcodec.h>
18#if defined(__GNUC__) || defined(__clang__)
19#pragma GCC diagnostic pop
20#endif
21}
22
23namespace Tegra {
24
25void AVFrameDeleter(AVFrame* ptr);
26using AVFramePtr = std::unique_ptr<AVFrame, decltype(&AVFrameDeleter)>;
27
28namespace Decoder {
29class H264;
30class VP8;
31class VP9;
32} // namespace Decoder
33
34namespace Host1x {
35class Host1x;
36} // namespace Host1x
37
38class Codec {
39public:
40 explicit Codec(Host1x::Host1x& host1x, const Host1x::NvdecCommon::NvdecRegisters& regs);
41 ~Codec();
42
43 /// Initialize the codec, returning success or failure
44 void Initialize();
45
46 /// Sets NVDEC video stream codec
47 void SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec);
48
49 /// Call decoders to construct headers, decode AVFrame with ffmpeg
50 void Decode();
51
52 /// Returns next decoded frame
53 [[nodiscard]] AVFramePtr GetCurrentFrame();
54
55 /// Returns the value of current_codec
56 [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const;
57
58 /// Return name of the current codec
59 [[nodiscard]] std::string_view GetCurrentCodecName() const;
60
61private:
62 void InitializeAvCodecContext();
63
64 void InitializeGpuDecoder();
65
66 bool CreateGpuAvDevice();
67
68 bool initialized{};
69 Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None};
70
71 const AVCodec* av_codec{nullptr};
72 AVCodecContext* av_codec_ctx{nullptr};
73 AVBufferRef* av_gpu_decoder{nullptr};
74
75 Host1x::Host1x& host1x;
76 const Host1x::NvdecCommon::NvdecRegisters& state;
77 std::unique_ptr<Decoder::H264> h264_decoder;
78 std::unique_ptr<Decoder::VP8> vp8_decoder;
79 std::unique_ptr<Decoder::VP9> vp9_decoder;
80
81 std::queue<AVFramePtr> av_frames{};
82};
83
84} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
new file mode 100644
index 000000000..e87bd65fa
--- /dev/null
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -0,0 +1,278 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#include <array>
5#include <bit>
6
7#include "common/settings.h"
8#include "video_core/host1x/codecs/h264.h"
9#include "video_core/host1x/host1x.h"
10#include "video_core/memory_manager.h"
11
12namespace Tegra::Decoder {
13namespace {
14// ZigZag LUTs from libavcodec.
15constexpr std::array<u8, 64> zig_zag_direct{
16 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48,
17 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23,
18 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
19};
20
21constexpr std::array<u8, 16> zig_zag_scan{
22 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
23 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
24};
25} // Anonymous namespace
26
27H264::H264(Host1x::Host1x& host1x_) : host1x{host1x_} {}
28
29H264::~H264() = default;
30
31const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
32 bool is_first_frame) {
33 H264DecoderContext context;
34 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context,
35 sizeof(H264DecoderContext));
36
37 const s64 frame_number = context.h264_parameter_set.frame_number.Value();
38 if (!is_first_frame && frame_number != 0) {
39 frame.resize(context.stream_len);
40 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
41 return frame;
42 }
43
44 // Encode header
45 H264BitWriter writer{};
46 writer.WriteU(1, 24);
47 writer.WriteU(0, 1);
48 writer.WriteU(3, 2);
49 writer.WriteU(7, 5);
50 writer.WriteU(100, 8);
51 writer.WriteU(0, 8);
52 writer.WriteU(31, 8);
53 writer.WriteUe(0);
54 const u32 chroma_format_idc =
55 static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value());
56 writer.WriteUe(chroma_format_idc);
57 if (chroma_format_idc == 3) {
58 writer.WriteBit(false);
59 }
60
61 writer.WriteUe(0);
62 writer.WriteUe(0);
63 writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag
64 writer.WriteBit(false); // Scaling matrix present flag
65
66 writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value()));
67
68 const auto order_cnt_type =
69 static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value());
70 writer.WriteUe(order_cnt_type);
71 if (order_cnt_type == 0) {
72 writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4);
73 } else if (order_cnt_type == 1) {
74 writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0);
75
76 writer.WriteSe(0);
77 writer.WriteSe(0);
78 writer.WriteUe(0);
79 }
80
81 const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units /
82 (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2);
83
84 // TODO (ameerj): Where do we get this number, it seems to be particular for each stream
85 const auto nvdec_decoding = Settings::values.nvdec_emulation.GetValue();
86 const bool uses_gpu_decoding = nvdec_decoding == Settings::NvdecEmulation::GPU;
87 const u32 max_num_ref_frames = uses_gpu_decoding ? 6u : 16u;
88 writer.WriteUe(max_num_ref_frames);
89 writer.WriteBit(false);
90 writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1);
91 writer.WriteUe(pic_height - 1);
92 writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0);
93
94 if (!context.h264_parameter_set.frame_mbs_only_flag) {
95 writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0);
96 }
97
98 writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0);
99 writer.WriteBit(false); // Frame cropping flag
100 writer.WriteBit(false); // VUI parameter present flag
101
102 writer.End();
103
104 // H264 PPS
105 writer.WriteU(1, 24);
106 writer.WriteU(0, 1);
107 writer.WriteU(3, 2);
108 writer.WriteU(8, 5);
109
110 writer.WriteUe(0);
111 writer.WriteUe(0);
112
113 writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0);
114 writer.WriteBit(false);
115 writer.WriteUe(0);
116 writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active);
117 writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active);
118 writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0);
119 writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2);
120 s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value());
121 writer.WriteSe(pic_init_qp);
122 writer.WriteSe(0);
123 s32 chroma_qp_index_offset =
124 static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value());
125
126 writer.WriteSe(chroma_qp_index_offset);
127 writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0);
128 writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0);
129 writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0);
130 writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0);
131
132 writer.WriteBit(true);
133
134 for (s32 index = 0; index < 6; index++) {
135 writer.WriteBit(true);
136 std::span<const u8> matrix{context.weight_scale};
137 writer.WriteScalingList(matrix, index * 16, 16);
138 }
139
140 if (context.h264_parameter_set.transform_8x8_mode_flag) {
141 for (s32 index = 0; index < 2; index++) {
142 writer.WriteBit(true);
143 std::span<const u8> matrix{context.weight_scale_8x8};
144 writer.WriteScalingList(matrix, index * 64, 64);
145 }
146 }
147
148 s32 chroma_qp_index_offset2 =
149 static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value());
150
151 writer.WriteSe(chroma_qp_index_offset2);
152
153 writer.End();
154
155 const auto& encoded_header = writer.GetByteArray();
156 frame.resize(encoded_header.size() + context.stream_len);
157 std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
158
159 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
160 frame.data() + encoded_header.size(), context.stream_len);
161
162 return frame;
163}
164
165H264BitWriter::H264BitWriter() = default;
166
167H264BitWriter::~H264BitWriter() = default;
168
169void H264BitWriter::WriteU(s32 value, s32 value_sz) {
170 WriteBits(value, value_sz);
171}
172
173void H264BitWriter::WriteSe(s32 value) {
174 WriteExpGolombCodedInt(value);
175}
176
177void H264BitWriter::WriteUe(u32 value) {
178 WriteExpGolombCodedUInt(value);
179}
180
181void H264BitWriter::End() {
182 WriteBit(true);
183 Flush();
184}
185
186void H264BitWriter::WriteBit(bool state) {
187 WriteBits(state ? 1 : 0, 1);
188}
189
190void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
191 std::vector<u8> scan(count);
192 if (count == 16) {
193 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
194 } else {
195 std::memcpy(scan.data(), zig_zag_direct.data(), scan.size());
196 }
197 u8 last_scale = 8;
198
199 for (s32 index = 0; index < count; index++) {
200 const u8 value = list[start + scan[index]];
201 const s32 delta_scale = static_cast<s32>(value - last_scale);
202
203 WriteSe(delta_scale);
204
205 last_scale = value;
206 }
207}
208
209std::vector<u8>& H264BitWriter::GetByteArray() {
210 return byte_array;
211}
212
213const std::vector<u8>& H264BitWriter::GetByteArray() const {
214 return byte_array;
215}
216
217void H264BitWriter::WriteBits(s32 value, s32 bit_count) {
218 s32 value_pos = 0;
219
220 s32 remaining = bit_count;
221
222 while (remaining > 0) {
223 s32 copy_size = remaining;
224
225 const s32 free_bits = GetFreeBufferBits();
226
227 if (copy_size > free_bits) {
228 copy_size = free_bits;
229 }
230
231 const s32 mask = (1 << copy_size) - 1;
232
233 const s32 src_shift = (bit_count - value_pos) - copy_size;
234 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
235
236 buffer |= ((value >> src_shift) & mask) << dst_shift;
237
238 value_pos += copy_size;
239 buffer_pos += copy_size;
240 remaining -= copy_size;
241 }
242}
243
244void H264BitWriter::WriteExpGolombCodedInt(s32 value) {
245 const s32 sign = value <= 0 ? 0 : 1;
246 if (value < 0) {
247 value = -value;
248 }
249 value = (value << 1) - sign;
250 WriteExpGolombCodedUInt(value);
251}
252
253void H264BitWriter::WriteExpGolombCodedUInt(u32 value) {
254 const s32 size = 32 - std::countl_zero(value + 1);
255 WriteBits(1, size);
256
257 value -= (1U << (size - 1)) - 1;
258 WriteBits(static_cast<s32>(value), size - 1);
259}
260
261s32 H264BitWriter::GetFreeBufferBits() {
262 if (buffer_pos == buffer_size) {
263 Flush();
264 }
265
266 return buffer_size - buffer_pos;
267}
268
269void H264BitWriter::Flush() {
270 if (buffer_pos == 0) {
271 return;
272 }
273 byte_array.push_back(static_cast<u8>(buffer));
274
275 buffer = 0;
276 buffer_pos = 0;
277}
278} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/h264.h b/src/video_core/host1x/codecs/h264.h
new file mode 100644
index 000000000..5cc86454e
--- /dev/null
+++ b/src/video_core/host1x/codecs/h264.h
@@ -0,0 +1,177 @@
1// SPDX-FileCopyrightText: Ryujinx Team and Contributors
2// SPDX-License-Identifier: MIT
3
4#pragma once
5
6#include <span>
7#include <vector>
8#include "common/bit_field.h"
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/host1x/nvdec_common.h"
12
13namespace Tegra {
14
15namespace Host1x {
16class Host1x;
17} // namespace Host1x
18
19namespace Decoder {
20
21class H264BitWriter {
22public:
23 H264BitWriter();
24 ~H264BitWriter();
25
26 /// The following Write methods are based on clause 9.1 in the H.264 specification.
27 /// WriteSe and WriteUe write in the Exp-Golomb-coded syntax
28 void WriteU(s32 value, s32 value_sz);
29 void WriteSe(s32 value);
30 void WriteUe(u32 value);
31
32 /// Finalize the bitstream
33 void End();
34
35 /// append a bit to the stream, equivalent value to the state parameter
36 void WriteBit(bool state);
37
38 /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification
39 /// Writes the scaling matrices of the sream
40 void WriteScalingList(std::span<const u8> list, s32 start, s32 count);
41
42 /// Return the bitstream as a vector.
43 [[nodiscard]] std::vector<u8>& GetByteArray();
44 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
45
46private:
47 void WriteBits(s32 value, s32 bit_count);
48 void WriteExpGolombCodedInt(s32 value);
49 void WriteExpGolombCodedUInt(u32 value);
50 [[nodiscard]] s32 GetFreeBufferBits();
51 void Flush();
52
53 s32 buffer_size{8};
54
55 s32 buffer{};
56 s32 buffer_pos{};
57 std::vector<u8> byte_array;
58};
59
60class H264 {
61public:
62 explicit H264(Host1x::Host1x& host1x);
63 ~H264();
64
65 /// Compose the H264 frame for FFmpeg decoding
66 [[nodiscard]] const std::vector<u8>& ComposeFrame(
67 const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false);
68
69private:
70 std::vector<u8> frame;
71 Host1x::Host1x& host1x;
72
73 struct H264ParameterSet {
74 s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00
75 s32 delta_pic_order_always_zero_flag; ///< 0x04
76 s32 frame_mbs_only_flag; ///< 0x08
77 u32 pic_width_in_mbs; ///< 0x0C
78 u32 frame_height_in_map_units; ///< 0x10
79 union { ///< 0x14
80 BitField<0, 2, u32> tile_format;
81 BitField<2, 3, u32> gob_height;
82 };
83 u32 entropy_coding_mode_flag; ///< 0x18
84 s32 pic_order_present_flag; ///< 0x1C
85 s32 num_refidx_l0_default_active; ///< 0x20
86 s32 num_refidx_l1_default_active; ///< 0x24
87 s32 deblocking_filter_control_present_flag; ///< 0x28
88 s32 redundant_pic_cnt_present_flag; ///< 0x2C
89 u32 transform_8x8_mode_flag; ///< 0x30
90 u32 pitch_luma; ///< 0x34
91 u32 pitch_chroma; ///< 0x38
92 u32 luma_top_offset; ///< 0x3C
93 u32 luma_bot_offset; ///< 0x40
94 u32 luma_frame_offset; ///< 0x44
95 u32 chroma_top_offset; ///< 0x48
96 u32 chroma_bot_offset; ///< 0x4C
97 u32 chroma_frame_offset; ///< 0x50
98 u32 hist_buffer_size; ///< 0x54
99 union { ///< 0x58
100 union {
101 BitField<0, 1, u64> mbaff_frame;
102 BitField<1, 1, u64> direct_8x8_inference;
103 BitField<2, 1, u64> weighted_pred;
104 BitField<3, 1, u64> constrained_intra_pred;
105 BitField<4, 1, u64> ref_pic;
106 BitField<5, 1, u64> field_pic;
107 BitField<6, 1, u64> bottom_field;
108 BitField<7, 1, u64> second_field;
109 } flags;
110 BitField<8, 4, u64> log2_max_frame_num_minus4;
111 BitField<12, 2, u64> chroma_format_idc;
112 BitField<14, 2, u64> pic_order_cnt_type;
113 BitField<16, 6, s64> pic_init_qp_minus26;
114 BitField<22, 5, s64> chroma_qp_index_offset;
115 BitField<27, 5, s64> second_chroma_qp_index_offset;
116 BitField<32, 2, u64> weighted_bipred_idc;
117 BitField<34, 7, u64> curr_pic_idx;
118 BitField<41, 5, u64> curr_col_idx;
119 BitField<46, 16, u64> frame_number;
120 BitField<62, 1, u64> frame_surfaces;
121 BitField<63, 1, u64> output_memory_layout;
122 };
123 };
124 static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size");
125
126 struct H264DecoderContext {
127 INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000
128 u32 stream_len; ///< 0x0048
129 INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C
130 H264ParameterSet h264_parameter_set; ///< 0x0058
131 INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8
132 std::array<u8, 0x60> weight_scale; ///< 0x01C0
133 std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220
134 };
135 static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size");
136
137#define ASSERT_POSITION(field_name, position) \
138 static_assert(offsetof(H264ParameterSet, field_name) == position, \
139 "Field " #field_name " has invalid position")
140
141 ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00);
142 ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04);
143 ASSERT_POSITION(frame_mbs_only_flag, 0x08);
144 ASSERT_POSITION(pic_width_in_mbs, 0x0C);
145 ASSERT_POSITION(frame_height_in_map_units, 0x10);
146 ASSERT_POSITION(tile_format, 0x14);
147 ASSERT_POSITION(entropy_coding_mode_flag, 0x18);
148 ASSERT_POSITION(pic_order_present_flag, 0x1C);
149 ASSERT_POSITION(num_refidx_l0_default_active, 0x20);
150 ASSERT_POSITION(num_refidx_l1_default_active, 0x24);
151 ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28);
152 ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C);
153 ASSERT_POSITION(transform_8x8_mode_flag, 0x30);
154 ASSERT_POSITION(pitch_luma, 0x34);
155 ASSERT_POSITION(pitch_chroma, 0x38);
156 ASSERT_POSITION(luma_top_offset, 0x3C);
157 ASSERT_POSITION(luma_bot_offset, 0x40);
158 ASSERT_POSITION(luma_frame_offset, 0x44);
159 ASSERT_POSITION(chroma_top_offset, 0x48);
160 ASSERT_POSITION(chroma_bot_offset, 0x4C);
161 ASSERT_POSITION(chroma_frame_offset, 0x50);
162 ASSERT_POSITION(hist_buffer_size, 0x54);
163 ASSERT_POSITION(flags, 0x58);
164#undef ASSERT_POSITION
165
166#define ASSERT_POSITION(field_name, position) \
167 static_assert(offsetof(H264DecoderContext, field_name) == position, \
168 "Field " #field_name " has invalid position")
169
170 ASSERT_POSITION(stream_len, 0x48);
171 ASSERT_POSITION(h264_parameter_set, 0x58);
172 ASSERT_POSITION(weight_scale, 0x1C0);
173#undef ASSERT_POSITION
174};
175
176} // namespace Decoder
177} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp
new file mode 100644
index 000000000..28fb12cb8
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp8.cpp
@@ -0,0 +1,53 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <vector>
5
6#include "video_core/host1x/codecs/vp8.h"
7#include "video_core/host1x/host1x.h"
8#include "video_core/memory_manager.h"
9
10namespace Tegra::Decoder {
11VP8::VP8(Host1x::Host1x& host1x_) : host1x{host1x_} {}
12
13VP8::~VP8() = default;
14
15const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
16 VP8PictureInfo info;
17 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
18
19 const bool is_key_frame = info.key_frame == 1u;
20 const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
21 const size_t header_size = is_key_frame ? 10u : 3u;
22 frame.resize(header_size + bitstream_size);
23
24 // Based on page 30 of the VP8 specification.
25 // https://datatracker.ietf.org/doc/rfc6386/
26 frame[0] = is_key_frame ? 0u : 1u; // 1-bit frame type (0: keyframe, 1: interframes).
27 frame[0] |= static_cast<u8>((info.version & 7u) << 1u); // 3-bit version number
28 frame[0] |= static_cast<u8>(1u << 4u); // 1-bit show_frame flag
29
30 // The next 19-bits are the first partition size
31 frame[0] |= static_cast<u8>((info.first_part_size & 7u) << 5u);
32 frame[1] = static_cast<u8>((info.first_part_size & 0x7f8u) >> 3u);
33 frame[2] = static_cast<u8>((info.first_part_size & 0x7f800u) >> 11u);
34
35 if (is_key_frame) {
36 frame[3] = 0x9du;
37 frame[4] = 0x01u;
38 frame[5] = 0x2au;
39 // TODO(ameerj): Horizontal/Vertical Scale
40 // 16 bits: (2 bits Horizontal Scale << 14) | Width (14 bits)
41 frame[6] = static_cast<u8>(info.frame_width & 0xff);
42 frame[7] = static_cast<u8>(((info.frame_width >> 8) & 0x3f));
43 // 16 bits:(2 bits Vertical Scale << 14) | Height (14 bits)
44 frame[8] = static_cast<u8>(info.frame_height & 0xff);
45 frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
46 }
47 const u64 bitstream_offset = state.frame_bitstream_offset;
48 host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
49
50 return frame;
51}
52
53} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h
new file mode 100644
index 000000000..5bf07ecab
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp8.h
@@ -0,0 +1,78 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8
9#include "common/common_funcs.h"
10#include "common/common_types.h"
11#include "video_core/host1x/nvdec_common.h"
12
13namespace Tegra {
14
15namespace Host1x {
16class Host1x;
17} // namespace Host1x
18
19namespace Decoder {
20
/// Builds VP8 frames for FFmpeg from the NVDEC picture info and bitstream that are read
/// through the Host1x memory manager.
21class VP8 {
22public:
23 explicit VP8(Host1x::Host1x& host1x);
24 ~VP8();
25
26 /// Compose the VP8 frame for FFmpeg decoding
/// Reads the picture info at state.picture_info_offset and the bitstream at
/// state.frame_bitstream_offset, and returns a reference to the internal frame buffer.
27 [[nodiscard]] const std::vector<u8>& ComposeFrame(
28 const Host1x::NvdecCommon::NvdecRegisters& state);
29
30private:
// Output buffer filled by ComposeFrame and handed out by reference; resized on every call.
31 std::vector<u8> frame;
// Provides access to the memory manager used for all reads.
32 Host1x::Host1x& host1x;
33
// Raw picture parameters, read as one block of sizeof(VP8PictureInfo) bytes.
// Field order and padding define the binary layout; the static_assert below pins the total
// size to 0xc0 bytes, so members must not be reordered or resized.
34 struct VP8PictureInfo {
35 INSERT_PADDING_WORDS_NOINIT(14);
36 u16 frame_width; // actual frame width
37 u16 frame_height; // actual frame height
// ComposeFrame treats the value 1 as "key frame".
38 u8 key_frame;
// Only the low three bits are written into the frame tag.
39 u8 version;
// One byte shared by three bit fields.
// NOTE(review): the BitField arguments look like <first bit, bit count, storage type> - confirm.
40 union {
41 u8 raw;
42 BitField<0, 2, u8> tile_format;
43 BitField<2, 3, u8> gob_height;
44 BitField<5, 3, u8> reserverd_surface_format;
45 };
46 u8 error_conceal_on; // 1: error conceal on; 0: off
47 u32 first_part_size; // the size of first partition(frame header and mb header partition)
48 u32 hist_buffer_size; // in units of 256
// Used by ComposeFrame as the number of bitstream bytes to read.
49 u32 vld_buffer_size; // in units of 1
50 // Current frame buffers
51 std::array<u32, 2> frame_stride; // [y_c]
52 u32 luma_top_offset; // offset of luma top field in units of 256
53 u32 luma_bot_offset; // offset of luma bottom field in units of 256
54 u32 luma_frame_offset; // offset of luma frame in units of 256
55 u32 chroma_top_offset; // offset of chroma top field in units of 256
56 u32 chroma_bot_offset; // offset of chroma bottom field in units of 256
57 u32 chroma_frame_offset; // offset of chroma frame in units of 256
58
59 INSERT_PADDING_BYTES_NOINIT(0x1c); // NvdecDisplayParams
60
61 // Decode picture buffer related
62 s8 current_output_memory_layout;
63 // output NV12/NV24 setting. index 0: golden; 1: altref; 2: last
64 std::array<s8, 3> output_memory_layout;
65
66 u8 segmentation_feature_data_update;
67 INSERT_PADDING_BYTES_NOINIT(3);
68
69 // ucode return result
70 u32 result_value;
71 std::array<u32, 8> partition_offset;
72 INSERT_PADDING_WORDS_NOINIT(3);
73 };
74 static_assert(sizeof(VP8PictureInfo) == 0xc0, "PictureInfo is an invalid size");
75};
76
77} // namespace Decoder
78} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp
new file mode 100644
index 000000000..cf40c9012
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9.cpp
@@ -0,0 +1,947 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm> // for std::copy
5#include <numeric>
6#include "common/assert.h"
7#include "video_core/host1x/codecs/vp9.h"
8#include "video_core/host1x/host1x.h"
9#include "video_core/memory_manager.h"
10
11namespace Tegra::Decoder {
12namespace {
13constexpr u32 diff_update_probability = 252;
14constexpr u32 frame_sync_code = 0x498342;
15
// Baseline VP9 entropy probabilities.
// When ComposeUncompressedHeader emits a key frame, this table is copied into prev_frame_probs
// and into every frame_ctxs slot.
// Layout notes, as consumed by the writers in this file:
//  - coef_probs holds 1728 values, i.e. four blocks of 2 * 2 * 6 * 6 * 3 entries
//    (see WriteCoefProbabilityUpdate).
//  - partition_prob and inter_mode_prob are stored in groups of four whose last entry is zero;
//    WriteProbabilityUpdateAligned4 only codes the first three entries of each group.
16// Default compressed header probabilities once frame context resets
17constexpr Vp9EntropyProbs default_probs{
18 .y_mode_prob{
19 65, 32, 18, 144, 162, 194, 41, 51, 98, 132, 68, 18, 165, 217, 196, 45, 40, 78,
20 173, 80, 19, 176, 240, 193, 64, 35, 46, 221, 135, 38, 194, 248, 121, 96, 85, 29,
21 },
22 .partition_prob{
23 199, 122, 141, 0, 147, 63, 159, 0, 148, 133, 118, 0, 121, 104, 114, 0,
24 174, 73, 87, 0, 92, 41, 83, 0, 82, 99, 50, 0, 53, 39, 39, 0,
25 177, 58, 59, 0, 68, 26, 63, 0, 52, 79, 25, 0, 17, 14, 12, 0,
26 222, 34, 30, 0, 72, 16, 44, 0, 58, 32, 12, 0, 10, 7, 6, 0,
27 },
28 .coef_probs{
29 195, 29, 183, 84, 49, 136, 8, 42, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30 31, 107, 169, 35, 99, 159, 17, 82, 140, 8, 66, 114, 2, 44, 76, 1, 19, 32,
31 40, 132, 201, 29, 114, 187, 13, 91, 157, 7, 75, 127, 3, 58, 95, 1, 28, 47,
32 69, 142, 221, 42, 122, 201, 15, 91, 159, 6, 67, 121, 1, 42, 77, 1, 17, 31,
33 102, 148, 228, 67, 117, 204, 17, 82, 154, 6, 59, 114, 2, 39, 75, 1, 15, 29,
34 156, 57, 233, 119, 57, 212, 58, 48, 163, 29, 40, 124, 12, 30, 81, 3, 12, 31,
35 191, 107, 226, 124, 117, 204, 25, 99, 155, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36 29, 148, 210, 37, 126, 194, 8, 93, 157, 2, 68, 118, 1, 39, 69, 1, 17, 33,
37 41, 151, 213, 27, 123, 193, 3, 82, 144, 1, 58, 105, 1, 32, 60, 1, 13, 26,
38 59, 159, 220, 23, 126, 198, 4, 88, 151, 1, 66, 114, 1, 38, 71, 1, 18, 34,
39 114, 136, 232, 51, 114, 207, 11, 83, 155, 3, 56, 105, 1, 33, 65, 1, 17, 34,
40 149, 65, 234, 121, 57, 215, 61, 49, 166, 28, 36, 114, 12, 25, 76, 3, 16, 42,
41 214, 49, 220, 132, 63, 188, 42, 65, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42 85, 137, 221, 104, 131, 216, 49, 111, 192, 21, 87, 155, 2, 49, 87, 1, 16, 28,
43 89, 163, 230, 90, 137, 220, 29, 100, 183, 10, 70, 135, 2, 42, 81, 1, 17, 33,
44 108, 167, 237, 55, 133, 222, 15, 97, 179, 4, 72, 135, 1, 45, 85, 1, 19, 38,
45 124, 146, 240, 66, 124, 224, 17, 88, 175, 4, 58, 122, 1, 36, 75, 1, 18, 37,
46 141, 79, 241, 126, 70, 227, 66, 58, 182, 30, 44, 136, 12, 34, 96, 2, 20, 47,
47 229, 99, 249, 143, 111, 235, 46, 109, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 82, 158, 236, 94, 146, 224, 25, 117, 191, 9, 87, 149, 3, 56, 99, 1, 33, 57,
49 83, 167, 237, 68, 145, 222, 10, 103, 177, 2, 72, 131, 1, 41, 79, 1, 20, 39,
50 99, 167, 239, 47, 141, 224, 10, 104, 178, 2, 73, 133, 1, 44, 85, 1, 22, 47,
51 127, 145, 243, 71, 129, 228, 17, 93, 177, 3, 61, 124, 1, 41, 84, 1, 21, 52,
52 157, 78, 244, 140, 72, 231, 69, 58, 184, 31, 44, 137, 14, 38, 105, 8, 23, 61,
53 125, 34, 187, 52, 41, 133, 6, 31, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
54 37, 109, 153, 51, 102, 147, 23, 87, 128, 8, 67, 101, 1, 41, 63, 1, 19, 29,
55 31, 154, 185, 17, 127, 175, 6, 96, 145, 2, 73, 114, 1, 51, 82, 1, 28, 45,
56 23, 163, 200, 10, 131, 185, 2, 93, 148, 1, 67, 111, 1, 41, 69, 1, 14, 24,
57 29, 176, 217, 12, 145, 201, 3, 101, 156, 1, 69, 111, 1, 39, 63, 1, 14, 23,
58 57, 192, 233, 25, 154, 215, 6, 109, 167, 3, 78, 118, 1, 48, 69, 1, 21, 29,
59 202, 105, 245, 108, 106, 216, 18, 90, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 33, 172, 219, 64, 149, 206, 14, 117, 177, 5, 90, 141, 2, 61, 95, 1, 37, 57,
61 33, 179, 220, 11, 140, 198, 1, 89, 148, 1, 60, 104, 1, 33, 57, 1, 12, 21,
62 30, 181, 221, 8, 141, 198, 1, 87, 145, 1, 58, 100, 1, 31, 55, 1, 12, 20,
63 32, 186, 224, 7, 142, 198, 1, 86, 143, 1, 58, 100, 1, 31, 55, 1, 12, 22,
64 57, 192, 227, 20, 143, 204, 3, 96, 154, 1, 68, 112, 1, 42, 69, 1, 19, 32,
65 212, 35, 215, 113, 47, 169, 29, 48, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66 74, 129, 203, 106, 120, 203, 49, 107, 178, 19, 84, 144, 4, 50, 84, 1, 15, 25,
67 71, 172, 217, 44, 141, 209, 15, 102, 173, 6, 76, 133, 2, 51, 89, 1, 24, 42,
68 64, 185, 231, 31, 148, 216, 8, 103, 175, 3, 74, 131, 1, 46, 81, 1, 18, 30,
69 65, 196, 235, 25, 157, 221, 5, 105, 174, 1, 67, 120, 1, 38, 69, 1, 15, 30,
70 65, 204, 238, 30, 156, 224, 7, 107, 177, 2, 70, 124, 1, 42, 73, 1, 18, 34,
71 225, 86, 251, 144, 104, 235, 42, 99, 181, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72 85, 175, 239, 112, 165, 229, 29, 136, 200, 12, 103, 162, 6, 77, 123, 2, 53, 84,
73 75, 183, 239, 30, 155, 221, 3, 106, 171, 1, 74, 128, 1, 44, 76, 1, 17, 28,
74 73, 185, 240, 27, 159, 222, 2, 107, 172, 1, 75, 127, 1, 42, 73, 1, 17, 29,
75 62, 190, 238, 21, 159, 222, 2, 107, 172, 1, 72, 122, 1, 40, 71, 1, 18, 32,
76 61, 199, 240, 27, 161, 226, 4, 113, 180, 1, 76, 129, 1, 46, 80, 1, 23, 41,
77 7, 27, 153, 5, 30, 95, 1, 16, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78 50, 75, 127, 57, 75, 124, 27, 67, 108, 10, 54, 86, 1, 33, 52, 1, 12, 18,
79 43, 125, 151, 26, 108, 148, 7, 83, 122, 2, 59, 89, 1, 38, 60, 1, 17, 27,
80 23, 144, 163, 13, 112, 154, 2, 75, 117, 1, 50, 81, 1, 31, 51, 1, 14, 23,
81 18, 162, 185, 6, 123, 171, 1, 78, 125, 1, 51, 86, 1, 31, 54, 1, 14, 23,
82 15, 199, 227, 3, 150, 204, 1, 91, 146, 1, 55, 95, 1, 30, 53, 1, 11, 20,
83 19, 55, 240, 19, 59, 196, 3, 52, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 41, 166, 207, 104, 153, 199, 31, 123, 181, 14, 101, 152, 5, 72, 106, 1, 36, 52,
85 35, 176, 211, 12, 131, 190, 2, 88, 144, 1, 60, 101, 1, 36, 60, 1, 16, 28,
86 28, 183, 213, 8, 134, 191, 1, 86, 142, 1, 56, 96, 1, 30, 53, 1, 12, 20,
87 20, 190, 215, 4, 135, 192, 1, 84, 139, 1, 53, 91, 1, 28, 49, 1, 11, 20,
88 13, 196, 216, 2, 137, 192, 1, 86, 143, 1, 57, 99, 1, 32, 56, 1, 13, 24,
89 211, 29, 217, 96, 47, 156, 22, 43, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 78, 120, 193, 111, 116, 186, 46, 102, 164, 15, 80, 128, 2, 49, 76, 1, 18, 28,
91 71, 161, 203, 42, 132, 192, 10, 98, 150, 3, 69, 109, 1, 44, 70, 1, 18, 29,
92 57, 186, 211, 30, 140, 196, 4, 93, 146, 1, 62, 102, 1, 38, 65, 1, 16, 27,
93 47, 199, 217, 14, 145, 196, 1, 88, 142, 1, 57, 98, 1, 36, 62, 1, 15, 26,
94 26, 219, 229, 5, 155, 207, 1, 94, 151, 1, 60, 104, 1, 36, 62, 1, 16, 28,
95 233, 29, 248, 146, 47, 220, 43, 52, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0,
96 100, 163, 232, 179, 161, 222, 63, 142, 204, 37, 113, 174, 26, 89, 137, 18, 68, 97,
97 85, 181, 230, 32, 146, 209, 7, 100, 164, 3, 71, 121, 1, 45, 77, 1, 18, 30,
98 65, 187, 230, 20, 148, 207, 2, 97, 159, 1, 68, 116, 1, 40, 70, 1, 14, 29,
99 40, 194, 227, 8, 147, 204, 1, 94, 155, 1, 65, 112, 1, 39, 66, 1, 14, 26,
100 16, 208, 228, 3, 151, 207, 1, 98, 160, 1, 67, 117, 1, 41, 74, 1, 17, 31,
101 17, 38, 140, 7, 34, 80, 1, 17, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102 37, 75, 128, 41, 76, 128, 26, 66, 116, 12, 52, 94, 2, 32, 55, 1, 10, 16,
103 50, 127, 154, 37, 109, 152, 16, 82, 121, 5, 59, 85, 1, 35, 54, 1, 13, 20,
104 40, 142, 167, 17, 110, 157, 2, 71, 112, 1, 44, 72, 1, 27, 45, 1, 11, 17,
105 30, 175, 188, 9, 124, 169, 1, 74, 116, 1, 48, 78, 1, 30, 49, 1, 11, 18,
106 10, 222, 223, 2, 150, 194, 1, 83, 128, 1, 48, 79, 1, 27, 45, 1, 11, 17,
107 36, 41, 235, 29, 36, 193, 10, 27, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108 85, 165, 222, 177, 162, 215, 110, 135, 195, 57, 113, 168, 23, 83, 120, 10, 49, 61,
109 85, 190, 223, 36, 139, 200, 5, 90, 146, 1, 60, 103, 1, 38, 65, 1, 18, 30,
110 72, 202, 223, 23, 141, 199, 2, 86, 140, 1, 56, 97, 1, 36, 61, 1, 16, 27,
111 55, 218, 225, 13, 145, 200, 1, 86, 141, 1, 57, 99, 1, 35, 61, 1, 13, 22,
112 15, 235, 212, 1, 132, 184, 1, 84, 139, 1, 57, 97, 1, 34, 56, 1, 14, 23,
113 181, 21, 201, 61, 37, 123, 10, 38, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0,
114 47, 106, 172, 95, 104, 173, 42, 93, 159, 18, 77, 131, 4, 50, 81, 1, 17, 23,
115 62, 147, 199, 44, 130, 189, 28, 102, 154, 18, 75, 115, 2, 44, 65, 1, 12, 19,
116 55, 153, 210, 24, 130, 194, 3, 93, 146, 1, 61, 97, 1, 31, 50, 1, 10, 16,
117 49, 186, 223, 17, 148, 204, 1, 96, 142, 1, 53, 83, 1, 26, 44, 1, 11, 17,
118 13, 217, 212, 2, 136, 180, 1, 78, 124, 1, 50, 83, 1, 29, 49, 1, 14, 23,
119 197, 13, 247, 82, 17, 222, 25, 17, 162, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120 126, 186, 247, 234, 191, 243, 176, 177, 234, 104, 158, 220, 66, 128, 186, 55, 90, 137,
121 111, 197, 242, 46, 158, 219, 9, 104, 171, 2, 65, 125, 1, 44, 80, 1, 17, 91,
122 104, 208, 245, 39, 168, 224, 3, 109, 162, 1, 79, 124, 1, 50, 102, 1, 43, 102,
123 84, 220, 246, 31, 177, 231, 2, 115, 180, 1, 79, 134, 1, 55, 77, 1, 60, 79,
124 43, 243, 240, 8, 180, 217, 1, 115, 166, 1, 84, 121, 1, 51, 67, 1, 16, 6,
125 },
126 .switchable_interp_prob{235, 162, 36, 255, 34, 3, 149, 144},
127 .inter_mode_prob{
128 2, 173, 34, 0, 7, 145, 85, 0, 7, 166, 63, 0, 7, 94,
129 66, 0, 8, 64, 46, 0, 17, 81, 31, 0, 25, 29, 30, 0,
130 },
131 .intra_inter_prob{9, 102, 187, 225},
// NOTE(review): comp_inter_prob repeats the intra_inter_prob values with a trailing 0 -
// confirm this is intended and not a copy/paste slip.
132 .comp_inter_prob{9, 102, 187, 225, 0},
133 .single_ref_prob{33, 16, 77, 74, 142, 142, 172, 170, 238, 247},
134 .comp_ref_prob{50, 126, 123, 221, 226},
135 .tx_32x32_prob{3, 136, 37, 5, 52, 13},
136 .tx_16x16_prob{20, 152, 15, 101},
137 .tx_8x8_prob{100, 66},
138 .skip_probs{192, 128, 64},
139 .joints{32, 64, 96},
140 .sign{128, 128},
141 .classes{
142 224, 144, 192, 168, 192, 176, 192, 198, 198, 245,
143 216, 128, 176, 160, 176, 176, 192, 198, 198, 208,
144 },
145 .class_0{216, 208},
146 .prob_bits{
147 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
148 136, 140, 148, 160, 176, 192, 224, 234, 234, 240,
149 },
150 .class_0_fr{128, 128, 64, 96, 112, 64, 128, 128, 64, 96, 112, 64},
151 .fr{64, 96, 64, 64, 96, 64},
152 .class_0_hp{160, 160},
153 .high_precision{128, 128},
154};
155
// 256-entry lookup table indexed by an 8-bit value. Entry i is the number of leading zero bits
// of i when viewed as 8 bits (1 -> 7, 2..3 -> 6, ..., 128..255 -> 0); entry 0 is stored as 0.
// NOTE(review): not referenced in the part of the file shown here; presumably used by the range
// encoder's renormalisation step - confirm at the use site.
156constexpr std::array<u8, 256> norm_lut{
157 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
159 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
161 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
164 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165};
166
// Lookup table used by RemapProbability: it is indexed with the recentred probability
// difference minus one and yields the value that is then passed to EncodeTermSubExp.
// Entries at indices 6, 19, 32, ... (every 13th, ending at 253) hold 0..19; all remaining
// entries hold 20..253 in ascending order.
167constexpr std::array<u8, 254> map_lut{
168 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
169 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54,
170 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
171 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89,
172 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
173 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124,
174 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142,
175 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159,
176 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177,
177 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194,
178 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212,
179 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17,
180 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247,
181 248, 249, 250, 251, 252, 253, 19,
182};
183
184// 6.2.14 Tile size calculation
185
186[[nodiscard]] s32 CalcMinLog2TileCols(s32 frame_width) {
187 const s32 sb64_cols = (frame_width + 63) / 64;
188 s32 min_log2 = 0;
189
190 while ((64 << min_log2) < sb64_cols) {
191 min_log2++;
192 }
193
194 return min_log2;
195}
196
197[[nodiscard]] s32 CalcMaxLog2TileCols(s32 frame_width) {
198 const s32 sb64_cols = (frame_width + 63) / 64;
199 s32 max_log2 = 1;
200
201 while ((sb64_cols >> max_log2) >= 4) {
202 max_log2++;
203 }
204
205 return max_log2 - 1;
206}
207
208// Recenters probability. Based on section 6.3.6 of VP9 Specification
209[[nodiscard]] s32 RecenterNonNeg(s32 new_prob, s32 old_prob) {
210 if (new_prob > old_prob * 2) {
211 return new_prob;
212 }
213
214 if (new_prob >= old_prob) {
215 return (new_prob - old_prob) * 2;
216 }
217
218 return (old_prob - new_prob) * 2 - 1;
219}
220
221// Adjusts old_prob depending on new_prob. Based on section 6.3.5 of VP9 Specification
222[[nodiscard]] s32 RemapProbability(s32 new_prob, s32 old_prob) {
223 new_prob--;
224 old_prob--;
225
226 std::size_t index{};
227
228 if (old_prob * 2 <= 0xff) {
229 index = static_cast<std::size_t>(std::max(0, RecenterNonNeg(new_prob, old_prob) - 1));
230 } else {
231 index = static_cast<std::size_t>(
232 std::max(0, RecenterNonNeg(0xff - 1 - new_prob, 0xff - 1 - old_prob) - 1));
233 }
234
235 return static_cast<s32>(map_lut[index]);
236}
237} // Anonymous namespace
238
239VP9::VP9(Host1x::Host1x& host1x_) : host1x{host1x_} {}
240
241VP9::~VP9() = default;
242
243void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
244 const bool update = new_prob != old_prob;
245
246 writer.Write(update, diff_update_probability);
247
248 if (update) {
249 WriteProbabilityDelta(writer, new_prob, old_prob);
250 }
251}
252template <typename T, std::size_t N>
253void VP9::WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
254 const std::array<T, N>& old_prob) {
255 for (std::size_t offset = 0; offset < new_prob.size(); ++offset) {
256 WriteProbabilityUpdate(writer, new_prob[offset], old_prob[offset]);
257 }
258}
259
260template <typename T, std::size_t N>
261void VP9::WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
262 const std::array<T, N>& old_prob) {
263 for (std::size_t offset = 0; offset < new_prob.size(); offset += 4) {
264 WriteProbabilityUpdate(writer, new_prob[offset + 0], old_prob[offset + 0]);
265 WriteProbabilityUpdate(writer, new_prob[offset + 1], old_prob[offset + 1]);
266 WriteProbabilityUpdate(writer, new_prob[offset + 2], old_prob[offset + 2]);
267 }
268}
269
270void VP9::WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
271 const int delta = RemapProbability(new_prob, old_prob);
272
273 EncodeTermSubExp(writer, delta);
274}
275
276void VP9::EncodeTermSubExp(VpxRangeEncoder& writer, s32 value) {
277 if (WriteLessThan(writer, value, 16)) {
278 writer.Write(value, 4);
279 } else if (WriteLessThan(writer, value, 32)) {
280 writer.Write(value - 16, 4);
281 } else if (WriteLessThan(writer, value, 64)) {
282 writer.Write(value - 32, 5);
283 } else {
284 value -= 64;
285
286 constexpr s32 size = 8;
287
288 const s32 mask = (1 << size) - 191;
289
290 const s32 delta = value - mask;
291
292 if (delta < 0) {
293 writer.Write(value, size - 1);
294 } else {
295 writer.Write(delta / 2 + mask, size - 1);
296 writer.Write(delta & 1, 1);
297 }
298 }
299}
300
301bool VP9::WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test) {
302 const bool is_lt = value < test;
303 writer.Write(!is_lt);
304 return is_lt;
305}
306
307void VP9::WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
308 const std::array<u8, 1728>& new_prob,
309 const std::array<u8, 1728>& old_prob) {
310 constexpr u32 block_bytes = 2 * 2 * 6 * 6 * 3;
311
312 const auto needs_update = [&](u32 base_index) {
313 return !std::equal(new_prob.begin() + base_index,
314 new_prob.begin() + base_index + block_bytes,
315 old_prob.begin() + base_index);
316 };
317
318 for (u32 block_index = 0; block_index < 4; block_index++) {
319 const u32 base_index = block_index * block_bytes;
320 const bool update = needs_update(base_index);
321 writer.Write(update);
322
323 if (update) {
324 u32 index = base_index;
325 for (s32 i = 0; i < 2; i++) {
326 for (s32 j = 0; j < 2; j++) {
327 for (s32 k = 0; k < 6; k++) {
328 for (s32 l = 0; l < 6; l++) {
329 if (k != 0 || l < 3) {
330 WriteProbabilityUpdate(writer, new_prob[index + 0],
331 old_prob[index + 0]);
332 WriteProbabilityUpdate(writer, new_prob[index + 1],
333 old_prob[index + 1]);
334 WriteProbabilityUpdate(writer, new_prob[index + 2],
335 old_prob[index + 2]);
336 }
337 index += 3;
338 }
339 }
340 }
341 }
342 }
343 if (block_index == static_cast<u32>(tx_mode)) {
344 break;
345 }
346 }
347}
348
349void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob) {
350 const bool update = new_prob != old_prob;
351 writer.Write(update, diff_update_probability);
352
353 if (update) {
354 writer.Write(new_prob >> 1, 7);
355 }
356}
357
358Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
359 PictureInfo picture_info;
360 host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
361 Vp9PictureInfo vp9_info = picture_info.Convert();
362
363 InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
364
365 // surface_luma_offset[0:3] contains the address of the reference frame offsets in the following
366 // order: last, golden, altref, current.
367 std::copy(state.surface_luma_offset.begin(), state.surface_luma_offset.begin() + 4,
368 vp9_info.frame_offsets.begin());
369
370 return vp9_info;
371}
372
373void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
374 EntropyProbs entropy;
375 host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
376 entropy.Convert(dst);
377}
378
379Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
380 Vp9FrameContainer current_frame{};
381 {
382 // gpu.SyncGuestHost(); epic, why?
383 current_frame.info = GetVp9PictureInfo(state);
384 current_frame.bit_stream.resize(current_frame.info.bitstream_size);
385 host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset,
386 current_frame.bit_stream.data(),
387 current_frame.info.bitstream_size);
388 }
389 if (!next_frame.bit_stream.empty()) {
390 Vp9FrameContainer temp{
391 .info = current_frame.info,
392 .bit_stream = std::move(current_frame.bit_stream),
393 };
394 next_frame.info.show_frame = current_frame.info.last_frame_shown;
395 current_frame.info = next_frame.info;
396 current_frame.bit_stream = std::move(next_frame.bit_stream);
397 next_frame = std::move(temp);
398 } else {
399 next_frame.info = current_frame.info;
400 next_frame.bit_stream = current_frame.bit_stream;
401 }
402 return current_frame;
403}
404
405std::vector<u8> VP9::ComposeCompressedHeader() {
406 VpxRangeEncoder writer{};
407 const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame;
408 if (!current_frame_info.lossless) {
409 if (static_cast<u32>(current_frame_info.transform_mode) >= 3) {
410 writer.Write(3, 2);
411 writer.Write(current_frame_info.transform_mode == 4);
412 } else {
413 writer.Write(current_frame_info.transform_mode, 2);
414 }
415 }
416
417 if (current_frame_info.transform_mode == 4) {
418 // tx_mode_probs() in the spec
419 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_8x8_prob,
420 prev_frame_probs.tx_8x8_prob);
421 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_16x16_prob,
422 prev_frame_probs.tx_16x16_prob);
423 WriteProbabilityUpdate(writer, current_frame_info.entropy.tx_32x32_prob,
424 prev_frame_probs.tx_32x32_prob);
425 if (update_probs) {
426 prev_frame_probs.tx_8x8_prob = current_frame_info.entropy.tx_8x8_prob;
427 prev_frame_probs.tx_16x16_prob = current_frame_info.entropy.tx_16x16_prob;
428 prev_frame_probs.tx_32x32_prob = current_frame_info.entropy.tx_32x32_prob;
429 }
430 }
431 // read_coef_probs() in the spec
432 WriteCoefProbabilityUpdate(writer, current_frame_info.transform_mode,
433 current_frame_info.entropy.coef_probs, prev_frame_probs.coef_probs);
434 // read_skip_probs() in the spec
435 WriteProbabilityUpdate(writer, current_frame_info.entropy.skip_probs,
436 prev_frame_probs.skip_probs);
437
438 if (update_probs) {
439 prev_frame_probs.coef_probs = current_frame_info.entropy.coef_probs;
440 prev_frame_probs.skip_probs = current_frame_info.entropy.skip_probs;
441 }
442
443 if (!current_frame_info.intra_only) {
444 // read_inter_probs() in the spec
445 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.inter_mode_prob,
446 prev_frame_probs.inter_mode_prob);
447
448 if (current_frame_info.interp_filter == 4) {
449 // read_interp_filter_probs() in the spec
450 WriteProbabilityUpdate(writer, current_frame_info.entropy.switchable_interp_prob,
451 prev_frame_probs.switchable_interp_prob);
452 if (update_probs) {
453 prev_frame_probs.switchable_interp_prob =
454 current_frame_info.entropy.switchable_interp_prob;
455 }
456 }
457
458 // read_is_inter_probs() in the spec
459 WriteProbabilityUpdate(writer, current_frame_info.entropy.intra_inter_prob,
460 prev_frame_probs.intra_inter_prob);
461
462 // frame_reference_mode() in the spec
463 if ((current_frame_info.ref_frame_sign_bias[1] & 1) !=
464 (current_frame_info.ref_frame_sign_bias[2] & 1) ||
465 (current_frame_info.ref_frame_sign_bias[1] & 1) !=
466 (current_frame_info.ref_frame_sign_bias[3] & 1)) {
467 if (current_frame_info.reference_mode >= 1) {
468 writer.Write(1, 1);
469 writer.Write(current_frame_info.reference_mode == 2);
470 } else {
471 writer.Write(0, 1);
472 }
473 }
474
475 // frame_reference_mode_probs() in the spec
476 if (current_frame_info.reference_mode == 2) {
477 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_inter_prob,
478 prev_frame_probs.comp_inter_prob);
479 if (update_probs) {
480 prev_frame_probs.comp_inter_prob = current_frame_info.entropy.comp_inter_prob;
481 }
482 }
483
484 if (current_frame_info.reference_mode != 1) {
485 WriteProbabilityUpdate(writer, current_frame_info.entropy.single_ref_prob,
486 prev_frame_probs.single_ref_prob);
487 if (update_probs) {
488 prev_frame_probs.single_ref_prob = current_frame_info.entropy.single_ref_prob;
489 }
490 }
491
492 if (current_frame_info.reference_mode != 0) {
493 WriteProbabilityUpdate(writer, current_frame_info.entropy.comp_ref_prob,
494 prev_frame_probs.comp_ref_prob);
495 if (update_probs) {
496 prev_frame_probs.comp_ref_prob = current_frame_info.entropy.comp_ref_prob;
497 }
498 }
499
500 // read_y_mode_probs
501 for (std::size_t index = 0; index < current_frame_info.entropy.y_mode_prob.size();
502 ++index) {
503 WriteProbabilityUpdate(writer, current_frame_info.entropy.y_mode_prob[index],
504 prev_frame_probs.y_mode_prob[index]);
505 }
506
507 // read_partition_probs
508 WriteProbabilityUpdateAligned4(writer, current_frame_info.entropy.partition_prob,
509 prev_frame_probs.partition_prob);
510
511 // mv_probs
512 for (s32 i = 0; i < 3; i++) {
513 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.joints[i],
514 prev_frame_probs.joints[i]);
515 }
516 if (update_probs) {
517 prev_frame_probs.inter_mode_prob = current_frame_info.entropy.inter_mode_prob;
518 prev_frame_probs.intra_inter_prob = current_frame_info.entropy.intra_inter_prob;
519 prev_frame_probs.y_mode_prob = current_frame_info.entropy.y_mode_prob;
520 prev_frame_probs.partition_prob = current_frame_info.entropy.partition_prob;
521 prev_frame_probs.joints = current_frame_info.entropy.joints;
522 }
523
524 for (s32 i = 0; i < 2; i++) {
525 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.sign[i],
526 prev_frame_probs.sign[i]);
527 for (s32 j = 0; j < 10; j++) {
528 const int index = i * 10 + j;
529 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.classes[index],
530 prev_frame_probs.classes[index]);
531 }
532 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0[i],
533 prev_frame_probs.class_0[i]);
534
535 for (s32 j = 0; j < 10; j++) {
536 const int index = i * 10 + j;
537 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.prob_bits[index],
538 prev_frame_probs.prob_bits[index]);
539 }
540 }
541
542 for (s32 i = 0; i < 2; i++) {
543 for (s32 j = 0; j < 2; j++) {
544 for (s32 k = 0; k < 3; k++) {
545 const int index = i * 2 * 3 + j * 3 + k;
546 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_fr[index],
547 prev_frame_probs.class_0_fr[index]);
548 }
549 }
550
551 for (s32 j = 0; j < 3; j++) {
552 const int index = i * 3 + j;
553 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.fr[index],
554 prev_frame_probs.fr[index]);
555 }
556 }
557
558 if (current_frame_info.allow_high_precision_mv) {
559 for (s32 index = 0; index < 2; index++) {
560 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.class_0_hp[index],
561 prev_frame_probs.class_0_hp[index]);
562 WriteMvProbabilityUpdate(writer, current_frame_info.entropy.high_precision[index],
563 prev_frame_probs.high_precision[index]);
564 }
565 }
566
567 // save previous probs
568 if (update_probs) {
569 prev_frame_probs.sign = current_frame_info.entropy.sign;
570 prev_frame_probs.classes = current_frame_info.entropy.classes;
571 prev_frame_probs.class_0 = current_frame_info.entropy.class_0;
572 prev_frame_probs.prob_bits = current_frame_info.entropy.prob_bits;
573 prev_frame_probs.class_0_fr = current_frame_info.entropy.class_0_fr;
574 prev_frame_probs.fr = current_frame_info.entropy.fr;
575 prev_frame_probs.class_0_hp = current_frame_info.entropy.class_0_hp;
576 prev_frame_probs.high_precision = current_frame_info.entropy.high_precision;
577 }
578 }
579 writer.End();
580 return writer.GetBuffer();
581}
582
583VpxBitStreamWriter VP9::ComposeUncompressedHeader() {
584 VpxBitStreamWriter uncomp_writer{};
585
586 uncomp_writer.WriteU(2, 2); // Frame marker.
587 uncomp_writer.WriteU(0, 2); // Profile.
588 uncomp_writer.WriteBit(false); // Show existing frame.
589 uncomp_writer.WriteBit(!current_frame_info.is_key_frame); // is key frame?
590 uncomp_writer.WriteBit(current_frame_info.show_frame); // show frame?
591 uncomp_writer.WriteBit(current_frame_info.error_resilient_mode); // error reslience
592
593 if (current_frame_info.is_key_frame) {
594 uncomp_writer.WriteU(frame_sync_code, 24);
595 uncomp_writer.WriteU(0, 3); // Color space.
596 uncomp_writer.WriteU(0, 1); // Color range.
597 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
598 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
599 uncomp_writer.WriteBit(false); // Render and frame size different.
600
601 // Reset context
602 prev_frame_probs = default_probs;
603 swap_ref_indices = false;
604 loop_filter_ref_deltas.fill(0);
605 loop_filter_mode_deltas.fill(0);
606 frame_ctxs.fill(default_probs);
607
608 // intra only, meaning the frame can be recreated with no other references
609 current_frame_info.intra_only = true;
610 } else {
611 if (!current_frame_info.show_frame) {
612 uncomp_writer.WriteBit(current_frame_info.intra_only);
613 } else {
614 current_frame_info.intra_only = false;
615 }
616 if (!current_frame_info.error_resilient_mode) {
617 uncomp_writer.WriteU(0, 2); // Reset frame context.
618 }
619 const auto& curr_offsets = current_frame_info.frame_offsets;
620 const auto& next_offsets = next_frame.info.frame_offsets;
621 const bool ref_frames_different = curr_offsets[1] != curr_offsets[2];
622 const bool next_references_swap =
623 (next_offsets[1] == curr_offsets[2]) || (next_offsets[2] == curr_offsets[1]);
624 const bool needs_ref_swap = ref_frames_different && next_references_swap;
625 if (needs_ref_swap) {
626 swap_ref_indices = !swap_ref_indices;
627 }
628 union {
629 u32 raw;
630 BitField<0, 1, u32> refresh_last;
631 BitField<1, 2, u32> refresh_golden;
632 BitField<2, 1, u32> refresh_alt;
633 } refresh_frame_flags;
634
635 refresh_frame_flags.raw = 0;
636 for (u32 index = 0; index < 3; ++index) {
637 // Refresh indices that use the current frame as an index
638 if (curr_offsets[3] == next_offsets[index]) {
639 refresh_frame_flags.raw |= 1u << index;
640 }
641 }
642 if (swap_ref_indices) {
643 const u32 temp = refresh_frame_flags.refresh_golden;
644 refresh_frame_flags.refresh_golden.Assign(refresh_frame_flags.refresh_alt.Value());
645 refresh_frame_flags.refresh_alt.Assign(temp);
646 }
647 if (current_frame_info.intra_only) {
648 uncomp_writer.WriteU(frame_sync_code, 24);
649 uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
650 uncomp_writer.WriteU(current_frame_info.frame_size.width - 1, 16);
651 uncomp_writer.WriteU(current_frame_info.frame_size.height - 1, 16);
652 uncomp_writer.WriteBit(false); // Render and frame size different.
653 } else {
654 const bool swap_indices = needs_ref_swap ^ swap_ref_indices;
655 const auto ref_frame_index = swap_indices ? std::array{0, 2, 1} : std::array{0, 1, 2};
656 uncomp_writer.WriteU(refresh_frame_flags.raw, 8);
657 for (size_t index = 1; index < 4; index++) {
658 uncomp_writer.WriteU(ref_frame_index[index - 1], 3);
659 uncomp_writer.WriteU(current_frame_info.ref_frame_sign_bias[index], 1);
660 }
661 uncomp_writer.WriteBit(true); // Frame size with refs.
662 uncomp_writer.WriteBit(false); // Render and frame size different.
663 uncomp_writer.WriteBit(current_frame_info.allow_high_precision_mv);
664 uncomp_writer.WriteBit(current_frame_info.interp_filter == 4);
665
666 if (current_frame_info.interp_filter != 4) {
667 uncomp_writer.WriteU(current_frame_info.interp_filter, 2);
668 }
669 }
670 }
671
672 if (!current_frame_info.error_resilient_mode) {
673 uncomp_writer.WriteBit(true); // Refresh frame context. where do i get this info from?
674 uncomp_writer.WriteBit(true); // Frame parallel decoding mode.
675 }
676
677 int frame_ctx_idx = 0;
678 if (!current_frame_info.show_frame) {
679 frame_ctx_idx = 1;
680 }
681
682 uncomp_writer.WriteU(frame_ctx_idx, 2); // Frame context index.
683 prev_frame_probs = frame_ctxs[frame_ctx_idx]; // reference probabilities for compressed header
684 frame_ctxs[frame_ctx_idx] = current_frame_info.entropy;
685
686 uncomp_writer.WriteU(current_frame_info.first_level, 6);
687 uncomp_writer.WriteU(current_frame_info.sharpness_level, 3);
688 uncomp_writer.WriteBit(current_frame_info.mode_ref_delta_enabled);
689
690 if (current_frame_info.mode_ref_delta_enabled) {
691 // check if ref deltas are different, update accordingly
692 std::array<bool, 4> update_loop_filter_ref_deltas;
693 std::array<bool, 2> update_loop_filter_mode_deltas;
694
695 bool loop_filter_delta_update = false;
696
697 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
698 const s8 old_deltas = loop_filter_ref_deltas[index];
699 const s8 new_deltas = current_frame_info.ref_deltas[index];
700 const bool differing_delta = old_deltas != new_deltas;
701
702 update_loop_filter_ref_deltas[index] = differing_delta;
703 loop_filter_delta_update |= differing_delta;
704 }
705
706 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
707 const s8 old_deltas = loop_filter_mode_deltas[index];
708 const s8 new_deltas = current_frame_info.mode_deltas[index];
709 const bool differing_delta = old_deltas != new_deltas;
710
711 update_loop_filter_mode_deltas[index] = differing_delta;
712 loop_filter_delta_update |= differing_delta;
713 }
714
715 uncomp_writer.WriteBit(loop_filter_delta_update);
716
717 if (loop_filter_delta_update) {
718 for (std::size_t index = 0; index < current_frame_info.ref_deltas.size(); index++) {
719 uncomp_writer.WriteBit(update_loop_filter_ref_deltas[index]);
720
721 if (update_loop_filter_ref_deltas[index]) {
722 uncomp_writer.WriteS(current_frame_info.ref_deltas[index], 6);
723 }
724 }
725
726 for (std::size_t index = 0; index < current_frame_info.mode_deltas.size(); index++) {
727 uncomp_writer.WriteBit(update_loop_filter_mode_deltas[index]);
728
729 if (update_loop_filter_mode_deltas[index]) {
730 uncomp_writer.WriteS(current_frame_info.mode_deltas[index], 6);
731 }
732 }
733 // save new deltas
734 loop_filter_ref_deltas = current_frame_info.ref_deltas;
735 loop_filter_mode_deltas = current_frame_info.mode_deltas;
736 }
737 }
738
739 uncomp_writer.WriteU(current_frame_info.base_q_index, 8);
740
741 uncomp_writer.WriteDeltaQ(current_frame_info.y_dc_delta_q);
742 uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q);
743 uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q);
744
745 ASSERT(!current_frame_info.segment_enabled);
746 uncomp_writer.WriteBit(false); // Segmentation enabled (TODO).
747
748 const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width);
749 const s32 max_tile_cols_log2 = CalcMaxLog2TileCols(current_frame_info.frame_size.width);
750
751 const s32 tile_cols_log2_diff = current_frame_info.log2_tile_cols - min_tile_cols_log2;
752 const s32 tile_cols_log2_inc_mask = (1 << tile_cols_log2_diff) - 1;
753
754 // If it's less than the maximum, we need to add an extra 0 on the bitstream
755 // to indicate that it should stop reading.
756 if (current_frame_info.log2_tile_cols < max_tile_cols_log2) {
757 uncomp_writer.WriteU(tile_cols_log2_inc_mask << 1, tile_cols_log2_diff + 1);
758 } else {
759 uncomp_writer.WriteU(tile_cols_log2_inc_mask, tile_cols_log2_diff);
760 }
761
762 const bool tile_rows_log2_is_nonzero = current_frame_info.log2_tile_rows != 0;
763
764 uncomp_writer.WriteBit(tile_rows_log2_is_nonzero);
765
766 if (tile_rows_log2_is_nonzero) {
767 uncomp_writer.WriteBit(current_frame_info.log2_tile_rows > 1);
768 }
769
770 return uncomp_writer;
771}
772
773void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
774 std::vector<u8> bitstream;
775 {
776 Vp9FrameContainer curr_frame = GetCurrentFrame(state);
777 current_frame_info = curr_frame.info;
778 bitstream = std::move(curr_frame.bit_stream);
779 }
780 // The uncompressed header routine sets PrevProb parameters needed for the compressed header
781 auto uncomp_writer = ComposeUncompressedHeader();
782 std::vector<u8> compressed_header = ComposeCompressedHeader();
783
784 uncomp_writer.WriteU(static_cast<s32>(compressed_header.size()), 16);
785 uncomp_writer.Flush();
786 std::vector<u8> uncompressed_header = uncomp_writer.GetByteArray();
787
788 // Write headers and frame to buffer
789 frame.resize(uncompressed_header.size() + compressed_header.size() + bitstream.size());
790 std::copy(uncompressed_header.begin(), uncompressed_header.end(), frame.begin());
791 std::copy(compressed_header.begin(), compressed_header.end(),
792 frame.begin() + uncompressed_header.size());
793 std::copy(bitstream.begin(), bitstream.end(),
794 frame.begin() + uncompressed_header.size() + compressed_header.size());
795}
796
797VpxRangeEncoder::VpxRangeEncoder() {
798 Write(false);
799}
800
801VpxRangeEncoder::~VpxRangeEncoder() = default;
802
803void VpxRangeEncoder::Write(s32 value, s32 value_size) {
804 for (s32 bit = value_size - 1; bit >= 0; bit--) {
805 Write(((value >> bit) & 1) != 0);
806 }
807}
808
809void VpxRangeEncoder::Write(bool bit) {
810 Write(bit, half_probability);
811}
812
813void VpxRangeEncoder::Write(bool bit, s32 probability) {
814 u32 local_range = range;
815 const u32 split = 1 + (((local_range - 1) * static_cast<u32>(probability)) >> 8);
816 local_range = split;
817
818 if (bit) {
819 low_value += split;
820 local_range = range - split;
821 }
822
823 s32 shift = static_cast<s32>(norm_lut[local_range]);
824 local_range <<= shift;
825 count += shift;
826
827 if (count >= 0) {
828 const s32 offset = shift - count;
829
830 if (((low_value << (offset - 1)) >> 31) != 0) {
831 const s32 current_pos = static_cast<s32>(base_stream.GetPosition());
832 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
833 while (PeekByte() == 0xff) {
834 base_stream.WriteByte(0);
835
836 base_stream.Seek(-2, Common::SeekOrigin::FromCurrentPos);
837 }
838 base_stream.WriteByte(static_cast<u8>((PeekByte() + 1)));
839 base_stream.Seek(current_pos, Common::SeekOrigin::SetOrigin);
840 }
841 base_stream.WriteByte(static_cast<u8>((low_value >> (24 - offset))));
842
843 low_value <<= offset;
844 shift = count;
845 low_value &= 0xffffff;
846 count -= 8;
847 }
848
849 low_value <<= shift;
850 range = local_range;
851}
852
853void VpxRangeEncoder::End() {
854 for (std::size_t index = 0; index < 32; ++index) {
855 Write(false);
856 }
857}
858
859u8 VpxRangeEncoder::PeekByte() {
860 const u8 value = base_stream.ReadByte();
861 base_stream.Seek(-1, Common::SeekOrigin::FromCurrentPos);
862
863 return value;
864}
865
866VpxBitStreamWriter::VpxBitStreamWriter() = default;
867
868VpxBitStreamWriter::~VpxBitStreamWriter() = default;
869
870void VpxBitStreamWriter::WriteU(u32 value, u32 value_size) {
871 WriteBits(value, value_size);
872}
873
874void VpxBitStreamWriter::WriteS(s32 value, u32 value_size) {
875 const bool sign = value < 0;
876 if (sign) {
877 value = -value;
878 }
879
880 WriteBits(static_cast<u32>(value << 1) | (sign ? 1 : 0), value_size + 1);
881}
882
883void VpxBitStreamWriter::WriteDeltaQ(u32 value) {
884 const bool delta_coded = value != 0;
885 WriteBit(delta_coded);
886
887 if (delta_coded) {
888 WriteBits(value, 4);
889 }
890}
891
892void VpxBitStreamWriter::WriteBits(u32 value, u32 bit_count) {
893 s32 value_pos = 0;
894 s32 remaining = bit_count;
895
896 while (remaining > 0) {
897 s32 copy_size = remaining;
898
899 const s32 free = GetFreeBufferBits();
900
901 if (copy_size > free) {
902 copy_size = free;
903 }
904
905 const s32 mask = (1 << copy_size) - 1;
906
907 const s32 src_shift = (bit_count - value_pos) - copy_size;
908 const s32 dst_shift = (buffer_size - buffer_pos) - copy_size;
909
910 buffer |= ((value >> src_shift) & mask) << dst_shift;
911
912 value_pos += copy_size;
913 buffer_pos += copy_size;
914 remaining -= copy_size;
915 }
916}
917
918void VpxBitStreamWriter::WriteBit(bool state) {
919 WriteBits(state ? 1 : 0, 1);
920}
921
922s32 VpxBitStreamWriter::GetFreeBufferBits() {
923 if (buffer_pos == buffer_size) {
924 Flush();
925 }
926
927 return buffer_size - buffer_pos;
928}
929
930void VpxBitStreamWriter::Flush() {
931 if (buffer_pos == 0) {
932 return;
933 }
934 byte_array.push_back(static_cast<u8>(buffer));
935 buffer = 0;
936 buffer_pos = 0;
937}
938
939std::vector<u8>& VpxBitStreamWriter::GetByteArray() {
940 return byte_array;
941}
942
943const std::vector<u8>& VpxBitStreamWriter::GetByteArray() const {
944 return byte_array;
945}
946
947} // namespace Tegra::Decoder
diff --git a/src/video_core/host1x/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h
new file mode 100644
index 000000000..d4083e8d3
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9.h
@@ -0,0 +1,198 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8
9#include "common/common_types.h"
10#include "common/stream.h"
11#include "video_core/host1x/codecs/vp9_types.h"
12#include "video_core/host1x/nvdec_common.h"
13
14namespace Tegra {
15
16namespace Host1x {
17class Host1x;
18} // namespace Host1x
19
20namespace Decoder {
21
22/// The VpxRangeEncoder, and VpxBitStreamWriter classes are used to compose the
23/// VP9 header bitstreams.
24
25class VpxRangeEncoder {
26public:
27 VpxRangeEncoder();
28 ~VpxRangeEncoder();
29
30 VpxRangeEncoder(const VpxRangeEncoder&) = delete;
31 VpxRangeEncoder& operator=(const VpxRangeEncoder&) = delete;
32
33 VpxRangeEncoder(VpxRangeEncoder&&) = default;
34 VpxRangeEncoder& operator=(VpxRangeEncoder&&) = default;
35
36 /// Writes the rightmost value_size bits from value into the stream
37 void Write(s32 value, s32 value_size);
38
39 /// Writes a single bit with half probability
40 void Write(bool bit);
41
42 /// Writes a bit to the base_stream encoded with probability
43 void Write(bool bit, s32 probability);
44
45 /// Signal the end of the bitstream
46 void End();
47
48 [[nodiscard]] std::vector<u8>& GetBuffer() {
49 return base_stream.GetBuffer();
50 }
51
52 [[nodiscard]] const std::vector<u8>& GetBuffer() const {
53 return base_stream.GetBuffer();
54 }
55
56private:
57 u8 PeekByte();
58 Common::Stream base_stream{};
59 u32 low_value{};
60 u32 range{0xff};
61 s32 count{-24};
62 s32 half_probability{128};
63};
64
65class VpxBitStreamWriter {
66public:
67 VpxBitStreamWriter();
68 ~VpxBitStreamWriter();
69
70 VpxBitStreamWriter(const VpxBitStreamWriter&) = delete;
71 VpxBitStreamWriter& operator=(const VpxBitStreamWriter&) = delete;
72
73 VpxBitStreamWriter(VpxBitStreamWriter&&) = default;
74 VpxBitStreamWriter& operator=(VpxBitStreamWriter&&) = default;
75
76 /// Write an unsigned integer value
77 void WriteU(u32 value, u32 value_size);
78
79 /// Write a signed integer value
80 void WriteS(s32 value, u32 value_size);
81
82 /// Based on 6.2.10 of VP9 Spec, writes a delta coded value
83 void WriteDeltaQ(u32 value);
84
85 /// Write a single bit.
86 void WriteBit(bool state);
87
88 /// Pushes current buffer into buffer_array, resets buffer
89 void Flush();
90
91 /// Returns byte_array
92 [[nodiscard]] std::vector<u8>& GetByteArray();
93
94 /// Returns const byte_array
95 [[nodiscard]] const std::vector<u8>& GetByteArray() const;
96
97private:
98 /// Write bit_count bits from value into buffer
99 void WriteBits(u32 value, u32 bit_count);
100
101 /// Gets next available position in buffer, invokes Flush() if buffer is full
102 s32 GetFreeBufferBits();
103
104 s32 buffer_size{8};
105
106 s32 buffer{};
107 s32 buffer_pos{};
108 std::vector<u8> byte_array;
109};
110
111class VP9 {
112public:
113 explicit VP9(Host1x::Host1x& host1x);
114 ~VP9();
115
116 VP9(const VP9&) = delete;
117 VP9& operator=(const VP9&) = delete;
118
119 VP9(VP9&&) = default;
120 VP9& operator=(VP9&&) = delete;
121
122 /// Composes the VP9 frame from the GPU state information.
123 /// Based on the official VP9 spec documentation
124 void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state);
125
126 /// Returns true if the most recent frame was a hidden frame.
127 [[nodiscard]] bool WasFrameHidden() const {
128 return !current_frame_info.show_frame;
129 }
130
131 /// Returns a const reference to the composed frame data.
132 [[nodiscard]] const std::vector<u8>& GetFrameBytes() const {
133 return frame;
134 }
135
136private:
137 /// Generates compressed header probability updates in the bitstream writer
138 template <typename T, std::size_t N>
139 void WriteProbabilityUpdate(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
140 const std::array<T, N>& old_prob);
141
142 /// Generates compressed header probability updates in the bitstream writer
143 /// If probs are not equal, WriteProbabilityDelta is invoked
144 void WriteProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
145
146 /// Generates compressed header probability deltas in the bitstream writer
147 void WriteProbabilityDelta(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
148
149 /// Inverse of 6.3.4 Decode term subexp
150 void EncodeTermSubExp(VpxRangeEncoder& writer, s32 value);
151
152 /// Writes if the value is less than the test value
153 bool WriteLessThan(VpxRangeEncoder& writer, s32 value, s32 test);
154
155 /// Writes probability updates for the Coef probabilities
156 void WriteCoefProbabilityUpdate(VpxRangeEncoder& writer, s32 tx_mode,
157 const std::array<u8, 1728>& new_prob,
158 const std::array<u8, 1728>& old_prob);
159
160 /// Write probabilities for 4-byte aligned structures
161 template <typename T, std::size_t N>
162 void WriteProbabilityUpdateAligned4(VpxRangeEncoder& writer, const std::array<T, N>& new_prob,
163 const std::array<T, N>& old_prob);
164
165 /// Write motion vector probability updates. 6.3.17 in the spec
166 void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob);
167
168 /// Returns VP9 information from NVDEC provided offset and size
169 [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(
170 const Host1x::NvdecCommon::NvdecRegisters& state);
171
172 /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct
173 void InsertEntropy(u64 offset, Vp9EntropyProbs& dst);
174
175 /// Returns frame to be decoded after buffering
176 [[nodiscard]] Vp9FrameContainer GetCurrentFrame(
177 const Host1x::NvdecCommon::NvdecRegisters& state);
178
179 /// Use NVDEC providied information to compose the headers for the current frame
180 [[nodiscard]] std::vector<u8> ComposeCompressedHeader();
181 [[nodiscard]] VpxBitStreamWriter ComposeUncompressedHeader();
182
183 Host1x::Host1x& host1x;
184 std::vector<u8> frame;
185
186 std::array<s8, 4> loop_filter_ref_deltas{};
187 std::array<s8, 2> loop_filter_mode_deltas{};
188
189 Vp9FrameContainer next_frame{};
190 std::array<Vp9EntropyProbs, 4> frame_ctxs{};
191 bool swap_ref_indices{};
192
193 Vp9PictureInfo current_frame_info{};
194 Vp9EntropyProbs prev_frame_probs{};
195};
196
197} // namespace Decoder
198} // namespace Tegra
diff --git a/src/video_core/host1x/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h
new file mode 100644
index 000000000..adad8ed7e
--- /dev/null
+++ b/src/video_core/host1x/codecs/vp9_types.h
@@ -0,0 +1,305 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <array>
7#include <vector>
8#include "common/common_funcs.h"
9#include "common/common_types.h"
10
11namespace Tegra {
12
13namespace Decoder {
14struct Vp9FrameDimensions {
15 s16 width;
16 s16 height;
17 s16 luma_pitch;
18 s16 chroma_pitch;
19};
20static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size");
21
22enum class FrameFlags : u32 {
23 IsKeyFrame = 1 << 0,
24 LastFrameIsKeyFrame = 1 << 1,
25 FrameSizeChanged = 1 << 2,
26 ErrorResilientMode = 1 << 3,
27 LastShowFrame = 1 << 4,
28 IntraOnly = 1 << 5,
29};
30DECLARE_ENUM_FLAG_OPERATORS(FrameFlags)
31
/// Transform block sizes. TxSizes is the number of sizes, not a size itself.
enum class TxSize {
    Tx4x4 = 0,   ///< Transform of 4x4 samples
    Tx8x8 = 1,   ///< Transform of 8x8 samples
    Tx16x16 = 2, ///< Transform of 16x16 samples
    Tx32x32 = 3, ///< Transform of 32x32 samples
    TxSizes = 4, ///< Count of the entries above
};
39
/// Transform modes. TxModes is the number of modes, not a mode itself.
enum class TxMode {
    Only4X4 = 0,      ///< Every block uses the 4x4 transform
    Allow8X8 = 1,     ///< Block transform size may go up to 8x8
    Allow16X16 = 2,   ///< Block transform size may go up to 16x16
    Allow32X32 = 3,   ///< Block transform size may go up to 32x32
    TxModeSelect = 4, ///< The transform is specified for each block
    TxModes = 5,      ///< Count of the entries above
};
48
49struct Segmentation {
50 u8 enabled;
51 u8 update_map;
52 u8 temporal_update;
53 u8 abs_delta;
54 std::array<u32, 8> feature_mask;
55 std::array<std::array<s16, 4>, 8> feature_data;
56};
57static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size");
58
59struct LoopFilter {
60 u8 mode_ref_delta_enabled;
61 std::array<s8, 4> ref_deltas;
62 std::array<s8, 2> mode_deltas;
63};
64static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size");
65
66struct Vp9EntropyProbs {
67 std::array<u8, 36> y_mode_prob; ///< 0x0000
68 std::array<u8, 64> partition_prob; ///< 0x0024
69 std::array<u8, 1728> coef_probs; ///< 0x0064
70 std::array<u8, 8> switchable_interp_prob; ///< 0x0724
71 std::array<u8, 28> inter_mode_prob; ///< 0x072C
72 std::array<u8, 4> intra_inter_prob; ///< 0x0748
73 std::array<u8, 5> comp_inter_prob; ///< 0x074C
74 std::array<u8, 10> single_ref_prob; ///< 0x0751
75 std::array<u8, 5> comp_ref_prob; ///< 0x075B
76 std::array<u8, 6> tx_32x32_prob; ///< 0x0760
77 std::array<u8, 4> tx_16x16_prob; ///< 0x0766
78 std::array<u8, 2> tx_8x8_prob; ///< 0x076A
79 std::array<u8, 3> skip_probs; ///< 0x076C
80 std::array<u8, 3> joints; ///< 0x076F
81 std::array<u8, 2> sign; ///< 0x0772
82 std::array<u8, 20> classes; ///< 0x0774
83 std::array<u8, 2> class_0; ///< 0x0788
84 std::array<u8, 20> prob_bits; ///< 0x078A
85 std::array<u8, 12> class_0_fr; ///< 0x079E
86 std::array<u8, 6> fr; ///< 0x07AA
87 std::array<u8, 2> class_0_hp; ///< 0x07B0
88 std::array<u8, 2> high_precision; ///< 0x07B2
89};
90static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size");
91
92struct Vp9PictureInfo {
93 u32 bitstream_size;
94 std::array<u64, 4> frame_offsets;
95 std::array<s8, 4> ref_frame_sign_bias;
96 s32 base_q_index;
97 s32 y_dc_delta_q;
98 s32 uv_dc_delta_q;
99 s32 uv_ac_delta_q;
100 s32 transform_mode;
101 s32 interp_filter;
102 s32 reference_mode;
103 s32 log2_tile_cols;
104 s32 log2_tile_rows;
105 std::array<s8, 4> ref_deltas;
106 std::array<s8, 2> mode_deltas;
107 Vp9EntropyProbs entropy;
108 Vp9FrameDimensions frame_size;
109 u8 first_level;
110 u8 sharpness_level;
111 bool is_key_frame;
112 bool intra_only;
113 bool last_frame_was_key;
114 bool error_resilient_mode;
115 bool last_frame_shown;
116 bool show_frame;
117 bool lossless;
118 bool allow_high_precision_mv;
119 bool segment_enabled;
120 bool mode_ref_delta_enabled;
121};
122
123struct Vp9FrameContainer {
124 Vp9PictureInfo info{};
125 std::vector<u8> bit_stream;
126};
127
128struct PictureInfo {
129 INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00
130 u32 bitstream_size; ///< 0x30
131 INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34
132 Vp9FrameDimensions last_frame_size; ///< 0x48
133 Vp9FrameDimensions golden_frame_size; ///< 0x50
134 Vp9FrameDimensions alt_frame_size; ///< 0x58
135 Vp9FrameDimensions current_frame_size; ///< 0x60
136 FrameFlags vp9_flags; ///< 0x68
137 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C
138 u8 first_level; ///< 0x70
139 u8 sharpness_level; ///< 0x71
140 u8 base_q_index; ///< 0x72
141 u8 y_dc_delta_q; ///< 0x73
142 u8 uv_ac_delta_q; ///< 0x74
143 u8 uv_dc_delta_q; ///< 0x75
144 u8 lossless; ///< 0x76
145 u8 tx_mode; ///< 0x77
146 u8 allow_high_precision_mv; ///< 0x78
147 u8 interp_filter; ///< 0x79
148 u8 reference_mode; ///< 0x7A
149 INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B
150 u8 log2_tile_cols; ///< 0x7E
151 u8 log2_tile_rows; ///< 0x7F
152 Segmentation segmentation; ///< 0x80
153 LoopFilter loop_filter; ///< 0xE4
154 INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB
155
156 [[nodiscard]] Vp9PictureInfo Convert() const {
157 return {
158 .bitstream_size = bitstream_size,
159 .frame_offsets{},
160 .ref_frame_sign_bias = ref_frame_sign_bias,
161 .base_q_index = base_q_index,
162 .y_dc_delta_q = y_dc_delta_q,
163 .uv_dc_delta_q = uv_dc_delta_q,
164 .uv_ac_delta_q = uv_ac_delta_q,
165 .transform_mode = tx_mode,
166 .interp_filter = interp_filter,
167 .reference_mode = reference_mode,
168 .log2_tile_cols = log2_tile_cols,
169 .log2_tile_rows = log2_tile_rows,
170 .ref_deltas = loop_filter.ref_deltas,
171 .mode_deltas = loop_filter.mode_deltas,
172 .entropy{},
173 .frame_size = current_frame_size,
174 .first_level = first_level,
175 .sharpness_level = sharpness_level,
176 .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame),
177 .intra_only = True(vp9_flags & FrameFlags::IntraOnly),
178 .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame),
179 .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode),
180 .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame),
181 .show_frame = true,
182 .lossless = lossless != 0,
183 .allow_high_precision_mv = allow_high_precision_mv != 0,
184 .segment_enabled = segmentation.enabled != 0,
185 .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0,
186 };
187 }
188};
189static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size");
190
191struct EntropyProbs {
192 INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000
193 std::array<u8, 28> inter_mode_prob; ///< 0x0400
194 std::array<u8, 4> intra_inter_prob; ///< 0x041C
195 INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420
196 std::array<u8, 2> tx_8x8_prob; ///< 0x0470
197 std::array<u8, 4> tx_16x16_prob; ///< 0x0472
198 std::array<u8, 6> tx_32x32_prob; ///< 0x0476
199 std::array<u8, 4> y_mode_prob_e8; ///< 0x047C
200 std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480
201 INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0
202 std::array<u8, 64> partition_prob; ///< 0x04E0
203 INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520
204 std::array<u8, 8> switchable_interp_prob; ///< 0x052A
205 std::array<u8, 5> comp_inter_prob; ///< 0x0532
206 std::array<u8, 3> skip_probs; ///< 0x0537
207 INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A
208 std::array<u8, 3> joints; ///< 0x053B
209 std::array<u8, 2> sign; ///< 0x053E
210 std::array<u8, 2> class_0; ///< 0x0540
211 std::array<u8, 6> fr; ///< 0x0542
212 std::array<u8, 2> class_0_hp; ///< 0x0548
213 std::array<u8, 2> high_precision; ///< 0x054A
214 std::array<u8, 20> classes; ///< 0x054C
215 std::array<u8, 12> class_0_fr; ///< 0x0560
216 std::array<u8, 20> pred_bits; ///< 0x056C
217 std::array<u8, 10> single_ref_prob; ///< 0x0580
218 std::array<u8, 5> comp_ref_prob; ///< 0x058A
219 INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F
220 std::array<u8, 2304> coef_probs; ///< 0x05A0
221
222 void Convert(Vp9EntropyProbs& fc) {
223 fc.inter_mode_prob = inter_mode_prob;
224 fc.intra_inter_prob = intra_inter_prob;
225 fc.tx_8x8_prob = tx_8x8_prob;
226 fc.tx_16x16_prob = tx_16x16_prob;
227 fc.tx_32x32_prob = tx_32x32_prob;
228
229 for (std::size_t i = 0; i < 4; i++) {
230 for (std::size_t j = 0; j < 9; j++) {
231 fc.y_mode_prob[j + 9 * i] = j < 8 ? y_mode_prob_e0e7[i][j] : y_mode_prob_e8[i];
232 }
233 }
234
235 fc.partition_prob = partition_prob;
236 fc.switchable_interp_prob = switchable_interp_prob;
237 fc.comp_inter_prob = comp_inter_prob;
238 fc.skip_probs = skip_probs;
239 fc.joints = joints;
240 fc.sign = sign;
241 fc.class_0 = class_0;
242 fc.fr = fr;
243 fc.class_0_hp = class_0_hp;
244 fc.high_precision = high_precision;
245 fc.classes = classes;
246 fc.class_0_fr = class_0_fr;
247 fc.prob_bits = pred_bits;
248 fc.single_ref_prob = single_ref_prob;
249 fc.comp_ref_prob = comp_ref_prob;
250
251 // Skip the 4th element as it goes unused
252 for (std::size_t i = 0; i < coef_probs.size(); i += 4) {
253 const std::size_t j = i - i / 4;
254 fc.coef_probs[j] = coef_probs[i];
255 fc.coef_probs[j + 1] = coef_probs[i + 1];
256 fc.coef_probs[j + 2] = coef_probs[i + 2];
257 }
258 }
259};
260static_assert(sizeof(EntropyProbs) == 0xEA0, "EntropyProbs is an invalid size");
261
/// Identifies one of the three reference frame slots.
enum class Ref {
    Last,
    Golden,
    AltRef,
};
263
264struct RefPoolElement {
265 s64 frame{};
266 Ref ref{};
267 bool refresh{};
268};
269
270#define ASSERT_POSITION(field_name, position) \
271 static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \
272 "Field " #field_name " has invalid position")
273
274ASSERT_POSITION(partition_prob, 0x0024);
275ASSERT_POSITION(switchable_interp_prob, 0x0724);
276ASSERT_POSITION(sign, 0x0772);
277ASSERT_POSITION(class_0_fr, 0x079E);
278ASSERT_POSITION(high_precision, 0x07B2);
279#undef ASSERT_POSITION
280
281#define ASSERT_POSITION(field_name, position) \
282 static_assert(offsetof(PictureInfo, field_name) == position, \
283 "Field " #field_name " has invalid position")
284
285ASSERT_POSITION(bitstream_size, 0x30);
286ASSERT_POSITION(last_frame_size, 0x48);
287ASSERT_POSITION(first_level, 0x70);
288ASSERT_POSITION(segmentation, 0x80);
289ASSERT_POSITION(loop_filter, 0xE4);
290#undef ASSERT_POSITION
291
292#define ASSERT_POSITION(field_name, position) \
293 static_assert(offsetof(EntropyProbs, field_name) == position, \
294 "Field " #field_name " has invalid position")
295
296ASSERT_POSITION(inter_mode_prob, 0x400);
297ASSERT_POSITION(tx_8x8_prob, 0x470);
298ASSERT_POSITION(partition_prob, 0x4E0);
299ASSERT_POSITION(class_0, 0x540);
300ASSERT_POSITION(class_0_fr, 0x560);
301ASSERT_POSITION(coef_probs, 0x5A0);
302#undef ASSERT_POSITION
303
304}; // namespace Decoder
305}; // namespace Tegra